data_sampler 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +0 -3
- data/README +7 -3
- data/bin/data_sampler +0 -1
- data/data_sampler.gemspec +3 -0
- data/lib/data_sampler/dependency.rb +1 -1
- data/lib/data_sampler/sample.rb +9 -6
- data/lib/data_sampler/table_sample.rb +5 -1
- data/lib/data_sampler/version.rb +1 -1
- metadata +57 -9
data/Gemfile
CHANGED
data/README
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
|
2
|
-
Ever found yourself wanting a modest amount of fresh rows from a production database
|
3
|
-
put back by the need to maintain referential integrity
|
4
|
-
|
2
|
+
Ever found yourself wanting a modest amount of fresh rows from a production database
|
3
|
+
for development purposes, but put off by the need to maintain referential integrity
|
4
|
+
in the extracted data sample?
|
5
|
+
|
6
|
+
This data sampling utility will take care that referential dependencies are
|
7
|
+
fulfilled by recursively expanding the sample with unfulfilled dependencies until
|
8
|
+
the sample is referentially consistent.
|
5
9
|
|
6
10
|
COMMANDS:
|
7
11
|
|
data/bin/data_sampler
CHANGED
@@ -23,7 +23,6 @@ command :sample do |c|
|
|
23
23
|
:database => 'test',
|
24
24
|
:username => 'root',
|
25
25
|
:encoding => 'utf8',
|
26
|
-
:socket => '/opt/local/var/run/mysql5/mysqld.sock',
|
27
26
|
:rows => 1000
|
28
27
|
ActiveRecord::Base.logger = Logger.new(options.log) if options.log
|
29
28
|
ActiveRecord::Base.establish_connection(options.__hash__).with_connection do |conn|
|
data/data_sampler.gemspec
CHANGED
data/lib/data_sampler/sample.rb
CHANGED
@@ -14,20 +14,23 @@ module DataSampler
|
|
14
14
|
def compute!
|
15
15
|
@connection.tables.each do |table_name|
|
16
16
|
# Workaround for inconsistent casing in table definitions (http://bugs.mysql.com/bug.php?id=60773)
|
17
|
-
table_name.downcase!
|
17
|
+
# table_name.downcase!
|
18
18
|
@table_samples[table_name] = TableSample.new(@connection, table_name, @rows_per_table)
|
19
19
|
end
|
20
|
-
warn "Sampling #{@table_samples.count} tables
|
20
|
+
warn "Sampling #{@table_samples.count} tables from database `#{@connection.current_database}`..."
|
21
21
|
@table_samples.values.map &:sample!
|
22
22
|
warn "Ensuring referential integrity..."
|
23
23
|
begin
|
24
24
|
new_dependencies = 0
|
25
25
|
@table_samples.values.each do |table_sample|
|
26
|
-
|
26
|
+
if table_sample.ensure_referential_integrity(@table_samples)
|
27
|
+
new_dependencies += 1
|
28
|
+
warn " Found new dependents for table `#{table_sample.table_name}`"
|
29
|
+
end
|
27
30
|
end
|
28
|
-
warn "
|
31
|
+
warn " Discovered #{new_dependencies} new dependencies" if new_dependencies > 0
|
29
32
|
end while new_dependencies > 0
|
30
|
-
warn "
|
33
|
+
warn "Referential integrity obtained"
|
31
34
|
@computed = true
|
32
35
|
end
|
33
36
|
|
@@ -38,4 +41,4 @@ module DataSampler
|
|
38
41
|
|
39
42
|
end
|
40
43
|
|
41
|
-
end
|
44
|
+
end
|
@@ -26,7 +26,9 @@ module DataSampler
|
|
26
26
|
return if fulfilled?(dependency)
|
27
27
|
where = dependency.keys.collect { |col, val| "#{@connection.quote_column_name col} = #{@connection.quote val}" } * ' AND '
|
28
28
|
sql = "SELECT * FROM #{@connection.quote_table_name @table_name} WHERE " + where
|
29
|
-
|
29
|
+
row = @connection.select_one(sql)
|
30
|
+
raise "Dependent row not found for #{dependency} (using SQL: #{sql})" if row.nil?
|
31
|
+
add row
|
30
32
|
end
|
31
33
|
|
32
34
|
def fulfilled?(dependency)
|
@@ -58,6 +60,7 @@ module DataSampler
|
|
58
60
|
deps_in_progress = @pending_dependencies
|
59
61
|
@pending_dependencies = Set.new
|
60
62
|
deps_in_progress.each do |dependency|
|
63
|
+
raise "Table sample for #{dependency.table_name} not found" unless table_samples[dependency.table_name]
|
61
64
|
any_new = true if table_samples[dependency.table_name].fulfil(dependency)
|
62
65
|
end
|
63
66
|
any_new
|
@@ -83,6 +86,7 @@ module DataSampler
|
|
83
86
|
protected
|
84
87
|
|
85
88
|
def fetch_sample(count)
|
89
|
+
warn " Sampling #{count} rows from table `#{@table_name}`"
|
86
90
|
sql = "SELECT * FROM #{@connection.quote_table_name @table_name}"
|
87
91
|
pk = @connection.primary_key(@table_name)
|
88
92
|
sql += " ORDER BY #{@connection.quote_column_name pk} DESC" unless pk.nil?
|
data/lib/data_sampler/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_sampler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-03-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: schema_plus
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: activerecord
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ! '>='
|
@@ -32,10 +37,31 @@ dependencies:
|
|
32
37
|
version: '0'
|
33
38
|
type: :runtime
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: commander
|
38
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: mysql
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
39
65
|
none: false
|
40
66
|
requirements:
|
41
67
|
- - ! '>='
|
@@ -43,7 +69,28 @@ dependencies:
|
|
43
69
|
version: '0'
|
44
70
|
type: :runtime
|
45
71
|
prerelease: false
|
46
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: pry
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
47
94
|
description: ! 'Ever found yourself wanting a modest amount of fresh rows from a production
|
48
95
|
database for development purposes, but
|
49
96
|
|
@@ -90,9 +137,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
137
|
version: '0'
|
91
138
|
requirements: []
|
92
139
|
rubyforge_project: data_sampler
|
93
|
-
rubygems_version: 1.8.
|
140
|
+
rubygems_version: 1.8.23
|
94
141
|
signing_key:
|
95
142
|
specification_version: 3
|
96
143
|
summary: Extract a sample of records from a database while maintaining referential
|
97
144
|
integrity.
|
98
145
|
test_files: []
|
146
|
+
has_rdoc:
|