data_sampler 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +0 -3
- data/README +7 -3
- data/bin/data_sampler +0 -1
- data/data_sampler.gemspec +3 -0
- data/lib/data_sampler/dependency.rb +1 -1
- data/lib/data_sampler/sample.rb +9 -6
- data/lib/data_sampler/table_sample.rb +5 -1
- data/lib/data_sampler/version.rb +1 -1
- metadata +57 -9
data/Gemfile
CHANGED
data/README
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
|
2
|
-
Ever found yourself wanting a modest amount of fresh rows from a production database
|
3
|
-
put back by the need to maintain referential integrity
|
4
|
-
|
2
|
+
Ever found yourself wanting a modest amount of fresh rows from a production database
|
3
|
+
for development purposes, but put back by the need to maintain referential integrity
|
4
|
+
in the extracted data sample?
|
5
|
+
|
6
|
+
This data sampling utility will take care that referential dependencies are
|
7
|
+
fulfilled by recursively expanding the sample with unfilled dependencies until
|
8
|
+
the sample is referentially consistent.
|
5
9
|
|
6
10
|
COMMANDS:
|
7
11
|
|
data/bin/data_sampler
CHANGED
@@ -23,7 +23,6 @@ command :sample do |c|
|
|
23
23
|
:database => 'test',
|
24
24
|
:username => 'root',
|
25
25
|
:encoding => 'utf8',
|
26
|
-
:socket => '/opt/local/var/run/mysql5/mysqld.sock',
|
27
26
|
:rows => 1000
|
28
27
|
ActiveRecord::Base.logger = Logger.new(options.log) if options.log
|
29
28
|
ActiveRecord::Base.establish_connection(options.__hash__).with_connection do |conn|
|
data/data_sampler.gemspec
CHANGED
data/lib/data_sampler/sample.rb
CHANGED
@@ -14,20 +14,23 @@ module DataSampler
|
|
14
14
|
def compute!
|
15
15
|
@connection.tables.each do |table_name|
|
16
16
|
# Workaround for inconsistent casing in table definitions (http://bugs.mysql.com/bug.php?id=60773)
|
17
|
-
table_name.downcase!
|
17
|
+
# table_name.downcase!
|
18
18
|
@table_samples[table_name] = TableSample.new(@connection, table_name, @rows_per_table)
|
19
19
|
end
|
20
|
-
warn "Sampling #{@table_samples.count} tables
|
20
|
+
warn "Sampling #{@table_samples.count} tables from database `#{@connection.current_database}`..."
|
21
21
|
@table_samples.values.map &:sample!
|
22
22
|
warn "Ensuring referential integrity..."
|
23
23
|
begin
|
24
24
|
new_dependencies = 0
|
25
25
|
@table_samples.values.each do |table_sample|
|
26
|
-
|
26
|
+
if table_sample.ensure_referential_integrity(@table_samples)
|
27
|
+
new_dependencies += 1
|
28
|
+
warn " Found new dependents for table `#{table_sample.table_name}`"
|
29
|
+
end
|
27
30
|
end
|
28
|
-
warn "
|
31
|
+
warn " Discovered #{new_dependencies} new dependencies" if new_dependencies > 0
|
29
32
|
end while new_dependencies > 0
|
30
|
-
warn "
|
33
|
+
warn "Referential integrity obtained"
|
31
34
|
@computed = true
|
32
35
|
end
|
33
36
|
|
@@ -38,4 +41,4 @@ module DataSampler
|
|
38
41
|
|
39
42
|
end
|
40
43
|
|
41
|
-
end
|
44
|
+
end
|
@@ -26,7 +26,9 @@ module DataSampler
|
|
26
26
|
return if fulfilled?(dependency)
|
27
27
|
where = dependency.keys.collect { |col, val| "#{@connection.quote_column_name col} = #{@connection.quote val}" } * ' AND '
|
28
28
|
sql = "SELECT * FROM #{@connection.quote_table_name @table_name} WHERE " + where
|
29
|
-
|
29
|
+
row = @connection.select_one(sql)
|
30
|
+
raise "Dependent row not found for #{dependency} (using SQL: #{sql})" if row.nil?
|
31
|
+
add row
|
30
32
|
end
|
31
33
|
|
32
34
|
def fulfilled?(dependency)
|
@@ -58,6 +60,7 @@ module DataSampler
|
|
58
60
|
deps_in_progress = @pending_dependencies
|
59
61
|
@pending_dependencies = Set.new
|
60
62
|
deps_in_progress.each do |dependency|
|
63
|
+
raise "Table sample for #{dependency.table_name} not found" unless table_samples[dependency.table_name]
|
61
64
|
any_new = true if table_samples[dependency.table_name].fulfil(dependency)
|
62
65
|
end
|
63
66
|
any_new
|
@@ -83,6 +86,7 @@ module DataSampler
|
|
83
86
|
protected
|
84
87
|
|
85
88
|
def fetch_sample(count)
|
89
|
+
warn " Sampling #{count} rows from table `#{@table_name}`"
|
86
90
|
sql = "SELECT * FROM #{@connection.quote_table_name @table_name}"
|
87
91
|
pk = @connection.primary_key(@table_name)
|
88
92
|
sql += " ORDER BY #{@connection.quote_column_name pk} DESC" unless pk.nil?
|
data/lib/data_sampler/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_sampler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-03-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: schema_plus
|
16
|
-
requirement:
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,15 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
25
30
|
- !ruby/object:Gem::Dependency
|
26
31
|
name: activerecord
|
27
|
-
requirement:
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
28
33
|
none: false
|
29
34
|
requirements:
|
30
35
|
- - ! '>='
|
@@ -32,10 +37,31 @@ dependencies:
|
|
32
37
|
version: '0'
|
33
38
|
type: :runtime
|
34
39
|
prerelease: false
|
35
|
-
version_requirements:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: commander
|
38
|
-
requirement:
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: mysql
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
39
65
|
none: false
|
40
66
|
requirements:
|
41
67
|
- - ! '>='
|
@@ -43,7 +69,28 @@ dependencies:
|
|
43
69
|
version: '0'
|
44
70
|
type: :runtime
|
45
71
|
prerelease: false
|
46
|
-
version_requirements:
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: pry
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
47
94
|
description: ! 'Ever found yourself wanting a modest amount of fresh rows from a production
|
48
95
|
database for development purposes, but
|
49
96
|
|
@@ -90,9 +137,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
137
|
version: '0'
|
91
138
|
requirements: []
|
92
139
|
rubyforge_project: data_sampler
|
93
|
-
rubygems_version: 1.8.
|
140
|
+
rubygems_version: 1.8.23
|
94
141
|
signing_key:
|
95
142
|
specification_version: 3
|
96
143
|
summary: Extract a sample of records from a database while maintaining referential
|
97
144
|
integrity.
|
98
145
|
test_files: []
|
146
|
+
has_rdoc:
|