data_sampler 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/data_sampler/dependency.rb +1 -1
- data/lib/data_sampler/sample.rb +4 -3
- data/lib/data_sampler/table_sample.rb +23 -19
- data/lib/data_sampler/version.rb +1 -1
- metadata +1 -1
data/lib/data_sampler/sample.rb
CHANGED
@@ -23,9 +23,10 @@ module DataSampler
|
|
23
23
|
begin
|
24
24
|
new_dependencies = 0
|
25
25
|
@table_samples.values.each do |table_sample|
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
newly_added = table_sample.ensure_referential_integrity(@table_samples)
|
27
|
+
if newly_added > 0
|
28
|
+
new_dependencies += newly_added
|
29
|
+
warn " Found #{newly_added} new dependents for table `#{table_sample.table_name}`"
|
29
30
|
end
|
30
31
|
end
|
31
32
|
warn " Discovered #{new_dependencies} new dependencies" if new_dependencies > 0
|
@@ -23,11 +23,11 @@ module DataSampler
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def fulfil(dependency)
|
26
|
-
return if fulfilled?(dependency)
|
26
|
+
return 0 if fulfilled?(dependency)
|
27
27
|
where = dependency.keys.collect { |col, val| "#{@connection.quote_column_name col} = #{@connection.quote val}" } * ' AND '
|
28
28
|
sql = "SELECT * FROM #{@connection.quote_table_name @table_name} WHERE " + where
|
29
29
|
row = @connection.select_one(sql)
|
30
|
-
raise "Could not find
|
30
|
+
raise "Could not find #{dependency}" if row.nil?
|
31
31
|
add row
|
32
32
|
end
|
33
33
|
|
@@ -44,43 +44,47 @@ module DataSampler
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def add(row)
|
47
|
-
return
|
47
|
+
return 0 unless @sample.add? row
|
48
48
|
@sampled_ids.add row['id'] if row['id']
|
49
|
-
|
49
|
+
newly_added = 0
|
50
50
|
dependencies_for(row).each do |dep|
|
51
|
-
|
51
|
+
newly_added += 1 if @pending_dependencies.add?(dep)
|
52
52
|
end
|
53
|
-
|
53
|
+
newly_added
|
54
54
|
rescue ActiveRecord::StatementInvalid => e
|
55
55
|
# Don't choke on unknown table engines, such as Sphinx
|
56
56
|
end
|
57
57
|
|
58
58
|
def ensure_referential_integrity(table_samples)
|
59
|
-
|
59
|
+
newly_added = 0
|
60
60
|
deps_in_progress = @pending_dependencies
|
61
61
|
@pending_dependencies = Set.new
|
62
62
|
deps_in_progress.each do |dependency|
|
63
63
|
raise "Table sample for #{dependency.table_name} not found" unless table_samples[dependency.table_name]
|
64
|
-
|
64
|
+
newly_added += table_samples[dependency.table_name].fulfil(dependency)
|
65
65
|
end
|
66
|
-
|
66
|
+
newly_added
|
67
67
|
end
|
68
68
|
|
69
69
|
def to_sql
|
70
|
-
ret =
|
70
|
+
ret = "-- #{@table_name}: #{@sample.count} rows\n"
|
71
71
|
unless @sample.empty?
|
72
72
|
quoted_cols = @sample.first.keys.collect { |col| @connection.quote_column_name col }
|
73
|
-
|
74
|
-
@sample.
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
73
|
+
# INSERT in batches of 1000
|
74
|
+
@sample.each_slice(1000) do |rows|
|
75
|
+
values = rows.collect { |row|
|
76
|
+
quoted_vals = []
|
77
|
+
row.each_pair do |field,val|
|
78
|
+
# HACK: Brute attempt at not revealing sensitive data
|
79
|
+
val.gsub! /./, '*' if field.downcase == 'password'
|
80
|
+
quoted_vals << @connection.quote(val)
|
81
|
+
end
|
82
|
+
quoted_vals * ','
|
83
|
+
} * '),('
|
84
|
+
ret << "INSERT INTO #{@connection.quote_table_name @table_name} (#{quoted_cols * ','}) VALUES (#{values});\n"
|
81
85
|
end
|
82
86
|
end
|
83
|
-
ret
|
87
|
+
ret
|
84
88
|
end
|
85
89
|
|
86
90
|
protected
|
data/lib/data_sampler/version.rb
CHANGED