data_sampler 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/data_sampler/dependency.rb +1 -1
- data/lib/data_sampler/sample.rb +4 -3
- data/lib/data_sampler/table_sample.rb +23 -19
- data/lib/data_sampler/version.rb +1 -1
- metadata +1 -1
data/lib/data_sampler/sample.rb
CHANGED
@@ -23,9 +23,10 @@ module DataSampler
|
|
23
23
|
begin
|
24
24
|
new_dependencies = 0
|
25
25
|
@table_samples.values.each do |table_sample|
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
newly_added = table_sample.ensure_referential_integrity(@table_samples)
|
27
|
+
if newly_added > 0
|
28
|
+
new_dependencies += newly_added
|
29
|
+
warn " Found #{newly_added} new dependents for table `#{table_sample.table_name}`"
|
29
30
|
end
|
30
31
|
end
|
31
32
|
warn " Discovered #{new_dependencies} new dependencies" if new_dependencies > 0
|
data/lib/data_sampler/table_sample.rb
CHANGED
@@ -23,11 +23,11 @@ module DataSampler
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def fulfil(dependency)
|
26
|
-
return if fulfilled?(dependency)
|
26
|
+
return 0 if fulfilled?(dependency)
|
27
27
|
where = dependency.keys.collect { |col, val| "#{@connection.quote_column_name col} = #{@connection.quote val}" } * ' AND '
|
28
28
|
sql = "SELECT * FROM #{@connection.quote_table_name @table_name} WHERE " + where
|
29
29
|
row = @connection.select_one(sql)
|
30
|
-
raise "Could not find
|
30
|
+
raise "Could not find #{dependency}" if row.nil?
|
31
31
|
add row
|
32
32
|
end
|
33
33
|
|
@@ -44,43 +44,47 @@ module DataSampler
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def add(row)
|
47
|
-
return
|
47
|
+
return 0 unless @sample.add? row
|
48
48
|
@sampled_ids.add row['id'] if row['id']
|
49
|
-
|
49
|
+
newly_added = 0
|
50
50
|
dependencies_for(row).each do |dep|
|
51
|
-
|
51
|
+
newly_added += 1 if @pending_dependencies.add?(dep)
|
52
52
|
end
|
53
|
-
|
53
|
+
newly_added
|
54
54
|
rescue ActiveRecord::StatementInvalid => e
|
55
55
|
# Don't choke on unknown table engines, such as Sphinx
|
56
56
|
end
|
57
57
|
|
58
58
|
def ensure_referential_integrity(table_samples)
|
59
|
-
|
59
|
+
newly_added = 0
|
60
60
|
deps_in_progress = @pending_dependencies
|
61
61
|
@pending_dependencies = Set.new
|
62
62
|
deps_in_progress.each do |dependency|
|
63
63
|
raise "Table sample for #{dependency.table_name} not found" unless table_samples[dependency.table_name]
|
64
|
-
|
64
|
+
newly_added += table_samples[dependency.table_name].fulfil(dependency)
|
65
65
|
end
|
66
|
-
|
66
|
+
newly_added
|
67
67
|
end
|
68
68
|
|
69
69
|
def to_sql
|
70
|
-
ret =
|
70
|
+
ret = "-- #{@table_name}: #{@sample.count} rows\n"
|
71
71
|
unless @sample.empty?
|
72
72
|
quoted_cols = @sample.first.keys.collect { |col| @connection.quote_column_name col }
|
73
|
-
|
74
|
-
@sample.
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
73
|
+
# INSERT in batches of 1000
|
74
|
+
@sample.each_slice(1000) do |rows|
|
75
|
+
values = rows.collect { |row|
|
76
|
+
quoted_vals = []
|
77
|
+
row.each_pair do |field,val|
|
78
|
+
# HACK: Brute attempt at not revealing sensitive data
|
79
|
+
val.gsub! /./, '*' if field.downcase == 'password'
|
80
|
+
quoted_vals << @connection.quote(val)
|
81
|
+
end
|
82
|
+
quoted_vals * ','
|
83
|
+
} * '),('
|
84
|
+
ret << "INSERT INTO #{@connection.quote_table_name @table_name} (#{quoted_cols * ','}) VALUES (#{values});\n"
|
81
85
|
end
|
82
86
|
end
|
83
|
-
ret
|
87
|
+
ret
|
84
88
|
end
|
85
89
|
|
86
90
|
protected
|
data/lib/data_sampler/version.rb
CHANGED