data_sampler 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,7 +16,7 @@ module DataSampler
16
16
  end
17
17
 
18
18
  def to_s
19
- "#{keys} in table `#{table_name}` (referred from `#{referring_table_name}`)"
19
+ "row with keys #{keys} in table `#{table_name}` (referred from `#{referring_table_name}`)"
20
20
  end
21
21
 
22
22
  end
@@ -23,9 +23,10 @@ module DataSampler
23
23
  begin
24
24
  new_dependencies = 0
25
25
  @table_samples.values.each do |table_sample|
26
- if table_sample.ensure_referential_integrity(@table_samples)
27
- new_dependencies += 1
28
- warn " Found new dependents for table `#{table_sample.table_name}`"
26
+ newly_added = table_sample.ensure_referential_integrity(@table_samples)
27
+ if newly_added > 0
28
+ new_dependencies += newly_added
29
+ warn " Found #{newly_added} new dependents for table `#{table_sample.table_name}`"
29
30
  end
30
31
  end
31
32
  warn " Discovered #{new_dependencies} new dependencies" if new_dependencies > 0
@@ -23,11 +23,11 @@ module DataSampler
23
23
  end
24
24
 
25
25
  def fulfil(dependency)
26
- return if fulfilled?(dependency)
26
+ return 0 if fulfilled?(dependency)
27
27
  where = dependency.keys.collect { |col, val| "#{@connection.quote_column_name col} = #{@connection.quote val}" } * ' AND '
28
28
  sql = "SELECT * FROM #{@connection.quote_table_name @table_name} WHERE " + where
29
29
  row = @connection.select_one(sql)
30
- raise "Could not find dependent row: #{dependency} (using SQL: #{sql})" if row.nil?
30
+ raise "Could not find #{dependency}" if row.nil?
31
31
  add row
32
32
  end
33
33
 
@@ -44,43 +44,47 @@ module DataSampler
44
44
  end
45
45
 
46
46
  def add(row)
47
- return false unless @sample.add? row
47
+ return 0 unless @sample.add? row
48
48
  @sampled_ids.add row['id'] if row['id']
49
- any_new = false
49
+ newly_added = 0
50
50
  dependencies_for(row).each do |dep|
51
- any_new = true if @pending_dependencies.add?(dep)
51
+ newly_added += 1 if @pending_dependencies.add?(dep)
52
52
  end
53
- any_new
53
+ newly_added
54
54
  rescue ActiveRecord::StatementInvalid => e
55
55
  # Don't choke on unknown table engines, such as Sphinx
56
56
  end
57
57
 
58
58
  def ensure_referential_integrity(table_samples)
59
- any_new = false
59
+ newly_added = 0
60
60
  deps_in_progress = @pending_dependencies
61
61
  @pending_dependencies = Set.new
62
62
  deps_in_progress.each do |dependency|
63
63
  raise "Table sample for #{dependency.table_name} not found" unless table_samples[dependency.table_name]
64
- any_new = true if table_samples[dependency.table_name].fulfil(dependency)
64
+ newly_added += table_samples[dependency.table_name].fulfil(dependency)
65
65
  end
66
- any_new
66
+ newly_added
67
67
  end
68
68
 
69
69
  def to_sql
70
- ret = ["-- #{@table_name}: #{@sample.count} rows"]
70
+ ret = "-- #{@table_name}: #{@sample.count} rows\n"
71
71
  unless @sample.empty?
72
72
  quoted_cols = @sample.first.keys.collect { |col| @connection.quote_column_name col }
73
- sql = "INSERT INTO #{@connection.quote_table_name @table_name} (#{quoted_cols * ','})"
74
- @sample.each do |row|
75
- quoted_vals = []
76
- row.each_pair do |field,val|
77
- val.gsub! /./, '*' if field.downcase == 'password'
78
- quoted_vals << @connection.quote(val)
79
- end
80
- ret << sql + " VALUES (#{quoted_vals * ','});"
73
+ # INSERT in batches of 1000
74
+ @sample.each_slice(1000) do |rows|
75
+ values = rows.collect { |row|
76
+ quoted_vals = []
77
+ row.each_pair do |field,val|
78
+ # HACK: Brute attempt at not revealing sensitive data
79
+ val.gsub! /./, '*' if field.downcase == 'password'
80
+ quoted_vals << @connection.quote(val)
81
+ end
82
+ quoted_vals * ','
83
+ } * '),('
84
+ ret << "INSERT INTO #{@connection.quote_table_name @table_name} (#{quoted_cols * ','}) VALUES (#{values});\n"
81
85
  end
82
86
  end
83
- ret * "\n"
87
+ ret
84
88
  end
85
89
 
86
90
  protected
@@ -1,3 +1,3 @@
1
1
  module DataSampler
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_sampler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: