data_sampler 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ module DataSampler
16
16
  end
17
17
 
18
18
  def to_s
19
- "#{keys} in table `#{table_name}` (referred from `#{referring_table_name}`)"
19
+ "row with keys #{keys} in table `#{table_name}` (referred from `#{referring_table_name}`)"
20
20
  end
21
21
 
22
22
  end
@@ -23,9 +23,10 @@ module DataSampler
23
23
  begin
24
24
  new_dependencies = 0
25
25
  @table_samples.values.each do |table_sample|
26
- if table_sample.ensure_referential_integrity(@table_samples)
27
- new_dependencies += 1
28
- warn " Found new dependents for table `#{table_sample.table_name}`"
26
+ newly_added = table_sample.ensure_referential_integrity(@table_samples)
27
+ if newly_added > 0
28
+ new_dependencies += newly_added
29
+ warn " Found #{newly_added} new dependents for table `#{table_sample.table_name}`"
29
30
  end
30
31
  end
31
32
  warn " Discovered #{new_dependencies} new dependencies" if new_dependencies > 0
@@ -23,11 +23,11 @@ module DataSampler
23
23
  end
24
24
 
25
25
  def fulfil(dependency)
26
- return if fulfilled?(dependency)
26
+ return 0 if fulfilled?(dependency)
27
27
  where = dependency.keys.collect { |col, val| "#{@connection.quote_column_name col} = #{@connection.quote val}" } * ' AND '
28
28
  sql = "SELECT * FROM #{@connection.quote_table_name @table_name} WHERE " + where
29
29
  row = @connection.select_one(sql)
30
- raise "Could not find dependent row: #{dependency} (using SQL: #{sql})" if row.nil?
30
+ raise "Could not find #{dependency}" if row.nil?
31
31
  add row
32
32
  end
33
33
 
@@ -44,43 +44,47 @@ module DataSampler
44
44
  end
45
45
 
46
46
  def add(row)
47
- return false unless @sample.add? row
47
+ return 0 unless @sample.add? row
48
48
  @sampled_ids.add row['id'] if row['id']
49
- any_new = false
49
+ newly_added = 0
50
50
  dependencies_for(row).each do |dep|
51
- any_new = true if @pending_dependencies.add?(dep)
51
+ newly_added += 1 if @pending_dependencies.add?(dep)
52
52
  end
53
- any_new
53
+ newly_added
54
54
  rescue ActiveRecord::StatementInvalid => e
55
55
  # Don't choke on unknown table engines, such as Sphinx
56
56
  end
57
57
 
58
58
  def ensure_referential_integrity(table_samples)
59
- any_new = false
59
+ newly_added = 0
60
60
  deps_in_progress = @pending_dependencies
61
61
  @pending_dependencies = Set.new
62
62
  deps_in_progress.each do |dependency|
63
63
  raise "Table sample for #{dependency.table_name} not found" unless table_samples[dependency.table_name]
64
- any_new = true if table_samples[dependency.table_name].fulfil(dependency)
64
+ newly_added += table_samples[dependency.table_name].fulfil(dependency)
65
65
  end
66
- any_new
66
+ newly_added
67
67
  end
68
68
 
69
69
  def to_sql
70
- ret = ["-- #{@table_name}: #{@sample.count} rows"]
70
+ ret = "-- #{@table_name}: #{@sample.count} rows\n"
71
71
  unless @sample.empty?
72
72
  quoted_cols = @sample.first.keys.collect { |col| @connection.quote_column_name col }
73
- sql = "INSERT INTO #{@connection.quote_table_name @table_name} (#{quoted_cols * ','})"
74
- @sample.each do |row|
75
- quoted_vals = []
76
- row.each_pair do |field,val|
77
- val.gsub! /./, '*' if field.downcase == 'password'
78
- quoted_vals << @connection.quote(val)
79
- end
80
- ret << sql + " VALUES (#{quoted_vals * ','});"
73
+ # INSERT in batches of 1000
74
+ @sample.each_slice(1000) do |rows|
75
+ values = rows.collect { |row|
76
+ quoted_vals = []
77
+ row.each_pair do |field,val|
78
+ # HACK: Brute attempt at not revealing sensitive data
79
+ val.gsub! /./, '*' if field.downcase == 'password'
80
+ quoted_vals << @connection.quote(val)
81
+ end
82
+ quoted_vals * ','
83
+ } * '),('
84
+ ret << "INSERT INTO #{@connection.quote_table_name @table_name} (#{quoted_cols * ','}) VALUES (#{values});\n"
81
85
  end
82
86
  end
83
- ret * "\n"
87
+ ret
84
88
  end
85
89
 
86
90
  protected
@@ -1,3 +1,3 @@
1
1
  module DataSampler
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_sampler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: