data_miner 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 2.3.0 / 2012-06-21
2
+
3
+ * Enhancements
4
+
5
+ * Using https://github.com/seamusabshere/upsert to speed up import steps when possible.
6
+
1
7
  2.2.0 / 2012-06-11
2
8
 
3
9
  * Breaking changes
data/data_miner.gemspec CHANGED
@@ -23,6 +23,7 @@ Gem::Specification.new do |s|
23
23
  s.add_runtime_dependency 'activesupport', '>=2.3.4'
24
24
  s.add_runtime_dependency 'errata', '>=1.0.1'
25
25
  s.add_runtime_dependency 'remote_table', '>=1.2.2'
26
+ s.add_runtime_dependency 'upsert'
26
27
 
27
28
  s.add_development_dependency 'dkastner-alchemist'
28
29
  s.add_development_dependency 'conversions'
@@ -31,7 +32,16 @@ Gem::Specification.new do |s|
31
32
  s.add_development_dependency 'lock_method'
32
33
  s.add_development_dependency 'minitest'
33
34
  s.add_development_dependency 'minitest-reporters'
34
- s.add_development_dependency 'mysql2'
35
35
  s.add_development_dependency 'rake'
36
36
  s.add_development_dependency 'yard'
37
+ if RUBY_PLATFORM == 'java'
38
+ s.add_development_dependency 'jruby-openssl'
39
+ s.add_development_dependency 'activerecord-jdbcsqlite3-adapter'
40
+ s.add_development_dependency 'activerecord-jdbcmysql-adapter'
41
+ s.add_development_dependency 'activerecord-jdbcpostgresql-adapter'
42
+ else
43
+ s.add_development_dependency 'sqlite3'
44
+ s.add_development_dependency 'mysql2'
45
+ s.add_development_dependency 'pg'
46
+ end
37
47
  end
@@ -25,13 +25,7 @@ class DataMiner
25
25
  errors
26
26
  end
27
27
  end
28
-
29
- def number_column?
30
- return @number_column_query[0] if @number_column_query.is_a?(Array)
31
- @number_column_query = [model.columns_hash[name.to_s].number?]
32
- @number_column_query[0]
33
- end
34
-
28
+
35
29
  VALID_OPTIONS = [
36
30
  :from_units,
37
31
  :to_units,
@@ -211,22 +205,34 @@ class DataMiner
211
205
  end
212
206
  end
213
207
 
214
- # @private
208
+ # # @private
209
+ # TODO make sure that nil handling is replicated when using upsert
215
210
  def set_from_row(local_record, remote_row)
216
211
  previously_nil = local_record.send(name).nil?
217
212
  currently_nil = false
218
-
219
213
  if previously_nil or overwrite
220
214
  new_value = read remote_row
221
215
  local_record.send "#{name}=", new_value
222
216
  currently_nil = new_value.nil?
223
217
  end
224
-
225
218
  if not currently_nil and persist_units? and (final_to_units = (to_units || read_units(remote_row)))
226
219
  local_record.send "#{name}_units=", final_to_units
227
220
  end
228
221
  end
229
222
 
223
+ # @private
224
+ def updates(remote_row)
225
+ v = read remote_row
226
+ if persist_units?
227
+ v_units = unless v.nil?
228
+ to_units || read_units(remote_row)
229
+ end
230
+ { name => v, "#{name}_units" => v_units }
231
+ else
232
+ { name => v }
233
+ end
234
+ end
235
+
230
236
  # @private
231
237
  def read(row)
232
238
  if matcher and matcher_output = matcher.match(row)
@@ -316,7 +322,7 @@ class DataMiner
316
322
  def refresh
317
323
  @dictionary = nil
318
324
  end
319
-
325
+
320
326
  private
321
327
 
322
328
  def model
@@ -324,9 +330,15 @@ class DataMiner
324
330
  end
325
331
 
326
332
  def text_column?
327
- return @text_column_query[0] if @text_column_query.is_a?(Array)
333
+ return @text_column_query.first if @text_column_query.is_a?(Array)
328
334
  @text_column_query = [model.columns_hash[name.to_s].text?]
329
- @text_column_query[0]
335
+ @text_column_query.first
336
+ end
337
+
338
+ def number_column?
339
+ return @number_column_query.first if @number_column_query.is_a?(Array)
340
+ @number_column_query = [model.columns_hash[name.to_s].number?]
341
+ @number_column_query.first
330
342
  end
331
343
 
332
344
  def static?
@@ -1,5 +1,6 @@
1
1
  require 'errata'
2
2
  require 'remote_table'
3
+ require 'upsert'
3
4
 
4
5
  class DataMiner
5
6
  class Step
@@ -84,10 +85,25 @@ class DataMiner
84
85
 
85
86
  # @private
86
87
  def start
87
- table.each do |row|
88
- record = model.send "find_or_initialize_by_#{@key}", attributes[@key].read(row)
89
- attributes.each { |_, attr| attr.set_from_row record, row }
90
- record.save!
88
+ if storing_primary_key? or table_has_autoincrementing_primary_key?
89
+ c = ActiveRecord::Base.connection_pool.checkout
90
+ Upsert.stream(c, model.table_name) do |upsert|
91
+ table.each do |row|
92
+ selector = { @key => attributes[@key].read(row) }
93
+ document = attributes.except(@key).inject({}) do |memo, (_, attr)|
94
+ memo.merge! attr.updates(row)
95
+ memo
96
+ end
97
+ upsert.row selector, document
98
+ end
99
+ end
100
+ ActiveRecord::Base.connection_pool.checkin c
101
+ else
102
+ table.each do |row|
103
+ record = model.send "find_or_initialize_by_#{@key}", attributes[@key].read(row)
104
+ attributes.each { |_, attr| attr.set_from_row record, row }
105
+ record.save!
106
+ end
91
107
  end
92
108
  refresh
93
109
  nil
@@ -95,6 +111,21 @@ class DataMiner
95
111
 
96
112
  private
97
113
 
114
+ def table_has_autoincrementing_primary_key?
115
+ return @table_has_autoincrementing_primary_key_query.first if @table_has_autoincrementing_primary_key_query.is_a?(Array)
116
+ answer = model.columns.any? do |column|
117
+ column.primary and column.sql_type =~ /\bint/i
118
+ end
119
+ @table_has_autoincrementing_primary_key_query = [answer]
120
+ answer
121
+ end
122
+
123
+ def storing_primary_key?
124
+ return @storing_primary_key_query.first if @storing_primary_key_query.is_a?(Array)
125
+ @storing_primary_key_query = [attributes.has_key?(model.primary_key.to_sym)]
126
+ @storing_primary_key_query.first
127
+ end
128
+
98
129
  def table
99
130
  @table || @table_mutex.synchronize do
100
131
  @table ||= ::RemoteTable.new(@table_settings)
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '2.2.0'
2
+ VERSION = '2.3.0'
3
3
  end
data/test/helper.rb CHANGED
@@ -1,7 +1,9 @@
1
1
  require 'rubygems'
2
2
  require 'bundler/setup'
3
3
 
4
- if Bundler.definition.specs['ruby-debug19'].first or Bundler.definition.specs['ruby-debug'].first
4
+ if Bundler.definition.specs['debugger'].first
5
+ require 'debugger'
6
+ elsif Bundler.definition.specs['ruby-debug'].first
5
7
  require 'ruby-debug'
6
8
  end
7
9
 
@@ -16,12 +18,46 @@ require 'logger'
16
18
  ActiveRecord::Base.logger = Logger.new $stderr
17
19
  ActiveRecord::Base.logger.level = Logger::INFO
18
20
  # ActiveRecord::Base.logger.level = Logger::DEBUG
19
- ActiveRecord::Base.establish_connection(
20
- 'adapter' => 'mysql2',
21
- 'database' => 'data_miner_test',
22
- 'username' => 'root',
23
- 'password' => 'password'
24
- )
21
+
22
+ case ENV['DATABASE']
23
+ when /postgr/i
24
+ createdb_bin = ENV['TEST_CREATEDB_BIN'] || 'createdb'
25
+ dropdb_bin = ENV['TEST_DROPDB_BIN'] || 'dropdb'
26
+ username = ENV['TEST_POSTGRES_USERNAME'] || `whoami`.chomp
27
+ # password = ENV['TEST_POSTGRES_PASSWORD'] || 'password'
28
+ database = ENV['TEST_POSTGRES_DATABASE'] || 'data_miner_test'
29
+ system %{#{dropdb_bin} #{database}}
30
+ system %{#{createdb_bin} #{database}}
31
+ ActiveRecord::Base.establish_connection(
32
+ 'adapter' => 'postgresql',
33
+ 'encoding' => 'utf8',
34
+ 'database' => database,
35
+ 'username' => username
36
+ # 'password' => password
37
+ )
38
+ when /sqlite/i
39
+ ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :database => ":memory:")
40
+ else
41
+ bin = ENV['TEST_MYSQL_BIN'] || 'mysql'
42
+ username = ENV['TEST_MYSQL_USERNAME'] || 'root'
43
+ password = ENV['TEST_MYSQL_PASSWORD'] || 'password'
44
+ database = ENV['TEST_MYSQL_DATABASE'] || 'data_miner_test'
45
+ cmd = "#{bin} -u #{username} -p#{password}"
46
+ `#{cmd} -e 'show databases'`
47
+ unless $?.success?
48
+ $stderr.puts "Skipping mysql tests because `#{cmd}` doesn't work"
49
+ exit 0
50
+ end
51
+ system %{#{cmd} -e "drop database #{database}"}
52
+ system %{#{cmd} -e "create database #{database}"}
53
+ ActiveRecord::Base.establish_connection(
54
+ 'adapter' => (RUBY_PLATFORM == 'java' ? 'mysql' : 'mysql2'),
55
+ 'encoding' => 'utf8',
56
+ 'database' => database,
57
+ 'username' => username,
58
+ 'password' => password
59
+ )
60
+ end
25
61
 
26
62
  ActiveRecord::Base.mass_assignment_sanitizer = :strict
27
63
 
@@ -43,7 +79,9 @@ end
43
79
  def init_models
44
80
  require 'support/breed'
45
81
  require 'support/pet'
82
+ require 'support/pet2'
46
83
  Pet.auto_upgrade!
84
+ Pet2.auto_upgrade!
47
85
 
48
86
  ActiveRecord::Base.descendants.each do |model|
49
87
  model.attr_accessible nil
@@ -5,7 +5,7 @@ class Breed < ActiveRecord::Base
5
5
  def update_average_age!
6
6
  # make sure pet is populated
7
7
  Pet.run_data_miner!
8
- update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
8
+ update_all %{"average_age" = (SELECT AVG("pets"."age") FROM "pets" WHERE "pets"."breed_id" = "breeds"."name")}
9
9
  end
10
10
  end
11
11
  self.primary_key = "name"
@@ -0,0 +1,2 @@
1
+ license_number,breed
2
+ 222,Beagle-Basset
@@ -0,0 +1,21 @@
1
+ BREED_BY_LICENSE_NUMBER = File.expand_path('../breed_by_license_number.csv', __FILE__)
2
+
3
+ class Pet2 < ActiveRecord::Base
4
+ self.primary_key = "name"
5
+ col :name
6
+ col :breed_id
7
+ col :license_number, :type => :integer
8
+
9
+ data_miner do
10
+ process :auto_upgrade!
11
+ process :run_data_miner_on_parent_associations!
12
+ import("A list of pets", :url => "file://#{PETS}") do
13
+ key :name
14
+ store :license_number
15
+ end
16
+ import("Breed numbers based on license number", :url => "file://#{BREED_BY_LICENSE_NUMBER}") do
17
+ key :license_number
18
+ store :breed_id, :field_name => :breed, :nullify_blank_strings => true
19
+ end
20
+ end
21
+ end
@@ -1,6 +1,6 @@
1
- name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
2
- Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
3
- Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
4
- Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
5
- Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
6
- Nemo,,,,,,,,
1
+ license_number,name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
2
+ 111,Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
3
+ 222,Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
4
+ 333,Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
5
+ 444,Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
6
+ 555,Nemo,,,,,,,,
@@ -108,5 +108,9 @@ describe DataMiner do
108
108
  Pet.data_miner_runs.first.row_count_before.must_equal 0
109
109
  Pet.data_miner_runs.first.row_count_after.must_equal 5
110
110
  end
111
+ it "can import based on keys other than the primary key" do
112
+ Pet2.run_data_miner!
113
+ Pet2.find('Jerry').breed_id.must_equal 'Beagle-Basset'
114
+ end
111
115
  end
112
116
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-06-11 00:00:00.000000000 Z
14
+ date: 2012-06-21 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: aasm
@@ -109,6 +109,22 @@ dependencies:
109
109
  - - ! '>='
110
110
  - !ruby/object:Gem::Version
111
111
  version: 1.2.2
112
+ - !ruby/object:Gem::Dependency
113
+ name: upsert
114
+ requirement: !ruby/object:Gem::Requirement
115
+ none: false
116
+ requirements:
117
+ - - ! '>='
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ type: :runtime
121
+ prerelease: false
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ! '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
112
128
  - !ruby/object:Gem::Dependency
113
129
  name: dkastner-alchemist
114
130
  requirement: !ruby/object:Gem::Requirement
@@ -222,7 +238,7 @@ dependencies:
222
238
  - !ruby/object:Gem::Version
223
239
  version: '0'
224
240
  - !ruby/object:Gem::Dependency
225
- name: mysql2
241
+ name: rake
226
242
  requirement: !ruby/object:Gem::Requirement
227
243
  none: false
228
244
  requirements:
@@ -238,7 +254,7 @@ dependencies:
238
254
  - !ruby/object:Gem::Version
239
255
  version: '0'
240
256
  - !ruby/object:Gem::Dependency
241
- name: rake
257
+ name: yard
242
258
  requirement: !ruby/object:Gem::Requirement
243
259
  none: false
244
260
  requirements:
@@ -254,7 +270,39 @@ dependencies:
254
270
  - !ruby/object:Gem::Version
255
271
  version: '0'
256
272
  - !ruby/object:Gem::Dependency
257
- name: yard
273
+ name: sqlite3
274
+ requirement: !ruby/object:Gem::Requirement
275
+ none: false
276
+ requirements:
277
+ - - ! '>='
278
+ - !ruby/object:Gem::Version
279
+ version: '0'
280
+ type: :development
281
+ prerelease: false
282
+ version_requirements: !ruby/object:Gem::Requirement
283
+ none: false
284
+ requirements:
285
+ - - ! '>='
286
+ - !ruby/object:Gem::Version
287
+ version: '0'
288
+ - !ruby/object:Gem::Dependency
289
+ name: mysql2
290
+ requirement: !ruby/object:Gem::Requirement
291
+ none: false
292
+ requirements:
293
+ - - ! '>='
294
+ - !ruby/object:Gem::Version
295
+ version: '0'
296
+ type: :development
297
+ prerelease: false
298
+ version_requirements: !ruby/object:Gem::Requirement
299
+ none: false
300
+ requirements:
301
+ - - ! '>='
302
+ - !ruby/object:Gem::Version
303
+ version: '0'
304
+ - !ruby/object:Gem::Dependency
305
+ name: pg
258
306
  requirement: !ruby/object:Gem::Requirement
259
307
  none: false
260
308
  requirements:
@@ -306,11 +354,13 @@ files:
306
354
  - test/data_miner/unit_converter/test_conversions.rb
307
355
  - test/helper.rb
308
356
  - test/support/breed.rb
357
+ - test/support/breed_by_license_number.csv
309
358
  - test/support/breeds.xls
310
359
  - test/support/data_miner_with_alchemist.rb
311
360
  - test/support/data_miner_with_conversions.rb
312
361
  - test/support/data_miner_without_unit_converter.rb
313
362
  - test/support/pet.rb
363
+ - test/support/pet2.rb
314
364
  - test/support/pet_color_dictionary.en.csv
315
365
  - test/support/pet_color_dictionary.es.csv
316
366
  - test/support/pets.csv
@@ -352,11 +402,13 @@ test_files:
352
402
  - test/data_miner/unit_converter/test_conversions.rb
353
403
  - test/helper.rb
354
404
  - test/support/breed.rb
405
+ - test/support/breed_by_license_number.csv
355
406
  - test/support/breeds.xls
356
407
  - test/support/data_miner_with_alchemist.rb
357
408
  - test/support/data_miner_with_conversions.rb
358
409
  - test/support/data_miner_without_unit_converter.rb
359
410
  - test/support/pet.rb
411
+ - test/support/pet2.rb
360
412
  - test/support/pet_color_dictionary.en.csv
361
413
  - test/support/pet_color_dictionary.es.csv
362
414
  - test/support/pets.csv