data_miner 2.2.0 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 2.3.0 / 2012-06-21
2
+
3
+ * Enhancements
4
+
5
+ * Using https://github.com/seamusabshere/upsert to speed up import steps when possible.
6
+
1
7
  2.2.0 / 2012-06-11
2
8
 
3
9
  * Breaking changes
data/data_miner.gemspec CHANGED
@@ -23,6 +23,7 @@ Gem::Specification.new do |s|
23
23
  s.add_runtime_dependency 'activesupport', '>=2.3.4'
24
24
  s.add_runtime_dependency 'errata', '>=1.0.1'
25
25
  s.add_runtime_dependency 'remote_table', '>=1.2.2'
26
+ s.add_runtime_dependency 'upsert'
26
27
 
27
28
  s.add_development_dependency 'dkastner-alchemist'
28
29
  s.add_development_dependency 'conversions'
@@ -31,7 +32,16 @@ Gem::Specification.new do |s|
31
32
  s.add_development_dependency 'lock_method'
32
33
  s.add_development_dependency 'minitest'
33
34
  s.add_development_dependency 'minitest-reporters'
34
- s.add_development_dependency 'mysql2'
35
35
  s.add_development_dependency 'rake'
36
36
  s.add_development_dependency 'yard'
37
+ if RUBY_PLATFORM == 'java'
38
+ s.add_development_dependency 'jruby-openssl'
39
+ s.add_development_dependency 'activerecord-jdbcsqlite3-adapter'
40
+ s.add_development_dependency 'activerecord-jdbcmysql-adapter'
41
+ s.add_development_dependency 'activerecord-jdbcpostgresql-adapter'
42
+ else
43
+ s.add_development_dependency 'sqlite3'
44
+ s.add_development_dependency 'mysql2'
45
+ s.add_development_dependency 'pg'
46
+ end
37
47
  end
@@ -25,13 +25,7 @@ class DataMiner
25
25
  errors
26
26
  end
27
27
  end
28
-
29
- def number_column?
30
- return @number_column_query[0] if @number_column_query.is_a?(Array)
31
- @number_column_query = [model.columns_hash[name.to_s].number?]
32
- @number_column_query[0]
33
- end
34
-
28
+
35
29
  VALID_OPTIONS = [
36
30
  :from_units,
37
31
  :to_units,
@@ -211,22 +205,34 @@ class DataMiner
211
205
  end
212
206
  end
213
207
 
214
- # @private
208
+ # # @private
209
+ # TODO make sure that nil handling is replicated when using upsert
215
210
  def set_from_row(local_record, remote_row)
216
211
  previously_nil = local_record.send(name).nil?
217
212
  currently_nil = false
218
-
219
213
  if previously_nil or overwrite
220
214
  new_value = read remote_row
221
215
  local_record.send "#{name}=", new_value
222
216
  currently_nil = new_value.nil?
223
217
  end
224
-
225
218
  if not currently_nil and persist_units? and (final_to_units = (to_units || read_units(remote_row)))
226
219
  local_record.send "#{name}_units=", final_to_units
227
220
  end
228
221
  end
229
222
 
223
+ # @private
224
+ def updates(remote_row)
225
+ v = read remote_row
226
+ if persist_units?
227
+ v_units = unless v.nil?
228
+ to_units || read_units(remote_row)
229
+ end
230
+ { name => v, "#{name}_units" => v_units }
231
+ else
232
+ { name => v }
233
+ end
234
+ end
235
+
230
236
  # @private
231
237
  def read(row)
232
238
  if matcher and matcher_output = matcher.match(row)
@@ -316,7 +322,7 @@ class DataMiner
316
322
  def refresh
317
323
  @dictionary = nil
318
324
  end
319
-
325
+
320
326
  private
321
327
 
322
328
  def model
@@ -324,9 +330,15 @@ class DataMiner
324
330
  end
325
331
 
326
332
  def text_column?
327
- return @text_column_query[0] if @text_column_query.is_a?(Array)
333
+ return @text_column_query.first if @text_column_query.is_a?(Array)
328
334
  @text_column_query = [model.columns_hash[name.to_s].text?]
329
- @text_column_query[0]
335
+ @text_column_query.first
336
+ end
337
+
338
+ def number_column?
339
+ return @number_column_query.first if @number_column_query.is_a?(Array)
340
+ @number_column_query = [model.columns_hash[name.to_s].number?]
341
+ @number_column_query.first
330
342
  end
331
343
 
332
344
  def static?
@@ -1,5 +1,6 @@
1
1
  require 'errata'
2
2
  require 'remote_table'
3
+ require 'upsert'
3
4
 
4
5
  class DataMiner
5
6
  class Step
@@ -84,10 +85,25 @@ class DataMiner
84
85
 
85
86
  # @private
86
87
  def start
87
- table.each do |row|
88
- record = model.send "find_or_initialize_by_#{@key}", attributes[@key].read(row)
89
- attributes.each { |_, attr| attr.set_from_row record, row }
90
- record.save!
88
+ if storing_primary_key? or table_has_autoincrementing_primary_key?
89
+ c = ActiveRecord::Base.connection_pool.checkout
90
+ Upsert.stream(c, model.table_name) do |upsert|
91
+ table.each do |row|
92
+ selector = { @key => attributes[@key].read(row) }
93
+ document = attributes.except(@key).inject({}) do |memo, (_, attr)|
94
+ memo.merge! attr.updates(row)
95
+ memo
96
+ end
97
+ upsert.row selector, document
98
+ end
99
+ end
100
+ ActiveRecord::Base.connection_pool.checkin c
101
+ else
102
+ table.each do |row|
103
+ record = model.send "find_or_initialize_by_#{@key}", attributes[@key].read(row)
104
+ attributes.each { |_, attr| attr.set_from_row record, row }
105
+ record.save!
106
+ end
91
107
  end
92
108
  refresh
93
109
  nil
@@ -95,6 +111,21 @@ class DataMiner
95
111
 
96
112
  private
97
113
 
114
+ def table_has_autoincrementing_primary_key?
115
+ return @table_has_autoincrementing_primary_key_query.first if @table_has_autoincrementing_primary_key_query.is_a?(Array)
116
+ answer = model.columns.any? do |column|
117
+ column.primary and column.sql_type =~ /\bint/i
118
+ end
119
+ @table_has_autoincrementing_primary_key_query = [answer]
120
+ answer
121
+ end
122
+
123
+ def storing_primary_key?
124
+ return @storing_primary_key_query.first if @storing_primary_key_query.is_a?(Array)
125
+ @storing_primary_key_query = [attributes.has_key?(model.primary_key.to_sym)]
126
+ @storing_primary_key_query.first
127
+ end
128
+
98
129
  def table
99
130
  @table || @table_mutex.synchronize do
100
131
  @table ||= ::RemoteTable.new(@table_settings)
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '2.2.0'
2
+ VERSION = '2.3.0'
3
3
  end
data/test/helper.rb CHANGED
@@ -1,7 +1,9 @@
1
1
  require 'rubygems'
2
2
  require 'bundler/setup'
3
3
 
4
- if Bundler.definition.specs['ruby-debug19'].first or Bundler.definition.specs['ruby-debug'].first
4
+ if Bundler.definition.specs['debugger'].first
5
+ require 'debugger'
6
+ elsif Bundler.definition.specs['ruby-debug'].first
5
7
  require 'ruby-debug'
6
8
  end
7
9
 
@@ -16,12 +18,46 @@ require 'logger'
16
18
  ActiveRecord::Base.logger = Logger.new $stderr
17
19
  ActiveRecord::Base.logger.level = Logger::INFO
18
20
  # ActiveRecord::Base.logger.level = Logger::DEBUG
19
- ActiveRecord::Base.establish_connection(
20
- 'adapter' => 'mysql2',
21
- 'database' => 'data_miner_test',
22
- 'username' => 'root',
23
- 'password' => 'password'
24
- )
21
+
22
+ case ENV['DATABASE']
23
+ when /postgr/i
24
+ createdb_bin = ENV['TEST_CREATEDB_BIN'] || 'createdb'
25
+ dropdb_bin = ENV['TEST_DROPDB_BIN'] || 'dropdb'
26
+ username = ENV['TEST_POSTGRES_USERNAME'] || `whoami`.chomp
27
+ # password = ENV['TEST_POSTGRES_PASSWORD'] || 'password'
28
+ database = ENV['TEST_POSTGRES_DATABASE'] || 'data_miner_test'
29
+ system %{#{dropdb_bin} #{database}}
30
+ system %{#{createdb_bin} #{database}}
31
+ ActiveRecord::Base.establish_connection(
32
+ 'adapter' => 'postgresql',
33
+ 'encoding' => 'utf8',
34
+ 'database' => database,
35
+ 'username' => username
36
+ # 'password' => password
37
+ )
38
+ when /sqlite/i
39
+ ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :database => ":memory:")
40
+ else
41
+ bin = ENV['TEST_MYSQL_BIN'] || 'mysql'
42
+ username = ENV['TEST_MYSQL_USERNAME'] || 'root'
43
+ password = ENV['TEST_MYSQL_PASSWORD'] || 'password'
44
+ database = ENV['TEST_MYSQL_DATABASE'] || 'data_miner_test'
45
+ cmd = "#{bin} -u #{username} -p#{password}"
46
+ `#{cmd} -e 'show databases'`
47
+ unless $?.success?
48
+ $stderr.puts "Skipping mysql tests because `#{cmd}` doesn't work"
49
+ exit 0
50
+ end
51
+ system %{#{cmd} -e "drop database #{database}"}
52
+ system %{#{cmd} -e "create database #{database}"}
53
+ ActiveRecord::Base.establish_connection(
54
+ 'adapter' => (RUBY_PLATFORM == 'java' ? 'mysql' : 'mysql2'),
55
+ 'encoding' => 'utf8',
56
+ 'database' => database,
57
+ 'username' => username,
58
+ 'password' => password
59
+ )
60
+ end
25
61
 
26
62
  ActiveRecord::Base.mass_assignment_sanitizer = :strict
27
63
 
@@ -43,7 +79,9 @@ end
43
79
  def init_models
44
80
  require 'support/breed'
45
81
  require 'support/pet'
82
+ require 'support/pet2'
46
83
  Pet.auto_upgrade!
84
+ Pet2.auto_upgrade!
47
85
 
48
86
  ActiveRecord::Base.descendants.each do |model|
49
87
  model.attr_accessible nil
@@ -5,7 +5,7 @@ class Breed < ActiveRecord::Base
5
5
  def update_average_age!
6
6
  # make sure pet is populated
7
7
  Pet.run_data_miner!
8
- update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
8
+ update_all %{"average_age" = (SELECT AVG("pets"."age") FROM "pets" WHERE "pets"."breed_id" = "breeds"."name")}
9
9
  end
10
10
  end
11
11
  self.primary_key = "name"
@@ -0,0 +1,2 @@
1
+ license_number,breed
2
+ 222,Beagle-Basset
@@ -0,0 +1,21 @@
1
+ BREED_BY_LICENSE_NUMBER = File.expand_path('../breed_by_license_number.csv', __FILE__)
2
+
3
+ class Pet2 < ActiveRecord::Base
4
+ self.primary_key = "name"
5
+ col :name
6
+ col :breed_id
7
+ col :license_number, :type => :integer
8
+
9
+ data_miner do
10
+ process :auto_upgrade!
11
+ process :run_data_miner_on_parent_associations!
12
+ import("A list of pets", :url => "file://#{PETS}") do
13
+ key :name
14
+ store :license_number
15
+ end
16
+ import("Breed numbers based on license number", :url => "file://#{BREED_BY_LICENSE_NUMBER}") do
17
+ key :license_number
18
+ store :breed_id, :field_name => :breed, :nullify_blank_strings => true
19
+ end
20
+ end
21
+ end
@@ -1,6 +1,6 @@
1
- name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
2
- Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
3
- Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
4
- Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
5
- Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
6
- Nemo,,,,,,,,
1
+ license_number,name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
2
+ 111,Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
3
+ 222,Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
4
+ 333,Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
5
+ 444,Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
6
+ 555,Nemo,,,,,,,,
@@ -108,5 +108,9 @@ describe DataMiner do
108
108
  Pet.data_miner_runs.first.row_count_before.must_equal 0
109
109
  Pet.data_miner_runs.first.row_count_after.must_equal 5
110
110
  end
111
+ it "can import based on keys other than the primary key" do
112
+ Pet2.run_data_miner!
113
+ Pet2.find('Jerry').breed_id.must_equal 'Beagle-Basset'
114
+ end
111
115
  end
112
116
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-06-11 00:00:00.000000000 Z
14
+ date: 2012-06-21 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: aasm
@@ -109,6 +109,22 @@ dependencies:
109
109
  - - ! '>='
110
110
  - !ruby/object:Gem::Version
111
111
  version: 1.2.2
112
+ - !ruby/object:Gem::Dependency
113
+ name: upsert
114
+ requirement: !ruby/object:Gem::Requirement
115
+ none: false
116
+ requirements:
117
+ - - ! '>='
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ type: :runtime
121
+ prerelease: false
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ none: false
124
+ requirements:
125
+ - - ! '>='
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
112
128
  - !ruby/object:Gem::Dependency
113
129
  name: dkastner-alchemist
114
130
  requirement: !ruby/object:Gem::Requirement
@@ -222,7 +238,7 @@ dependencies:
222
238
  - !ruby/object:Gem::Version
223
239
  version: '0'
224
240
  - !ruby/object:Gem::Dependency
225
- name: mysql2
241
+ name: rake
226
242
  requirement: !ruby/object:Gem::Requirement
227
243
  none: false
228
244
  requirements:
@@ -238,7 +254,7 @@ dependencies:
238
254
  - !ruby/object:Gem::Version
239
255
  version: '0'
240
256
  - !ruby/object:Gem::Dependency
241
- name: rake
257
+ name: yard
242
258
  requirement: !ruby/object:Gem::Requirement
243
259
  none: false
244
260
  requirements:
@@ -254,7 +270,39 @@ dependencies:
254
270
  - !ruby/object:Gem::Version
255
271
  version: '0'
256
272
  - !ruby/object:Gem::Dependency
257
- name: yard
273
+ name: sqlite3
274
+ requirement: !ruby/object:Gem::Requirement
275
+ none: false
276
+ requirements:
277
+ - - ! '>='
278
+ - !ruby/object:Gem::Version
279
+ version: '0'
280
+ type: :development
281
+ prerelease: false
282
+ version_requirements: !ruby/object:Gem::Requirement
283
+ none: false
284
+ requirements:
285
+ - - ! '>='
286
+ - !ruby/object:Gem::Version
287
+ version: '0'
288
+ - !ruby/object:Gem::Dependency
289
+ name: mysql2
290
+ requirement: !ruby/object:Gem::Requirement
291
+ none: false
292
+ requirements:
293
+ - - ! '>='
294
+ - !ruby/object:Gem::Version
295
+ version: '0'
296
+ type: :development
297
+ prerelease: false
298
+ version_requirements: !ruby/object:Gem::Requirement
299
+ none: false
300
+ requirements:
301
+ - - ! '>='
302
+ - !ruby/object:Gem::Version
303
+ version: '0'
304
+ - !ruby/object:Gem::Dependency
305
+ name: pg
258
306
  requirement: !ruby/object:Gem::Requirement
259
307
  none: false
260
308
  requirements:
@@ -306,11 +354,13 @@ files:
306
354
  - test/data_miner/unit_converter/test_conversions.rb
307
355
  - test/helper.rb
308
356
  - test/support/breed.rb
357
+ - test/support/breed_by_license_number.csv
309
358
  - test/support/breeds.xls
310
359
  - test/support/data_miner_with_alchemist.rb
311
360
  - test/support/data_miner_with_conversions.rb
312
361
  - test/support/data_miner_without_unit_converter.rb
313
362
  - test/support/pet.rb
363
+ - test/support/pet2.rb
314
364
  - test/support/pet_color_dictionary.en.csv
315
365
  - test/support/pet_color_dictionary.es.csv
316
366
  - test/support/pets.csv
@@ -352,11 +402,13 @@ test_files:
352
402
  - test/data_miner/unit_converter/test_conversions.rb
353
403
  - test/helper.rb
354
404
  - test/support/breed.rb
405
+ - test/support/breed_by_license_number.csv
355
406
  - test/support/breeds.xls
356
407
  - test/support/data_miner_with_alchemist.rb
357
408
  - test/support/data_miner_with_conversions.rb
358
409
  - test/support/data_miner_without_unit_converter.rb
359
410
  - test/support/pet.rb
411
+ - test/support/pet2.rb
360
412
  - test/support/pet_color_dictionary.en.csv
361
413
  - test/support/pet_color_dictionary.es.csv
362
414
  - test/support/pets.csv