data_miner 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +6 -0
- data/data_miner.gemspec +11 -1
- data/lib/data_miner/attribute.rb +25 -13
- data/lib/data_miner/step/import.rb +35 -4
- data/lib/data_miner/version.rb +1 -1
- data/test/helper.rb +45 -7
- data/test/support/breed.rb +1 -1
- data/test/support/breed_by_license_number.csv +2 -0
- data/test/support/pet2.rb +21 -0
- data/test/support/pets.csv +6 -6
- data/test/test_data_miner.rb +4 -0
- metadata +57 -5
data/CHANGELOG
CHANGED
data/data_miner.gemspec
CHANGED
@@ -23,6 +23,7 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
24
24
|
s.add_runtime_dependency 'errata', '>=1.0.1'
|
25
25
|
s.add_runtime_dependency 'remote_table', '>=1.2.2'
|
26
|
+
s.add_runtime_dependency 'upsert'
|
26
27
|
|
27
28
|
s.add_development_dependency 'dkastner-alchemist'
|
28
29
|
s.add_development_dependency 'conversions'
|
@@ -31,7 +32,16 @@ Gem::Specification.new do |s|
|
|
31
32
|
s.add_development_dependency 'lock_method'
|
32
33
|
s.add_development_dependency 'minitest'
|
33
34
|
s.add_development_dependency 'minitest-reporters'
|
34
|
-
s.add_development_dependency 'mysql2'
|
35
35
|
s.add_development_dependency 'rake'
|
36
36
|
s.add_development_dependency 'yard'
|
37
|
+
if RUBY_PLATFORM == 'java'
|
38
|
+
s.add_development_dependency 'jruby-openssl'
|
39
|
+
s.add_development_dependency 'activerecord-jdbcsqlite3-adapter'
|
40
|
+
s.add_development_dependency 'activerecord-jdbcmysql-adapter'
|
41
|
+
s.add_development_dependency 'activerecord-jdbcpostgresql-adapter'
|
42
|
+
else
|
43
|
+
s.add_development_dependency 'sqlite3'
|
44
|
+
s.add_development_dependency 'mysql2'
|
45
|
+
s.add_development_dependency 'pg'
|
46
|
+
end
|
37
47
|
end
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -25,13 +25,7 @@ class DataMiner
|
|
25
25
|
errors
|
26
26
|
end
|
27
27
|
end
|
28
|
-
|
29
|
-
def number_column?
|
30
|
-
return @number_column_query[0] if @number_column_query.is_a?(Array)
|
31
|
-
@number_column_query = [model.columns_hash[name.to_s].number?]
|
32
|
-
@number_column_query[0]
|
33
|
-
end
|
34
|
-
|
28
|
+
|
35
29
|
VALID_OPTIONS = [
|
36
30
|
:from_units,
|
37
31
|
:to_units,
|
@@ -211,22 +205,34 @@ class DataMiner
|
|
211
205
|
end
|
212
206
|
end
|
213
207
|
|
214
|
-
# @private
|
208
|
+
# # @private
|
209
|
+
# TODO make sure that nil handling is replicated when using upsert
|
215
210
|
def set_from_row(local_record, remote_row)
|
216
211
|
previously_nil = local_record.send(name).nil?
|
217
212
|
currently_nil = false
|
218
|
-
|
219
213
|
if previously_nil or overwrite
|
220
214
|
new_value = read remote_row
|
221
215
|
local_record.send "#{name}=", new_value
|
222
216
|
currently_nil = new_value.nil?
|
223
217
|
end
|
224
|
-
|
225
218
|
if not currently_nil and persist_units? and (final_to_units = (to_units || read_units(remote_row)))
|
226
219
|
local_record.send "#{name}_units=", final_to_units
|
227
220
|
end
|
228
221
|
end
|
229
222
|
|
223
|
+
# @private
|
224
|
+
def updates(remote_row)
|
225
|
+
v = read remote_row
|
226
|
+
if persist_units?
|
227
|
+
v_units = unless v.nil?
|
228
|
+
to_units || read_units(remote_row)
|
229
|
+
end
|
230
|
+
{ name => v, "#{name}_units" => v_units }
|
231
|
+
else
|
232
|
+
{ name => v }
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
230
236
|
# @private
|
231
237
|
def read(row)
|
232
238
|
if matcher and matcher_output = matcher.match(row)
|
@@ -316,7 +322,7 @@ class DataMiner
|
|
316
322
|
def refresh
|
317
323
|
@dictionary = nil
|
318
324
|
end
|
319
|
-
|
325
|
+
|
320
326
|
private
|
321
327
|
|
322
328
|
def model
|
@@ -324,9 +330,15 @@ class DataMiner
|
|
324
330
|
end
|
325
331
|
|
326
332
|
def text_column?
|
327
|
-
return @text_column_query
|
333
|
+
return @text_column_query.first if @text_column_query.is_a?(Array)
|
328
334
|
@text_column_query = [model.columns_hash[name.to_s].text?]
|
329
|
-
@text_column_query
|
335
|
+
@text_column_query.first
|
336
|
+
end
|
337
|
+
|
338
|
+
def number_column?
|
339
|
+
return @number_column_query.first if @number_column_query.is_a?(Array)
|
340
|
+
@number_column_query = [model.columns_hash[name.to_s].number?]
|
341
|
+
@number_column_query.first
|
330
342
|
end
|
331
343
|
|
332
344
|
def static?
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'errata'
|
2
2
|
require 'remote_table'
|
3
|
+
require 'upsert'
|
3
4
|
|
4
5
|
class DataMiner
|
5
6
|
class Step
|
@@ -84,10 +85,25 @@ class DataMiner
|
|
84
85
|
|
85
86
|
# @private
|
86
87
|
def start
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
88
|
+
if storing_primary_key? or table_has_autoincrementing_primary_key?
|
89
|
+
c = ActiveRecord::Base.connection_pool.checkout
|
90
|
+
Upsert.stream(c, model.table_name) do |upsert|
|
91
|
+
table.each do |row|
|
92
|
+
selector = { @key => attributes[@key].read(row) }
|
93
|
+
document = attributes.except(@key).inject({}) do |memo, (_, attr)|
|
94
|
+
memo.merge! attr.updates(row)
|
95
|
+
memo
|
96
|
+
end
|
97
|
+
upsert.row selector, document
|
98
|
+
end
|
99
|
+
end
|
100
|
+
ActiveRecord::Base.connection_pool.checkin c
|
101
|
+
else
|
102
|
+
table.each do |row|
|
103
|
+
record = model.send "find_or_initialize_by_#{@key}", attributes[@key].read(row)
|
104
|
+
attributes.each { |_, attr| attr.set_from_row record, row }
|
105
|
+
record.save!
|
106
|
+
end
|
91
107
|
end
|
92
108
|
refresh
|
93
109
|
nil
|
@@ -95,6 +111,21 @@ class DataMiner
|
|
95
111
|
|
96
112
|
private
|
97
113
|
|
114
|
+
def table_has_autoincrementing_primary_key?
|
115
|
+
return @table_has_autoincrementing_primary_key_query.first if @table_has_autoincrementing_primary_key_query.is_a?(Array)
|
116
|
+
answer = model.columns.any? do |column|
|
117
|
+
column.primary and column.sql_type =~ /\bint/i
|
118
|
+
end
|
119
|
+
@table_has_autoincrementing_primary_key_query = [answer]
|
120
|
+
answer
|
121
|
+
end
|
122
|
+
|
123
|
+
def storing_primary_key?
|
124
|
+
return @storing_primary_key_query.first if @storing_primary_key_query.is_a?(Array)
|
125
|
+
@storing_primary_key_query = [attributes.has_key?(model.primary_key.to_sym)]
|
126
|
+
@storing_primary_key_query.first
|
127
|
+
end
|
128
|
+
|
98
129
|
def table
|
99
130
|
@table || @table_mutex.synchronize do
|
100
131
|
@table ||= ::RemoteTable.new(@table_settings)
|
data/lib/data_miner/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'bundler/setup'
|
3
3
|
|
4
|
-
if Bundler.definition.specs['
|
4
|
+
if Bundler.definition.specs['debugger'].first
|
5
|
+
require 'debugger'
|
6
|
+
elsif Bundler.definition.specs['ruby-debug'].first
|
5
7
|
require 'ruby-debug'
|
6
8
|
end
|
7
9
|
|
@@ -16,12 +18,46 @@ require 'logger'
|
|
16
18
|
ActiveRecord::Base.logger = Logger.new $stderr
|
17
19
|
ActiveRecord::Base.logger.level = Logger::INFO
|
18
20
|
# ActiveRecord::Base.logger.level = Logger::DEBUG
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
'
|
23
|
-
'
|
24
|
-
|
21
|
+
|
22
|
+
case ENV['DATABASE']
|
23
|
+
when /postgr/i
|
24
|
+
createdb_bin = ENV['TEST_CREATEDB_BIN'] || 'createdb'
|
25
|
+
dropdb_bin = ENV['TEST_DROPDB_BIN'] || 'dropdb'
|
26
|
+
username = ENV['TEST_POSTGRES_USERNAME'] || `whoami`.chomp
|
27
|
+
# password = ENV['TEST_POSTGRES_PASSWORD'] || 'password'
|
28
|
+
database = ENV['TEST_POSTGRES_DATABASE'] || 'data_miner_test'
|
29
|
+
system %{#{dropdb_bin} #{database}}
|
30
|
+
system %{#{createdb_bin} #{database}}
|
31
|
+
ActiveRecord::Base.establish_connection(
|
32
|
+
'adapter' => 'postgresql',
|
33
|
+
'encoding' => 'utf8',
|
34
|
+
'database' => database,
|
35
|
+
'username' => username
|
36
|
+
# 'password' => password
|
37
|
+
)
|
38
|
+
when /sqlite/i
|
39
|
+
ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :database => ":memory:")
|
40
|
+
else
|
41
|
+
bin = ENV['TEST_MYSQL_BIN'] || 'mysql'
|
42
|
+
username = ENV['TEST_MYSQL_USERNAME'] || 'root'
|
43
|
+
password = ENV['TEST_MYSQL_PASSWORD'] || 'password'
|
44
|
+
database = ENV['TEST_MYSQL_DATABASE'] || 'data_miner_test'
|
45
|
+
cmd = "#{bin} -u #{username} -p#{password}"
|
46
|
+
`#{cmd} -e 'show databases'`
|
47
|
+
unless $?.success?
|
48
|
+
$stderr.puts "Skipping mysql tests because `#{cmd}` doesn't work"
|
49
|
+
exit 0
|
50
|
+
end
|
51
|
+
system %{#{cmd} -e "drop database #{database}"}
|
52
|
+
system %{#{cmd} -e "create database #{database}"}
|
53
|
+
ActiveRecord::Base.establish_connection(
|
54
|
+
'adapter' => (RUBY_PLATFORM == 'java' ? 'mysql' : 'mysql2'),
|
55
|
+
'encoding' => 'utf8',
|
56
|
+
'database' => database,
|
57
|
+
'username' => username,
|
58
|
+
'password' => password
|
59
|
+
)
|
60
|
+
end
|
25
61
|
|
26
62
|
ActiveRecord::Base.mass_assignment_sanitizer = :strict
|
27
63
|
|
@@ -43,7 +79,9 @@ end
|
|
43
79
|
def init_models
|
44
80
|
require 'support/breed'
|
45
81
|
require 'support/pet'
|
82
|
+
require 'support/pet2'
|
46
83
|
Pet.auto_upgrade!
|
84
|
+
Pet2.auto_upgrade!
|
47
85
|
|
48
86
|
ActiveRecord::Base.descendants.each do |model|
|
49
87
|
model.attr_accessible nil
|
data/test/support/breed.rb
CHANGED
@@ -5,7 +5,7 @@ class Breed < ActiveRecord::Base
|
|
5
5
|
def update_average_age!
|
6
6
|
# make sure pet is populated
|
7
7
|
Pet.run_data_miner!
|
8
|
-
update_all %{
|
8
|
+
update_all %{"average_age" = (SELECT AVG("pets"."age") FROM "pets" WHERE "pets"."breed_id" = "breeds"."name")}
|
9
9
|
end
|
10
10
|
end
|
11
11
|
self.primary_key = "name"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
BREED_BY_LICENSE_NUMBER = File.expand_path('../breed_by_license_number.csv', __FILE__)
|
2
|
+
|
3
|
+
class Pet2 < ActiveRecord::Base
|
4
|
+
self.primary_key = "name"
|
5
|
+
col :name
|
6
|
+
col :breed_id
|
7
|
+
col :license_number, :type => :integer
|
8
|
+
|
9
|
+
data_miner do
|
10
|
+
process :auto_upgrade!
|
11
|
+
process :run_data_miner_on_parent_associations!
|
12
|
+
import("A list of pets", :url => "file://#{PETS}") do
|
13
|
+
key :name
|
14
|
+
store :license_number
|
15
|
+
end
|
16
|
+
import("Breed numbers based on license number", :url => "file://#{BREED_BY_LICENSE_NUMBER}") do
|
17
|
+
key :license_number
|
18
|
+
store :breed_id, :field_name => :breed, :nullify_blank_strings => true
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/test/support/pets.csv
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
|
2
|
-
Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
|
3
|
-
Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
|
4
|
-
Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
|
5
|
-
Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
|
6
|
-
Nemo,,,,,,,,
|
1
|
+
license_number,name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
|
2
|
+
111,Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
|
3
|
+
222,Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
|
4
|
+
333,Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
|
5
|
+
444,Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
|
6
|
+
555,Nemo,,,,,,,,
|
data/test/test_data_miner.rb
CHANGED
@@ -108,5 +108,9 @@ describe DataMiner do
|
|
108
108
|
Pet.data_miner_runs.first.row_count_before.must_equal 0
|
109
109
|
Pet.data_miner_runs.first.row_count_after.must_equal 5
|
110
110
|
end
|
111
|
+
it "can import based on keys other than the primary key" do
|
112
|
+
Pet2.run_data_miner!
|
113
|
+
Pet2.find('Jerry').breed_id.must_equal 'Beagle-Basset'
|
114
|
+
end
|
111
115
|
end
|
112
116
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.0
|
4
|
+
version: 2.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-06-
|
14
|
+
date: 2012-06-21 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: aasm
|
@@ -109,6 +109,22 @@ dependencies:
|
|
109
109
|
- - ! '>='
|
110
110
|
- !ruby/object:Gem::Version
|
111
111
|
version: 1.2.2
|
112
|
+
- !ruby/object:Gem::Dependency
|
113
|
+
name: upsert
|
114
|
+
requirement: !ruby/object:Gem::Requirement
|
115
|
+
none: false
|
116
|
+
requirements:
|
117
|
+
- - ! '>='
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
type: :runtime
|
121
|
+
prerelease: false
|
122
|
+
version_requirements: !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
124
|
+
requirements:
|
125
|
+
- - ! '>='
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: '0'
|
112
128
|
- !ruby/object:Gem::Dependency
|
113
129
|
name: dkastner-alchemist
|
114
130
|
requirement: !ruby/object:Gem::Requirement
|
@@ -222,7 +238,7 @@ dependencies:
|
|
222
238
|
- !ruby/object:Gem::Version
|
223
239
|
version: '0'
|
224
240
|
- !ruby/object:Gem::Dependency
|
225
|
-
name: mysql2
|
241
|
+
name: rake
|
226
242
|
requirement: !ruby/object:Gem::Requirement
|
227
243
|
none: false
|
228
244
|
requirements:
|
@@ -238,7 +254,7 @@ dependencies:
|
|
238
254
|
- !ruby/object:Gem::Version
|
239
255
|
version: '0'
|
240
256
|
- !ruby/object:Gem::Dependency
|
241
|
-
name: rake
|
257
|
+
name: yard
|
242
258
|
requirement: !ruby/object:Gem::Requirement
|
243
259
|
none: false
|
244
260
|
requirements:
|
@@ -254,7 +270,39 @@ dependencies:
|
|
254
270
|
- !ruby/object:Gem::Version
|
255
271
|
version: '0'
|
256
272
|
- !ruby/object:Gem::Dependency
|
257
|
-
name: yard
|
273
|
+
name: sqlite3
|
274
|
+
requirement: !ruby/object:Gem::Requirement
|
275
|
+
none: false
|
276
|
+
requirements:
|
277
|
+
- - ! '>='
|
278
|
+
- !ruby/object:Gem::Version
|
279
|
+
version: '0'
|
280
|
+
type: :development
|
281
|
+
prerelease: false
|
282
|
+
version_requirements: !ruby/object:Gem::Requirement
|
283
|
+
none: false
|
284
|
+
requirements:
|
285
|
+
- - ! '>='
|
286
|
+
- !ruby/object:Gem::Version
|
287
|
+
version: '0'
|
288
|
+
- !ruby/object:Gem::Dependency
|
289
|
+
name: mysql2
|
290
|
+
requirement: !ruby/object:Gem::Requirement
|
291
|
+
none: false
|
292
|
+
requirements:
|
293
|
+
- - ! '>='
|
294
|
+
- !ruby/object:Gem::Version
|
295
|
+
version: '0'
|
296
|
+
type: :development
|
297
|
+
prerelease: false
|
298
|
+
version_requirements: !ruby/object:Gem::Requirement
|
299
|
+
none: false
|
300
|
+
requirements:
|
301
|
+
- - ! '>='
|
302
|
+
- !ruby/object:Gem::Version
|
303
|
+
version: '0'
|
304
|
+
- !ruby/object:Gem::Dependency
|
305
|
+
name: pg
|
258
306
|
requirement: !ruby/object:Gem::Requirement
|
259
307
|
none: false
|
260
308
|
requirements:
|
@@ -306,11 +354,13 @@ files:
|
|
306
354
|
- test/data_miner/unit_converter/test_conversions.rb
|
307
355
|
- test/helper.rb
|
308
356
|
- test/support/breed.rb
|
357
|
+
- test/support/breed_by_license_number.csv
|
309
358
|
- test/support/breeds.xls
|
310
359
|
- test/support/data_miner_with_alchemist.rb
|
311
360
|
- test/support/data_miner_with_conversions.rb
|
312
361
|
- test/support/data_miner_without_unit_converter.rb
|
313
362
|
- test/support/pet.rb
|
363
|
+
- test/support/pet2.rb
|
314
364
|
- test/support/pet_color_dictionary.en.csv
|
315
365
|
- test/support/pet_color_dictionary.es.csv
|
316
366
|
- test/support/pets.csv
|
@@ -352,11 +402,13 @@ test_files:
|
|
352
402
|
- test/data_miner/unit_converter/test_conversions.rb
|
353
403
|
- test/helper.rb
|
354
404
|
- test/support/breed.rb
|
405
|
+
- test/support/breed_by_license_number.csv
|
355
406
|
- test/support/breeds.xls
|
356
407
|
- test/support/data_miner_with_alchemist.rb
|
357
408
|
- test/support/data_miner_with_conversions.rb
|
358
409
|
- test/support/data_miner_without_unit_converter.rb
|
359
410
|
- test/support/pet.rb
|
411
|
+
- test/support/pet2.rb
|
360
412
|
- test/support/pet_color_dictionary.en.csv
|
361
413
|
- test/support/pet_color_dictionary.es.csv
|
362
414
|
- test/support/pets.csv
|