data_miner 2.2.0 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +6 -0
- data/data_miner.gemspec +11 -1
- data/lib/data_miner/attribute.rb +25 -13
- data/lib/data_miner/step/import.rb +35 -4
- data/lib/data_miner/version.rb +1 -1
- data/test/helper.rb +45 -7
- data/test/support/breed.rb +1 -1
- data/test/support/breed_by_license_number.csv +2 -0
- data/test/support/pet2.rb +21 -0
- data/test/support/pets.csv +6 -6
- data/test/test_data_miner.rb +4 -0
- metadata +57 -5
data/CHANGELOG
CHANGED
data/data_miner.gemspec
CHANGED
@@ -23,6 +23,7 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
24
24
|
s.add_runtime_dependency 'errata', '>=1.0.1'
|
25
25
|
s.add_runtime_dependency 'remote_table', '>=1.2.2'
|
26
|
+
s.add_runtime_dependency 'upsert'
|
26
27
|
|
27
28
|
s.add_development_dependency 'dkastner-alchemist'
|
28
29
|
s.add_development_dependency 'conversions'
|
@@ -31,7 +32,16 @@ Gem::Specification.new do |s|
|
|
31
32
|
s.add_development_dependency 'lock_method'
|
32
33
|
s.add_development_dependency 'minitest'
|
33
34
|
s.add_development_dependency 'minitest-reporters'
|
34
|
-
s.add_development_dependency 'mysql2'
|
35
35
|
s.add_development_dependency 'rake'
|
36
36
|
s.add_development_dependency 'yard'
|
37
|
+
if RUBY_PLATFORM == 'java'
|
38
|
+
s.add_development_dependency 'jruby-openssl'
|
39
|
+
s.add_development_dependency 'activerecord-jdbcsqlite3-adapter'
|
40
|
+
s.add_development_dependency 'activerecord-jdbcmysql-adapter'
|
41
|
+
s.add_development_dependency 'activerecord-jdbcpostgresql-adapter'
|
42
|
+
else
|
43
|
+
s.add_development_dependency 'sqlite3'
|
44
|
+
s.add_development_dependency 'mysql2'
|
45
|
+
s.add_development_dependency 'pg'
|
46
|
+
end
|
37
47
|
end
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -25,13 +25,7 @@ class DataMiner
|
|
25
25
|
errors
|
26
26
|
end
|
27
27
|
end
|
28
|
-
|
29
|
-
def number_column?
|
30
|
-
return @number_column_query[0] if @number_column_query.is_a?(Array)
|
31
|
-
@number_column_query = [model.columns_hash[name.to_s].number?]
|
32
|
-
@number_column_query[0]
|
33
|
-
end
|
34
|
-
|
28
|
+
|
35
29
|
VALID_OPTIONS = [
|
36
30
|
:from_units,
|
37
31
|
:to_units,
|
@@ -211,22 +205,34 @@ class DataMiner
|
|
211
205
|
end
|
212
206
|
end
|
213
207
|
|
214
|
-
# @private
|
208
|
+
# # @private
|
209
|
+
# TODO make sure that nil handling is replicated when using upsert
|
215
210
|
def set_from_row(local_record, remote_row)
|
216
211
|
previously_nil = local_record.send(name).nil?
|
217
212
|
currently_nil = false
|
218
|
-
|
219
213
|
if previously_nil or overwrite
|
220
214
|
new_value = read remote_row
|
221
215
|
local_record.send "#{name}=", new_value
|
222
216
|
currently_nil = new_value.nil?
|
223
217
|
end
|
224
|
-
|
225
218
|
if not currently_nil and persist_units? and (final_to_units = (to_units || read_units(remote_row)))
|
226
219
|
local_record.send "#{name}_units=", final_to_units
|
227
220
|
end
|
228
221
|
end
|
229
222
|
|
223
|
+
# @private
|
224
|
+
def updates(remote_row)
|
225
|
+
v = read remote_row
|
226
|
+
if persist_units?
|
227
|
+
v_units = unless v.nil?
|
228
|
+
to_units || read_units(remote_row)
|
229
|
+
end
|
230
|
+
{ name => v, "#{name}_units" => v_units }
|
231
|
+
else
|
232
|
+
{ name => v }
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
230
236
|
# @private
|
231
237
|
def read(row)
|
232
238
|
if matcher and matcher_output = matcher.match(row)
|
@@ -316,7 +322,7 @@ class DataMiner
|
|
316
322
|
def refresh
|
317
323
|
@dictionary = nil
|
318
324
|
end
|
319
|
-
|
325
|
+
|
320
326
|
private
|
321
327
|
|
322
328
|
def model
|
@@ -324,9 +330,15 @@ class DataMiner
|
|
324
330
|
end
|
325
331
|
|
326
332
|
def text_column?
|
327
|
-
return @text_column_query[0] if @text_column_query.is_a?(Array)
|
333
|
+
return @text_column_query.first if @text_column_query.is_a?(Array)
|
328
334
|
@text_column_query = [model.columns_hash[name.to_s].text?]
|
329
|
-
@text_column_query[0]
|
335
|
+
@text_column_query.first
|
336
|
+
end
|
337
|
+
|
338
|
+
def number_column?
|
339
|
+
return @number_column_query.first if @number_column_query.is_a?(Array)
|
340
|
+
@number_column_query = [model.columns_hash[name.to_s].number?]
|
341
|
+
@number_column_query.first
|
330
342
|
end
|
331
343
|
|
332
344
|
def static?
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'errata'
|
2
2
|
require 'remote_table'
|
3
|
+
require 'upsert'
|
3
4
|
|
4
5
|
class DataMiner
|
5
6
|
class Step
|
@@ -84,10 +85,25 @@ class DataMiner
|
|
84
85
|
|
85
86
|
# @private
|
86
87
|
def start
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
88
|
+
if storing_primary_key? or table_has_autoincrementing_primary_key?
|
89
|
+
c = ActiveRecord::Base.connection_pool.checkout
|
90
|
+
Upsert.stream(c, model.table_name) do |upsert|
|
91
|
+
table.each do |row|
|
92
|
+
selector = { @key => attributes[@key].read(row) }
|
93
|
+
document = attributes.except(@key).inject({}) do |memo, (_, attr)|
|
94
|
+
memo.merge! attr.updates(row)
|
95
|
+
memo
|
96
|
+
end
|
97
|
+
upsert.row selector, document
|
98
|
+
end
|
99
|
+
end
|
100
|
+
ActiveRecord::Base.connection_pool.checkin c
|
101
|
+
else
|
102
|
+
table.each do |row|
|
103
|
+
record = model.send "find_or_initialize_by_#{@key}", attributes[@key].read(row)
|
104
|
+
attributes.each { |_, attr| attr.set_from_row record, row }
|
105
|
+
record.save!
|
106
|
+
end
|
91
107
|
end
|
92
108
|
refresh
|
93
109
|
nil
|
@@ -95,6 +111,21 @@ class DataMiner
|
|
95
111
|
|
96
112
|
private
|
97
113
|
|
114
|
+
def table_has_autoincrementing_primary_key?
|
115
|
+
return @table_has_autoincrementing_primary_key_query.first if @table_has_autoincrementing_primary_key_query.is_a?(Array)
|
116
|
+
answer = model.columns.any? do |column|
|
117
|
+
column.primary and column.sql_type =~ /\bint/i
|
118
|
+
end
|
119
|
+
@table_has_autoincrementing_primary_key_query = [answer]
|
120
|
+
answer
|
121
|
+
end
|
122
|
+
|
123
|
+
def storing_primary_key?
|
124
|
+
return @storing_primary_key_query.first if @storing_primary_key_query.is_a?(Array)
|
125
|
+
@storing_primary_key_query = [attributes.has_key?(model.primary_key.to_sym)]
|
126
|
+
@storing_primary_key_query.first
|
127
|
+
end
|
128
|
+
|
98
129
|
def table
|
99
130
|
@table || @table_mutex.synchronize do
|
100
131
|
@table ||= ::RemoteTable.new(@table_settings)
|
data/lib/data_miner/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'bundler/setup'
|
3
3
|
|
4
|
-
if Bundler.definition.specs['
|
4
|
+
if Bundler.definition.specs['debugger'].first
|
5
|
+
require 'debugger'
|
6
|
+
elsif Bundler.definition.specs['ruby-debug'].first
|
5
7
|
require 'ruby-debug'
|
6
8
|
end
|
7
9
|
|
@@ -16,12 +18,46 @@ require 'logger'
|
|
16
18
|
ActiveRecord::Base.logger = Logger.new $stderr
|
17
19
|
ActiveRecord::Base.logger.level = Logger::INFO
|
18
20
|
# ActiveRecord::Base.logger.level = Logger::DEBUG
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
'
|
23
|
-
'
|
24
|
-
|
21
|
+
|
22
|
+
case ENV['DATABASE']
|
23
|
+
when /postgr/i
|
24
|
+
createdb_bin = ENV['TEST_CREATEDB_BIN'] || 'createdb'
|
25
|
+
dropdb_bin = ENV['TEST_DROPDB_BIN'] || 'dropdb'
|
26
|
+
username = ENV['TEST_POSTGRES_USERNAME'] || `whoami`.chomp
|
27
|
+
# password = ENV['TEST_POSTGRES_PASSWORD'] || 'password'
|
28
|
+
database = ENV['TEST_POSTGRES_DATABASE'] || 'data_miner_test'
|
29
|
+
system %{#{dropdb_bin} #{database}}
|
30
|
+
system %{#{createdb_bin} #{database}}
|
31
|
+
ActiveRecord::Base.establish_connection(
|
32
|
+
'adapter' => 'postgresql',
|
33
|
+
'encoding' => 'utf8',
|
34
|
+
'database' => database,
|
35
|
+
'username' => username
|
36
|
+
# 'password' => password
|
37
|
+
)
|
38
|
+
when /sqlite/i
|
39
|
+
ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :database => ":memory:")
|
40
|
+
else
|
41
|
+
bin = ENV['TEST_MYSQL_BIN'] || 'mysql'
|
42
|
+
username = ENV['TEST_MYSQL_USERNAME'] || 'root'
|
43
|
+
password = ENV['TEST_MYSQL_PASSWORD'] || 'password'
|
44
|
+
database = ENV['TEST_MYSQL_DATABASE'] || 'data_miner_test'
|
45
|
+
cmd = "#{bin} -u #{username} -p#{password}"
|
46
|
+
`#{cmd} -e 'show databases'`
|
47
|
+
unless $?.success?
|
48
|
+
$stderr.puts "Skipping mysql tests because `#{cmd}` doesn't work"
|
49
|
+
exit 0
|
50
|
+
end
|
51
|
+
system %{#{cmd} -e "drop database #{database}"}
|
52
|
+
system %{#{cmd} -e "create database #{database}"}
|
53
|
+
ActiveRecord::Base.establish_connection(
|
54
|
+
'adapter' => (RUBY_PLATFORM == 'java' ? 'mysql' : 'mysql2'),
|
55
|
+
'encoding' => 'utf8',
|
56
|
+
'database' => database,
|
57
|
+
'username' => username,
|
58
|
+
'password' => password
|
59
|
+
)
|
60
|
+
end
|
25
61
|
|
26
62
|
ActiveRecord::Base.mass_assignment_sanitizer = :strict
|
27
63
|
|
@@ -43,7 +79,9 @@ end
|
|
43
79
|
def init_models
|
44
80
|
require 'support/breed'
|
45
81
|
require 'support/pet'
|
82
|
+
require 'support/pet2'
|
46
83
|
Pet.auto_upgrade!
|
84
|
+
Pet2.auto_upgrade!
|
47
85
|
|
48
86
|
ActiveRecord::Base.descendants.each do |model|
|
49
87
|
model.attr_accessible nil
|
data/test/support/breed.rb
CHANGED
@@ -5,7 +5,7 @@ class Breed < ActiveRecord::Base
|
|
5
5
|
def update_average_age!
|
6
6
|
# make sure pet is populated
|
7
7
|
Pet.run_data_miner!
|
8
|
-
update_all %{
|
8
|
+
update_all %{"average_age" = (SELECT AVG("pets"."age") FROM "pets" WHERE "pets"."breed_id" = "breeds"."name")}
|
9
9
|
end
|
10
10
|
end
|
11
11
|
self.primary_key = "name"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
BREED_BY_LICENSE_NUMBER = File.expand_path('../breed_by_license_number.csv', __FILE__)
|
2
|
+
|
3
|
+
class Pet2 < ActiveRecord::Base
|
4
|
+
self.primary_key = "name"
|
5
|
+
col :name
|
6
|
+
col :breed_id
|
7
|
+
col :license_number, :type => :integer
|
8
|
+
|
9
|
+
data_miner do
|
10
|
+
process :auto_upgrade!
|
11
|
+
process :run_data_miner_on_parent_associations!
|
12
|
+
import("A list of pets", :url => "file://#{PETS}") do
|
13
|
+
key :name
|
14
|
+
store :license_number
|
15
|
+
end
|
16
|
+
import("Breed numbers based on license number", :url => "file://#{BREED_BY_LICENSE_NUMBER}") do
|
17
|
+
key :license_number
|
18
|
+
store :breed_id, :field_name => :breed, :nullify_blank_strings => true
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/test/support/pets.csv
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
|
2
|
-
Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
|
3
|
-
Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
|
4
|
-
Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
|
5
|
-
Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
|
6
|
-
Nemo,,,,,,,,
|
1
|
+
license_number,name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
|
2
|
+
111,Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
|
3
|
+
222,Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
|
4
|
+
333,Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
|
5
|
+
444,Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
|
6
|
+
555,Nemo,,,,,,,,
|
data/test/test_data_miner.rb
CHANGED
@@ -108,5 +108,9 @@ describe DataMiner do
|
|
108
108
|
Pet.data_miner_runs.first.row_count_before.must_equal 0
|
109
109
|
Pet.data_miner_runs.first.row_count_after.must_equal 5
|
110
110
|
end
|
111
|
+
it "can import based on keys other than the primary key" do
|
112
|
+
Pet2.run_data_miner!
|
113
|
+
Pet2.find('Jerry').breed_id.must_equal 'Beagle-Basset'
|
114
|
+
end
|
111
115
|
end
|
112
116
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.0
|
4
|
+
version: 2.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-06-
|
14
|
+
date: 2012-06-21 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: aasm
|
@@ -109,6 +109,22 @@ dependencies:
|
|
109
109
|
- - ! '>='
|
110
110
|
- !ruby/object:Gem::Version
|
111
111
|
version: 1.2.2
|
112
|
+
- !ruby/object:Gem::Dependency
|
113
|
+
name: upsert
|
114
|
+
requirement: !ruby/object:Gem::Requirement
|
115
|
+
none: false
|
116
|
+
requirements:
|
117
|
+
- - ! '>='
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
type: :runtime
|
121
|
+
prerelease: false
|
122
|
+
version_requirements: !ruby/object:Gem::Requirement
|
123
|
+
none: false
|
124
|
+
requirements:
|
125
|
+
- - ! '>='
|
126
|
+
- !ruby/object:Gem::Version
|
127
|
+
version: '0'
|
112
128
|
- !ruby/object:Gem::Dependency
|
113
129
|
name: dkastner-alchemist
|
114
130
|
requirement: !ruby/object:Gem::Requirement
|
@@ -222,7 +238,7 @@ dependencies:
|
|
222
238
|
- !ruby/object:Gem::Version
|
223
239
|
version: '0'
|
224
240
|
- !ruby/object:Gem::Dependency
|
225
|
-
name: mysql2
|
241
|
+
name: rake
|
226
242
|
requirement: !ruby/object:Gem::Requirement
|
227
243
|
none: false
|
228
244
|
requirements:
|
@@ -238,7 +254,7 @@ dependencies:
|
|
238
254
|
- !ruby/object:Gem::Version
|
239
255
|
version: '0'
|
240
256
|
- !ruby/object:Gem::Dependency
|
241
|
-
name: rake
|
257
|
+
name: yard
|
242
258
|
requirement: !ruby/object:Gem::Requirement
|
243
259
|
none: false
|
244
260
|
requirements:
|
@@ -254,7 +270,39 @@ dependencies:
|
|
254
270
|
- !ruby/object:Gem::Version
|
255
271
|
version: '0'
|
256
272
|
- !ruby/object:Gem::Dependency
|
257
|
-
name: yard
|
273
|
+
name: sqlite3
|
274
|
+
requirement: !ruby/object:Gem::Requirement
|
275
|
+
none: false
|
276
|
+
requirements:
|
277
|
+
- - ! '>='
|
278
|
+
- !ruby/object:Gem::Version
|
279
|
+
version: '0'
|
280
|
+
type: :development
|
281
|
+
prerelease: false
|
282
|
+
version_requirements: !ruby/object:Gem::Requirement
|
283
|
+
none: false
|
284
|
+
requirements:
|
285
|
+
- - ! '>='
|
286
|
+
- !ruby/object:Gem::Version
|
287
|
+
version: '0'
|
288
|
+
- !ruby/object:Gem::Dependency
|
289
|
+
name: mysql2
|
290
|
+
requirement: !ruby/object:Gem::Requirement
|
291
|
+
none: false
|
292
|
+
requirements:
|
293
|
+
- - ! '>='
|
294
|
+
- !ruby/object:Gem::Version
|
295
|
+
version: '0'
|
296
|
+
type: :development
|
297
|
+
prerelease: false
|
298
|
+
version_requirements: !ruby/object:Gem::Requirement
|
299
|
+
none: false
|
300
|
+
requirements:
|
301
|
+
- - ! '>='
|
302
|
+
- !ruby/object:Gem::Version
|
303
|
+
version: '0'
|
304
|
+
- !ruby/object:Gem::Dependency
|
305
|
+
name: pg
|
258
306
|
requirement: !ruby/object:Gem::Requirement
|
259
307
|
none: false
|
260
308
|
requirements:
|
@@ -306,11 +354,13 @@ files:
|
|
306
354
|
- test/data_miner/unit_converter/test_conversions.rb
|
307
355
|
- test/helper.rb
|
308
356
|
- test/support/breed.rb
|
357
|
+
- test/support/breed_by_license_number.csv
|
309
358
|
- test/support/breeds.xls
|
310
359
|
- test/support/data_miner_with_alchemist.rb
|
311
360
|
- test/support/data_miner_with_conversions.rb
|
312
361
|
- test/support/data_miner_without_unit_converter.rb
|
313
362
|
- test/support/pet.rb
|
363
|
+
- test/support/pet2.rb
|
314
364
|
- test/support/pet_color_dictionary.en.csv
|
315
365
|
- test/support/pet_color_dictionary.es.csv
|
316
366
|
- test/support/pets.csv
|
@@ -352,11 +402,13 @@ test_files:
|
|
352
402
|
- test/data_miner/unit_converter/test_conversions.rb
|
353
403
|
- test/helper.rb
|
354
404
|
- test/support/breed.rb
|
405
|
+
- test/support/breed_by_license_number.csv
|
355
406
|
- test/support/breeds.xls
|
356
407
|
- test/support/data_miner_with_alchemist.rb
|
357
408
|
- test/support/data_miner_with_conversions.rb
|
358
409
|
- test/support/data_miner_without_unit_converter.rb
|
359
410
|
- test/support/pet.rb
|
411
|
+
- test/support/pet2.rb
|
360
412
|
- test/support/pet_color_dictionary.en.csv
|
361
413
|
- test/support/pet_color_dictionary.es.csv
|
362
414
|
- test/support/pets.csv
|