data_miner 2.1.2 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +11 -0
- data/Gemfile +2 -9
- data/data_miner.gemspec +14 -4
- data/lib/data_miner.rb +18 -0
- data/lib/data_miner/attribute.rb +58 -27
- data/lib/data_miner/unit_converter.rb +12 -0
- data/lib/data_miner/unit_converter/alchemist.rb +11 -0
- data/lib/data_miner/unit_converter/conversions.rb +11 -0
- data/lib/data_miner/version.rb +1 -1
- data/test/data_miner/test_attribute.rb +36 -0
- data/test/data_miner/unit_converter/test_alchemist.rb +13 -0
- data/test/data_miner/unit_converter/test_conversions.rb +15 -0
- data/test/helper.rb +17 -64
- data/test/support/breed.rb +21 -0
- data/test/support/data_miner_with_alchemist.rb +13 -0
- data/test/support/data_miner_with_conversions.rb +16 -0
- data/test/support/data_miner_without_unit_converter.rb +51 -0
- data/test/support/pet.rb +34 -0
- data/test/support/pets.csv +4 -4
- data/test/test_data_miner.rb +11 -2
- data/test/test_data_miner_run_column_statistic.rb +2 -0
- data/test/test_earth_import.rb +2 -0
- data/test/test_earth_tap.rb +2 -0
- data/test/test_safety.rb +2 -0
- data/test/test_unit_conversion.rb +16 -0
- metadata +181 -16
data/CHANGELOG
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
2.2.0 / 2012-06-11
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* You must set DataMiner.unit_converter = :alchemist or :conversions if you want to convert units
|
6
|
+
|
7
|
+
* Enhancements
|
8
|
+
|
9
|
+
* Swappable unit conversion libraries [@dkastner]
|
10
|
+
* Intelligent parsing of commas and periods in number fields [@ihough]
|
11
|
+
|
1
12
|
2.1.2 / 2012-05-22
|
2
13
|
|
3
14
|
* Breaking changes
|
data/Gemfile
CHANGED
@@ -2,15 +2,8 @@ source :rubygems
|
|
2
2
|
|
3
3
|
gemspec
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
gem 'minitest'
|
8
|
-
gem 'minitest-reporters'
|
9
|
-
gem 'mysql2'
|
10
|
-
gem 'rake'
|
11
|
-
gem 'yard'
|
12
|
-
gem 'earth'
|
13
|
-
gem 'lock_method'
|
5
|
+
gem 'conversions'
|
6
|
+
|
14
7
|
if RUBY_VERSION >= '1.9'
|
15
8
|
gem 'unicode_utils'
|
16
9
|
end
|
data/data_miner.gemspec
CHANGED
@@ -17,11 +17,21 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
18
|
s.require_paths = ["lib"]
|
19
19
|
|
20
|
-
s.add_runtime_dependency '
|
20
|
+
s.add_runtime_dependency 'aasm'
|
21
|
+
s.add_runtime_dependency 'active_record_inline_schema'
|
21
22
|
s.add_runtime_dependency 'activerecord', '>=2.3.4'
|
22
23
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
23
|
-
s.add_runtime_dependency 'conversions', '>=1.4.4'
|
24
24
|
s.add_runtime_dependency 'errata', '>=1.0.1'
|
25
|
-
s.add_runtime_dependency '
|
26
|
-
|
25
|
+
s.add_runtime_dependency 'remote_table', '>=1.2.2'
|
26
|
+
|
27
|
+
s.add_development_dependency 'dkastner-alchemist'
|
28
|
+
s.add_development_dependency 'conversions'
|
29
|
+
s.add_development_dependency 'earth'
|
30
|
+
s.add_development_dependency 'fuzzy_match'
|
31
|
+
s.add_development_dependency 'lock_method'
|
32
|
+
s.add_development_dependency 'minitest'
|
33
|
+
s.add_development_dependency 'minitest-reporters'
|
34
|
+
s.add_development_dependency 'mysql2'
|
35
|
+
s.add_development_dependency 'rake'
|
36
|
+
s.add_development_dependency 'yard'
|
27
37
|
end
|
data/lib/data_miner.rb
CHANGED
@@ -23,6 +23,7 @@ require 'data_miner/step/import'
|
|
23
23
|
require 'data_miner/step/tap'
|
24
24
|
require 'data_miner/step/process'
|
25
25
|
require 'data_miner/run'
|
26
|
+
require 'data_miner/unit_converter'
|
26
27
|
|
27
28
|
# A singleton class that holds global configuration for data mining.
|
28
29
|
#
|
@@ -45,6 +46,23 @@ class DataMiner
|
|
45
46
|
def compress_whitespace(str)
|
46
47
|
str.gsub(INNER_SPACE, ' ').strip
|
47
48
|
end
|
49
|
+
|
50
|
+
# Set the unit converter.
|
51
|
+
#
|
52
|
+
# @note As of 2012-05-30, there are problems with the alchemist gem and the use of the conversions gem instead is recommended.
|
53
|
+
#
|
54
|
+
# @param [Symbol,nil] conversion_library Either +:alchemist+ or +:conversions+
|
55
|
+
#
|
56
|
+
# @return [nil]
|
57
|
+
def unit_converter=(conversion_library)
|
58
|
+
@unit_converter = UnitConverter.load conversion_library
|
59
|
+
nil
|
60
|
+
end
|
61
|
+
|
62
|
+
# @return [#convert,nil] The user-selected unit converter or nil.
|
63
|
+
def unit_converter
|
64
|
+
@unit_converter
|
65
|
+
end
|
48
66
|
end
|
49
67
|
|
50
68
|
INNER_SPACE = /[ ]+/
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'conversions'
|
2
|
-
|
3
1
|
class DataMiner
|
4
2
|
# A mapping between a local model column and a remote data source column.
|
5
3
|
#
|
@@ -17,13 +15,23 @@ class DataMiner
|
|
17
15
|
if (invalid_option_keys = options.keys - VALID_OPTIONS).any?
|
18
16
|
errors << %{Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence}}
|
19
17
|
end
|
20
|
-
|
18
|
+
units_options = options.select { |k, _| k.to_s.include?('units') }
|
19
|
+
if units_options.any? and DataMiner.unit_converter.nil?
|
20
|
+
errors << %{You must set DataMiner.unit_converter to :alchemist or :conversions if you wish to convert units}
|
21
|
+
end
|
22
|
+
if units_options.any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
|
21
23
|
errors << %{#{units_options.inspect} is not a valid set of units definitions. Please supply a set like #{VALID_UNIT_DEFINITION_SETS.map(&:inspect).to_sentence}".}
|
22
24
|
end
|
23
25
|
errors
|
24
26
|
end
|
25
27
|
end
|
26
|
-
|
28
|
+
|
29
|
+
def number_column?
|
30
|
+
return @number_column_query[0] if @number_column_query.is_a?(Array)
|
31
|
+
@number_column_query = [model.columns_hash[name.to_s].number?]
|
32
|
+
@number_column_query[0]
|
33
|
+
end
|
34
|
+
|
27
35
|
VALID_OPTIONS = [
|
28
36
|
:from_units,
|
29
37
|
:to_units,
|
@@ -47,12 +55,12 @@ class DataMiner
|
|
47
55
|
]
|
48
56
|
|
49
57
|
VALID_UNIT_DEFINITION_SETS = [
|
50
|
-
[:units],
|
51
|
-
[:from_units, :to_units],
|
52
|
-
[:units_field_name],
|
53
|
-
[:units_field_name, :to_units],
|
54
|
-
[:units_field_number],
|
55
|
-
[:units_field_number, :to_units],
|
58
|
+
[:units], # no conversion
|
59
|
+
[:from_units, :to_units], # yes
|
60
|
+
[:units_field_name], # no
|
61
|
+
[:units_field_name, :to_units], # yes
|
62
|
+
[:units_field_number], # no
|
63
|
+
[:units_field_number, :to_units], # yes
|
56
64
|
]
|
57
65
|
|
58
66
|
DEFAULT_SPLIT_PATTERN = /\s+/
|
@@ -111,14 +119,15 @@ class DataMiner
|
|
111
119
|
# @return [Hash]
|
112
120
|
attr_reader :split
|
113
121
|
|
114
|
-
# Final units. May invoke a conversion using https://
|
122
|
+
# Final units. May invoke a conversion using https://rubygems.org/gems/alchemist
|
115
123
|
#
|
116
124
|
# If a local column named +[name]_units+ exists, it will be populated with this value.
|
117
125
|
#
|
118
126
|
# @return [Symbol]
|
119
127
|
attr_reader :to_units
|
120
128
|
|
121
|
-
# Initial units. May invoke a conversion using https://
|
129
|
+
# Initial units. May invoke a conversion using a conversion gem like https://rubygems.org/gems/alchemist
|
130
|
+
# Be sure to set DataMiner.unit_converter
|
122
131
|
# @return [Symbol]
|
123
132
|
attr_reader :from_units
|
124
133
|
|
@@ -186,6 +195,8 @@ class DataMiner
|
|
186
195
|
@overwrite = options.fetch :overwrite, DEFAULT_OVERWRITE
|
187
196
|
@units_field_name = options[:units_field_name]
|
188
197
|
@units_field_number = options[:units_field_number]
|
198
|
+
@convert_boolean = (@from_units.present? or (@to_units.present? and (@units_field_name.present? or @units_field_number.present?)))
|
199
|
+
@persist_units_boolean = (@to_units.present? or @units_field_name.present? or @units_field_number.present?)
|
189
200
|
@dictionary_mutex = ::Mutex.new
|
190
201
|
end
|
191
202
|
|
@@ -211,7 +222,7 @@ class DataMiner
|
|
211
222
|
currently_nil = new_value.nil?
|
212
223
|
end
|
213
224
|
|
214
|
-
if not currently_nil and
|
225
|
+
if not currently_nil and persist_units? and (final_to_units = (to_units || read_units(remote_row)))
|
215
226
|
local_record.send "#{name}_units=", final_to_units
|
216
227
|
end
|
217
228
|
end
|
@@ -244,6 +255,21 @@ class DataMiner
|
|
244
255
|
return value
|
245
256
|
end
|
246
257
|
value = value.to_s
|
258
|
+
if number_column?
|
259
|
+
period_position = value.rindex '.'
|
260
|
+
comma_position = value.rindex ','
|
261
|
+
# assume that ',' is a thousands separator and '.' is a decimal point unless we have evidence to the contrary
|
262
|
+
if period_position and comma_position and comma_position > period_position
|
263
|
+
# uncommon euro style 1.000,53
|
264
|
+
value = value.delete('.').gsub(',', '.')
|
265
|
+
elsif comma_position and comma_position > (value.length - 4)
|
266
|
+
# uncommon euro style 1000,53
|
267
|
+
value = value.gsub(',', '.')
|
268
|
+
elsif comma_position
|
269
|
+
# more common 1,000[.00] style - still don't want commas
|
270
|
+
value = value.delete(',')
|
271
|
+
end
|
272
|
+
end
|
247
273
|
if chars
|
248
274
|
value = value[chars]
|
249
275
|
end
|
@@ -252,7 +278,7 @@ class DataMiner
|
|
252
278
|
keep = split.fetch :keep, DEFAULT_SPLIT_KEEP
|
253
279
|
value = value.to_s.split(pattern)[keep].to_s
|
254
280
|
end
|
255
|
-
if value.blank? and (not
|
281
|
+
if value.blank? and (not text_column? or nullify_blank_strings)
|
256
282
|
return
|
257
283
|
end
|
258
284
|
value = DataMiner.compress_whitespace value
|
@@ -260,12 +286,7 @@ class DataMiner
|
|
260
286
|
value = DataMiner.upcase value
|
261
287
|
end
|
262
288
|
if convert?
|
263
|
-
|
264
|
-
final_to_units = to_units || read_units(row)
|
265
|
-
if final_from_units.blank? or final_to_units.blank?
|
266
|
-
raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
|
267
|
-
end
|
268
|
-
value = value.to_f.convert final_from_units, final_to_units
|
289
|
+
value = convert_units value, row
|
269
290
|
end
|
270
291
|
if sprintf
|
271
292
|
if sprintf.end_with?('f')
|
@@ -281,6 +302,16 @@ class DataMiner
|
|
281
302
|
value
|
282
303
|
end
|
283
304
|
|
305
|
+
# @private
|
306
|
+
def convert_units(value, row)
|
307
|
+
final_from_units = from_units || read_units(row)
|
308
|
+
final_to_units = to_units || read_units(row)
|
309
|
+
unless final_from_units and final_to_units
|
310
|
+
raise RuntimeError, "[data_miner] Missing units: from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
|
311
|
+
end
|
312
|
+
DataMiner.unit_converter.convert value, final_from_units, final_to_units
|
313
|
+
end
|
314
|
+
|
284
315
|
# @private
|
285
316
|
def refresh
|
286
317
|
@dictionary = nil
|
@@ -292,10 +323,10 @@ class DataMiner
|
|
292
323
|
step.model
|
293
324
|
end
|
294
325
|
|
295
|
-
def
|
296
|
-
return @
|
297
|
-
@
|
298
|
-
@
|
326
|
+
def text_column?
|
327
|
+
return @text_column_query[0] if @text_column_query.is_a?(Array)
|
328
|
+
@text_column_query = [model.columns_hash[name.to_s].text?]
|
329
|
+
@text_column_query[0]
|
299
330
|
end
|
300
331
|
|
301
332
|
def static?
|
@@ -307,11 +338,11 @@ class DataMiner
|
|
307
338
|
end
|
308
339
|
|
309
340
|
def convert?
|
310
|
-
|
341
|
+
@convert_boolean
|
311
342
|
end
|
312
343
|
|
313
|
-
def
|
314
|
-
|
344
|
+
def persist_units?
|
345
|
+
@persist_units_boolean
|
315
346
|
end
|
316
347
|
|
317
348
|
def read_units(row)
|
data/lib/data_miner/version.rb
CHANGED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe DataMiner::Attribute do
|
4
|
+
before do
|
5
|
+
DataMiner.unit_converter = :alchemist
|
6
|
+
end
|
7
|
+
|
8
|
+
describe '#convert?' do
|
9
|
+
it 'returns true if from_units is set' do
|
10
|
+
attribute = DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
|
11
|
+
assert attribute.send(:convert?)
|
12
|
+
end
|
13
|
+
it 'returns true if to_units and units_field_name are set' do
|
14
|
+
attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar', :to_units => :kilograms
|
15
|
+
assert attribute.send(:convert?)
|
16
|
+
end
|
17
|
+
it 'returns true if to_units and units_field_number are set' do
|
18
|
+
attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 3, :to_units => :kilograms
|
19
|
+
assert attribute.send(:convert?)
|
20
|
+
end
|
21
|
+
it 'returns false if units_field_name only is set' do
|
22
|
+
attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar'
|
23
|
+
refute attribute.send(:convert?)
|
24
|
+
end
|
25
|
+
it 'returns false if units_field_number only is set' do
|
26
|
+
attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 'bar'
|
27
|
+
refute attribute.send(:convert?)
|
28
|
+
end
|
29
|
+
it 'raises if no converter and units are used' do
|
30
|
+
DataMiner.unit_converter = nil
|
31
|
+
lambda {
|
32
|
+
DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
|
33
|
+
}.must_raise ArgumentError, /unit_converter/
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe 'DataMiner::UnitConverter::Alchemist' do
|
4
|
+
before do
|
5
|
+
DataMiner.unit_converter = :alchemist
|
6
|
+
end
|
7
|
+
|
8
|
+
describe '#convert' do
|
9
|
+
it 'converts a value from one unit to another' do
|
10
|
+
DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe 'DataMiner::UnitConverter::Conversions' do
|
4
|
+
before do
|
5
|
+
#DataMiner.unit_converter = :conversions
|
6
|
+
end
|
7
|
+
|
8
|
+
describe '#convert' do
|
9
|
+
it 'converts a value from one unit to another' do
|
10
|
+
# can't load both alchemist and conversions in same test run
|
11
|
+
# see test/test_unit_conversion for coverage of this adapter
|
12
|
+
#DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/test/helper.rb
CHANGED
@@ -11,11 +11,6 @@ require 'minitest/reporters'
|
|
11
11
|
MiniTest::Unit.runner = MiniTest::SuiteRunner.new
|
12
12
|
MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
13
13
|
|
14
|
-
cmd = %{mysql -u root -ppassword -e "DROP DATABASE data_miner_test; CREATE DATABASE data_miner_test CHARSET utf8"}
|
15
|
-
$stderr.puts "Running `#{cmd}`..."
|
16
|
-
system cmd
|
17
|
-
$stderr.puts "Done."
|
18
|
-
|
19
14
|
require 'active_record'
|
20
15
|
require 'logger'
|
21
16
|
ActiveRecord::Base.logger = Logger.new $stderr
|
@@ -31,68 +26,26 @@ ActiveRecord::Base.establish_connection(
|
|
31
26
|
ActiveRecord::Base.mass_assignment_sanitizer = :strict
|
32
27
|
|
33
28
|
require 'data_miner'
|
34
|
-
DataMiner::Run.auto_upgrade!
|
35
|
-
DataMiner::Run::ColumnStatistic.auto_upgrade!
|
36
|
-
DataMiner::Run.clear_locks
|
37
29
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
30
|
+
def init_database(unit_converter = :conversions)
|
31
|
+
cmd = %{mysql -u root -ppassword -e "DROP DATABASE data_miner_test; CREATE DATABASE data_miner_test CHARSET utf8"}
|
32
|
+
$stderr.puts "Running `#{cmd}`..."
|
33
|
+
system cmd
|
34
|
+
$stderr.puts "Done."
|
43
35
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
col :breed_id
|
48
|
-
col :color_id
|
49
|
-
col :age, :type => :integer
|
50
|
-
col :age_units
|
51
|
-
col :weight, :type => :float
|
52
|
-
col :weight_units
|
53
|
-
col :height, :type => :integer
|
54
|
-
col :height_units
|
55
|
-
col :favorite_food
|
56
|
-
col :command_phrase
|
57
|
-
belongs_to :breed
|
58
|
-
data_miner do
|
59
|
-
process :auto_upgrade!
|
60
|
-
process :run_data_miner_on_parent_associations!
|
61
|
-
import("A list of pets", :url => "file://#{PETS}") do
|
62
|
-
key :name
|
63
|
-
store :age, :units_field_name => 'age_units'
|
64
|
-
store :breed_id, :field_name => :breed, :nullify_blank_strings => true
|
65
|
-
store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
|
66
|
-
store :weight, :from_units => :pounds, :to_units => :kilograms
|
67
|
-
store :favorite_food, :nullify_blank_strings => true
|
68
|
-
store :command_phrase
|
69
|
-
store :height, :units => :centimetres
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
36
|
+
DataMiner::Run.auto_upgrade!
|
37
|
+
DataMiner::Run::ColumnStatistic.auto_upgrade!
|
38
|
+
DataMiner::Run.clear_locks
|
73
39
|
|
74
|
-
|
75
|
-
class << self
|
76
|
-
def update_average_age!
|
77
|
-
# make sure pet is populated
|
78
|
-
Pet.run_data_miner!
|
79
|
-
update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
|
80
|
-
end
|
81
|
-
end
|
82
|
-
self.primary_key = "name"
|
83
|
-
col :name
|
84
|
-
col :average_age, :type => :float
|
85
|
-
data_miner do
|
86
|
-
process :auto_upgrade!
|
87
|
-
import("A list of breeds", :url => "file://#{BREEDS}") do
|
88
|
-
key :name, :field_name => 'Breed name'
|
89
|
-
end
|
90
|
-
process :update_average_age!
|
91
|
-
end
|
40
|
+
DataMiner.unit_converter = unit_converter
|
92
41
|
end
|
93
42
|
|
94
|
-
|
95
|
-
|
96
|
-
|
43
|
+
def init_models
|
44
|
+
require 'support/breed'
|
45
|
+
require 'support/pet'
|
46
|
+
Pet.auto_upgrade!
|
97
47
|
|
98
|
-
|
48
|
+
ActiveRecord::Base.descendants.each do |model|
|
49
|
+
model.attr_accessible nil
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
BREEDS = File.expand_path('../breeds.xls', __FILE__)
|
2
|
+
|
3
|
+
class Breed < ActiveRecord::Base
|
4
|
+
class << self
|
5
|
+
def update_average_age!
|
6
|
+
# make sure pet is populated
|
7
|
+
Pet.run_data_miner!
|
8
|
+
update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
|
9
|
+
end
|
10
|
+
end
|
11
|
+
self.primary_key = "name"
|
12
|
+
col :name
|
13
|
+
col :average_age, :type => :float
|
14
|
+
data_miner do
|
15
|
+
process :auto_upgrade!
|
16
|
+
import("A list of breeds", :url => "file://#{BREEDS}") do
|
17
|
+
key :name, :field_name => 'Breed name'
|
18
|
+
end
|
19
|
+
process :update_average_age!
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe 'DataMiner with Alchemist' do
|
4
|
+
before do
|
5
|
+
init_database(:alchemist)
|
6
|
+
init_models
|
7
|
+
Pet.run_data_miner!
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'converts convertible units' do
|
11
|
+
Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to.kilograms.to_f
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
require 'conversions'
|
4
|
+
Conversions.register :years, :years, 1
|
5
|
+
|
6
|
+
describe 'DataMiner with Conversions' do
|
7
|
+
before do
|
8
|
+
init_database(:conversions)
|
9
|
+
init_models
|
10
|
+
Pet.run_data_miner!
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'converts convertible units' do
|
14
|
+
Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to(:kilograms)
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class MyPet < ActiveRecord::Base
|
4
|
+
PETS = File.expand_path('../pets.csv', __FILE__)
|
5
|
+
COLOR_DICTIONARY_ENGLISH = File.expand_path('../pet_color_dictionary.en.csv', __FILE__)
|
6
|
+
|
7
|
+
self.primary_key = "name"
|
8
|
+
col :name
|
9
|
+
col :color_id
|
10
|
+
col :age, :type => :integer
|
11
|
+
col :age_units
|
12
|
+
col :weight, :type => :float
|
13
|
+
col :weight_units
|
14
|
+
col :height, :type => :integer
|
15
|
+
col :height_units
|
16
|
+
col :favorite_food
|
17
|
+
col :command_phrase
|
18
|
+
|
19
|
+
data_miner do
|
20
|
+
process :auto_upgrade!
|
21
|
+
import("A list of pets", :url => "file://#{PETS}") do
|
22
|
+
key :name
|
23
|
+
store :age
|
24
|
+
store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
|
25
|
+
store :weight
|
26
|
+
store :favorite_food, :nullify_blank_strings => true
|
27
|
+
store :command_phrase
|
28
|
+
store :height, :units => :centimetres
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe 'DataMiner with Conversions' do
|
34
|
+
it 'happens when DataMiner.unit_converter is nil' do
|
35
|
+
DataMiner.unit_converter.must_be_nil
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'converts convertible units' do
|
39
|
+
init_database(nil)
|
40
|
+
MyPet.run_data_miner!
|
41
|
+
MyPet.find('Pierre').weight.must_equal 4.4
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'raises an error if conversions are attempted' do
|
45
|
+
init_database(nil)
|
46
|
+
lambda do
|
47
|
+
init_models
|
48
|
+
Pet.run_data_miner!
|
49
|
+
end.must_raise DataMiner::Attribute::NoConverterSet
|
50
|
+
end
|
51
|
+
end
|
data/test/support/pet.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
PETS = File.expand_path('../pets.csv', __FILE__)
|
2
|
+
PETS_FUNNY = File.expand_path('../pets_funny.csv', __FILE__)
|
3
|
+
COLOR_DICTIONARY_ENGLISH = File.expand_path('../pet_color_dictionary.en.csv', __FILE__)
|
4
|
+
COLOR_DICTIONARY_SPANISH = File.expand_path('../pet_color_dictionary.es.csv', __FILE__)
|
5
|
+
|
6
|
+
class Pet < ActiveRecord::Base
|
7
|
+
self.primary_key = "name"
|
8
|
+
col :name
|
9
|
+
col :breed_id
|
10
|
+
col :color_id
|
11
|
+
col :age, :type => :integer
|
12
|
+
col :age_units
|
13
|
+
col :weight, :type => :float
|
14
|
+
col :weight_units
|
15
|
+
col :height, :type => :float
|
16
|
+
col :height_units
|
17
|
+
col :favorite_food
|
18
|
+
col :command_phrase
|
19
|
+
belongs_to :breed
|
20
|
+
data_miner do
|
21
|
+
process :auto_upgrade!
|
22
|
+
process :run_data_miner_on_parent_associations!
|
23
|
+
import("A list of pets", :url => "file://#{PETS}") do
|
24
|
+
key :name
|
25
|
+
store :age, :units_field_name => 'age_units'
|
26
|
+
store :breed_id, :field_name => :breed, :nullify_blank_strings => true
|
27
|
+
store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
|
28
|
+
store :weight, :from_units => :pounds, :to_units => :kilograms
|
29
|
+
store :favorite_food, :nullify_blank_strings => true
|
30
|
+
store :command_phrase
|
31
|
+
store :height, :units => :millimetres
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/test/support/pets.csv
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
|
2
|
-
Pierre,Tabby,GO,4,years,4.4,
|
3
|
-
Jerry,Beagle,BR/BL,5,years,10,
|
4
|
-
Amigo,Spanish Lizarto,GR/BU,17,years," ",
|
5
|
-
Johnny,Beagle,BR/BL,2,years,20,
|
2
|
+
Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
|
3
|
+
Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
|
4
|
+
Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
|
5
|
+
Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
|
6
6
|
Nemo,,,,,,,,
|
data/test/test_data_miner.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
require 'helper'
|
3
|
+
init_database
|
4
|
+
init_models
|
3
5
|
|
4
6
|
describe DataMiner do
|
5
7
|
describe "when used to import example data about pets" do
|
@@ -65,15 +67,22 @@ describe DataMiner do
|
|
65
67
|
Breed.run_data_miner!
|
66
68
|
Breed.find('Beagle').average_age.must_equal((5+2)/2.0)
|
67
69
|
end
|
70
|
+
it "properly interprets numbers using comma or period separators" do
|
71
|
+
Pet.run_data_miner!
|
72
|
+
Pet.find('Pierre').height.must_equal 3000.5
|
73
|
+
Pet.find('Jerry').height.must_equal 3000.0
|
74
|
+
Pet.find('Amigo').height.must_equal 300.5
|
75
|
+
Pet.find('Johnny').height.must_equal 4000.0
|
76
|
+
end
|
68
77
|
it "performs unit conversions" do
|
69
78
|
Pet.run_data_miner!
|
70
|
-
Pet.find('Pierre').weight.must_be_close_to
|
79
|
+
Pet.find('Pierre').weight.must_be_close_to 1.9958 # 4.4 pounds in kilograms
|
71
80
|
end
|
72
81
|
it "sets units" do
|
73
82
|
Pet.run_data_miner!
|
74
83
|
Pet.find('Pierre').age_units.must_equal 'years'
|
75
84
|
Pet.find('Pierre').weight_units.must_equal 'kilograms'
|
76
|
-
Pet.find('Pierre').height_units.must_equal '
|
85
|
+
Pet.find('Pierre').height_units.must_equal 'millimetres'
|
77
86
|
end
|
78
87
|
it "always nullifies numeric columns when blank/nil is the input" do
|
79
88
|
Pet.run_data_miner!
|
data/test/test_earth_import.rb
CHANGED
data/test/test_earth_tap.rb
CHANGED
data/test/test_safety.rb
CHANGED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe 'DataMiner unit conversion' do
|
4
|
+
it "blows up if you don't specify a converter" do
|
5
|
+
output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_without_unit_converter.rb', __FILE__)}`
|
6
|
+
refute $?.success?, output
|
7
|
+
end
|
8
|
+
it 'can convert with alchemist' do
|
9
|
+
output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_alchemist.rb', __FILE__)}`
|
10
|
+
assert $?.success?, output
|
11
|
+
end
|
12
|
+
it 'can convert with conversions' do
|
13
|
+
output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_conversions.rb', __FILE__)}`
|
14
|
+
assert $?.success?, output
|
15
|
+
end
|
16
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,16 +11,16 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-
|
14
|
+
date: 2012-06-11 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
|
-
name:
|
17
|
+
name: aasm
|
18
18
|
requirement: !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: '0'
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
26
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -28,7 +28,23 @@ dependencies:
|
|
28
28
|
requirements:
|
29
29
|
- - ! '>='
|
30
30
|
- !ruby/object:Gem::Version
|
31
|
-
version:
|
31
|
+
version: '0'
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: active_record_inline_schema
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
none: false
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
32
48
|
- !ruby/object:Gem::Dependency
|
33
49
|
name: activerecord
|
34
50
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,13 +78,13 @@ dependencies:
|
|
62
78
|
- !ruby/object:Gem::Version
|
63
79
|
version: 2.3.4
|
64
80
|
- !ruby/object:Gem::Dependency
|
65
|
-
name:
|
81
|
+
name: errata
|
66
82
|
requirement: !ruby/object:Gem::Requirement
|
67
83
|
none: false
|
68
84
|
requirements:
|
69
85
|
- - ! '>='
|
70
86
|
- !ruby/object:Gem::Version
|
71
|
-
version: 1.
|
87
|
+
version: 1.0.1
|
72
88
|
type: :runtime
|
73
89
|
prerelease: false
|
74
90
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -76,15 +92,15 @@ dependencies:
|
|
76
92
|
requirements:
|
77
93
|
- - ! '>='
|
78
94
|
- !ruby/object:Gem::Version
|
79
|
-
version: 1.
|
95
|
+
version: 1.0.1
|
80
96
|
- !ruby/object:Gem::Dependency
|
81
|
-
name:
|
97
|
+
name: remote_table
|
82
98
|
requirement: !ruby/object:Gem::Requirement
|
83
99
|
none: false
|
84
100
|
requirements:
|
85
101
|
- - ! '>='
|
86
102
|
- !ruby/object:Gem::Version
|
87
|
-
version: 1.
|
103
|
+
version: 1.2.2
|
88
104
|
type: :runtime
|
89
105
|
prerelease: false
|
90
106
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -92,16 +108,16 @@ dependencies:
|
|
92
108
|
requirements:
|
93
109
|
- - ! '>='
|
94
110
|
- !ruby/object:Gem::Version
|
95
|
-
version: 1.
|
111
|
+
version: 1.2.2
|
96
112
|
- !ruby/object:Gem::Dependency
|
97
|
-
name:
|
113
|
+
name: dkastner-alchemist
|
98
114
|
requirement: !ruby/object:Gem::Requirement
|
99
115
|
none: false
|
100
116
|
requirements:
|
101
117
|
- - ! '>='
|
102
118
|
- !ruby/object:Gem::Version
|
103
119
|
version: '0'
|
104
|
-
type: :
|
120
|
+
type: :development
|
105
121
|
prerelease: false
|
106
122
|
version_requirements: !ruby/object:Gem::Requirement
|
107
123
|
none: false
|
@@ -110,14 +126,142 @@ dependencies:
|
|
110
126
|
- !ruby/object:Gem::Version
|
111
127
|
version: '0'
|
112
128
|
- !ruby/object:Gem::Dependency
|
113
|
-
name:
|
129
|
+
name: conversions
|
114
130
|
requirement: !ruby/object:Gem::Requirement
|
115
131
|
none: false
|
116
132
|
requirements:
|
117
133
|
- - ! '>='
|
118
134
|
- !ruby/object:Gem::Version
|
119
135
|
version: '0'
|
120
|
-
type: :
|
136
|
+
type: :development
|
137
|
+
prerelease: false
|
138
|
+
version_requirements: !ruby/object:Gem::Requirement
|
139
|
+
none: false
|
140
|
+
requirements:
|
141
|
+
- - ! '>='
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
144
|
+
- !ruby/object:Gem::Dependency
|
145
|
+
name: earth
|
146
|
+
requirement: !ruby/object:Gem::Requirement
|
147
|
+
none: false
|
148
|
+
requirements:
|
149
|
+
- - ! '>='
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: '0'
|
152
|
+
type: :development
|
153
|
+
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
none: false
|
156
|
+
requirements:
|
157
|
+
- - ! '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
- !ruby/object:Gem::Dependency
|
161
|
+
name: fuzzy_match
|
162
|
+
requirement: !ruby/object:Gem::Requirement
|
163
|
+
none: false
|
164
|
+
requirements:
|
165
|
+
- - ! '>='
|
166
|
+
- !ruby/object:Gem::Version
|
167
|
+
version: '0'
|
168
|
+
type: :development
|
169
|
+
prerelease: false
|
170
|
+
version_requirements: !ruby/object:Gem::Requirement
|
171
|
+
none: false
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
176
|
+
- !ruby/object:Gem::Dependency
|
177
|
+
name: lock_method
|
178
|
+
requirement: !ruby/object:Gem::Requirement
|
179
|
+
none: false
|
180
|
+
requirements:
|
181
|
+
- - ! '>='
|
182
|
+
- !ruby/object:Gem::Version
|
183
|
+
version: '0'
|
184
|
+
type: :development
|
185
|
+
prerelease: false
|
186
|
+
version_requirements: !ruby/object:Gem::Requirement
|
187
|
+
none: false
|
188
|
+
requirements:
|
189
|
+
- - ! '>='
|
190
|
+
- !ruby/object:Gem::Version
|
191
|
+
version: '0'
|
192
|
+
- !ruby/object:Gem::Dependency
|
193
|
+
name: minitest
|
194
|
+
requirement: !ruby/object:Gem::Requirement
|
195
|
+
none: false
|
196
|
+
requirements:
|
197
|
+
- - ! '>='
|
198
|
+
- !ruby/object:Gem::Version
|
199
|
+
version: '0'
|
200
|
+
type: :development
|
201
|
+
prerelease: false
|
202
|
+
version_requirements: !ruby/object:Gem::Requirement
|
203
|
+
none: false
|
204
|
+
requirements:
|
205
|
+
- - ! '>='
|
206
|
+
- !ruby/object:Gem::Version
|
207
|
+
version: '0'
|
208
|
+
- !ruby/object:Gem::Dependency
|
209
|
+
name: minitest-reporters
|
210
|
+
requirement: !ruby/object:Gem::Requirement
|
211
|
+
none: false
|
212
|
+
requirements:
|
213
|
+
- - ! '>='
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0'
|
216
|
+
type: :development
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
none: false
|
220
|
+
requirements:
|
221
|
+
- - ! '>='
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
version: '0'
|
224
|
+
- !ruby/object:Gem::Dependency
|
225
|
+
name: mysql2
|
226
|
+
requirement: !ruby/object:Gem::Requirement
|
227
|
+
none: false
|
228
|
+
requirements:
|
229
|
+
- - ! '>='
|
230
|
+
- !ruby/object:Gem::Version
|
231
|
+
version: '0'
|
232
|
+
type: :development
|
233
|
+
prerelease: false
|
234
|
+
version_requirements: !ruby/object:Gem::Requirement
|
235
|
+
none: false
|
236
|
+
requirements:
|
237
|
+
- - ! '>='
|
238
|
+
- !ruby/object:Gem::Version
|
239
|
+
version: '0'
|
240
|
+
- !ruby/object:Gem::Dependency
|
241
|
+
name: rake
|
242
|
+
requirement: !ruby/object:Gem::Requirement
|
243
|
+
none: false
|
244
|
+
requirements:
|
245
|
+
- - ! '>='
|
246
|
+
- !ruby/object:Gem::Version
|
247
|
+
version: '0'
|
248
|
+
type: :development
|
249
|
+
prerelease: false
|
250
|
+
version_requirements: !ruby/object:Gem::Requirement
|
251
|
+
none: false
|
252
|
+
requirements:
|
253
|
+
- - ! '>='
|
254
|
+
- !ruby/object:Gem::Version
|
255
|
+
version: '0'
|
256
|
+
- !ruby/object:Gem::Dependency
|
257
|
+
name: yard
|
258
|
+
requirement: !ruby/object:Gem::Requirement
|
259
|
+
none: false
|
260
|
+
requirements:
|
261
|
+
- - ! '>='
|
262
|
+
- !ruby/object:Gem::Version
|
263
|
+
version: '0'
|
264
|
+
type: :development
|
121
265
|
prerelease: false
|
122
266
|
version_requirements: !ruby/object:Gem::Requirement
|
123
267
|
none: false
|
@@ -153,9 +297,20 @@ files:
|
|
153
297
|
- lib/data_miner/step/import.rb
|
154
298
|
- lib/data_miner/step/process.rb
|
155
299
|
- lib/data_miner/step/tap.rb
|
300
|
+
- lib/data_miner/unit_converter.rb
|
301
|
+
- lib/data_miner/unit_converter/alchemist.rb
|
302
|
+
- lib/data_miner/unit_converter/conversions.rb
|
156
303
|
- lib/data_miner/version.rb
|
304
|
+
- test/data_miner/test_attribute.rb
|
305
|
+
- test/data_miner/unit_converter/test_alchemist.rb
|
306
|
+
- test/data_miner/unit_converter/test_conversions.rb
|
157
307
|
- test/helper.rb
|
308
|
+
- test/support/breed.rb
|
158
309
|
- test/support/breeds.xls
|
310
|
+
- test/support/data_miner_with_alchemist.rb
|
311
|
+
- test/support/data_miner_with_conversions.rb
|
312
|
+
- test/support/data_miner_without_unit_converter.rb
|
313
|
+
- test/support/pet.rb
|
159
314
|
- test/support/pet_color_dictionary.en.csv
|
160
315
|
- test/support/pet_color_dictionary.es.csv
|
161
316
|
- test/support/pets.csv
|
@@ -165,6 +320,7 @@ files:
|
|
165
320
|
- test/test_earth_import.rb
|
166
321
|
- test/test_earth_tap.rb
|
167
322
|
- test/test_safety.rb
|
323
|
+
- test/test_unit_conversion.rb
|
168
324
|
homepage: https://github.com/seamusabshere/data_miner
|
169
325
|
licenses: []
|
170
326
|
post_install_message:
|
@@ -185,14 +341,22 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
185
341
|
version: '0'
|
186
342
|
requirements: []
|
187
343
|
rubyforge_project: data_miner
|
188
|
-
rubygems_version: 1.8.
|
344
|
+
rubygems_version: 1.8.24
|
189
345
|
signing_key:
|
190
346
|
specification_version: 3
|
191
347
|
summary: Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import
|
192
348
|
XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.
|
193
349
|
test_files:
|
350
|
+
- test/data_miner/test_attribute.rb
|
351
|
+
- test/data_miner/unit_converter/test_alchemist.rb
|
352
|
+
- test/data_miner/unit_converter/test_conversions.rb
|
194
353
|
- test/helper.rb
|
354
|
+
- test/support/breed.rb
|
195
355
|
- test/support/breeds.xls
|
356
|
+
- test/support/data_miner_with_alchemist.rb
|
357
|
+
- test/support/data_miner_with_conversions.rb
|
358
|
+
- test/support/data_miner_without_unit_converter.rb
|
359
|
+
- test/support/pet.rb
|
196
360
|
- test/support/pet_color_dictionary.en.csv
|
197
361
|
- test/support/pet_color_dictionary.es.csv
|
198
362
|
- test/support/pets.csv
|
@@ -202,4 +366,5 @@ test_files:
|
|
202
366
|
- test/test_earth_import.rb
|
203
367
|
- test/test_earth_tap.rb
|
204
368
|
- test/test_safety.rb
|
369
|
+
- test/test_unit_conversion.rb
|
205
370
|
has_rdoc:
|