data_miner 2.1.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +11 -0
- data/Gemfile +2 -9
- data/data_miner.gemspec +14 -4
- data/lib/data_miner.rb +18 -0
- data/lib/data_miner/attribute.rb +58 -27
- data/lib/data_miner/unit_converter.rb +12 -0
- data/lib/data_miner/unit_converter/alchemist.rb +11 -0
- data/lib/data_miner/unit_converter/conversions.rb +11 -0
- data/lib/data_miner/version.rb +1 -1
- data/test/data_miner/test_attribute.rb +36 -0
- data/test/data_miner/unit_converter/test_alchemist.rb +13 -0
- data/test/data_miner/unit_converter/test_conversions.rb +15 -0
- data/test/helper.rb +17 -64
- data/test/support/breed.rb +21 -0
- data/test/support/data_miner_with_alchemist.rb +13 -0
- data/test/support/data_miner_with_conversions.rb +16 -0
- data/test/support/data_miner_without_unit_converter.rb +51 -0
- data/test/support/pet.rb +34 -0
- data/test/support/pets.csv +4 -4
- data/test/test_data_miner.rb +11 -2
- data/test/test_data_miner_run_column_statistic.rb +2 -0
- data/test/test_earth_import.rb +2 -0
- data/test/test_earth_tap.rb +2 -0
- data/test/test_safety.rb +2 -0
- data/test/test_unit_conversion.rb +16 -0
- metadata +181 -16
data/CHANGELOG
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
2.2.0 / 2012-06-11
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* You must set DataMiner.unit_converter to :alchemist or :conversions if you wish to convert units
|
6
|
+
|
7
|
+
* Enhancements
|
8
|
+
|
9
|
+
* Swappable unit conversion libraries [@dkastner]
|
10
|
+
* Intelligent parsing of commas and periods in number fields [@ihough]
|
11
|
+
|
1
12
|
2.1.2 / 2012-05-22
|
2
13
|
|
3
14
|
* Breaking changes
|
data/Gemfile
CHANGED
@@ -2,15 +2,8 @@ source :rubygems
|
|
2
2
|
|
3
3
|
gemspec
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
gem 'minitest'
|
8
|
-
gem 'minitest-reporters'
|
9
|
-
gem 'mysql2'
|
10
|
-
gem 'rake'
|
11
|
-
gem 'yard'
|
12
|
-
gem 'earth'
|
13
|
-
gem 'lock_method'
|
5
|
+
gem 'conversions'
|
6
|
+
|
14
7
|
if RUBY_VERSION >= '1.9'
|
15
8
|
gem 'unicode_utils'
|
16
9
|
end
|
data/data_miner.gemspec
CHANGED
@@ -17,11 +17,21 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
18
|
s.require_paths = ["lib"]
|
19
19
|
|
20
|
-
s.add_runtime_dependency '
|
20
|
+
s.add_runtime_dependency 'aasm'
|
21
|
+
s.add_runtime_dependency 'active_record_inline_schema'
|
21
22
|
s.add_runtime_dependency 'activerecord', '>=2.3.4'
|
22
23
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
23
|
-
s.add_runtime_dependency 'conversions', '>=1.4.4'
|
24
24
|
s.add_runtime_dependency 'errata', '>=1.0.1'
|
25
|
-
s.add_runtime_dependency '
|
26
|
-
|
25
|
+
s.add_runtime_dependency 'remote_table', '>=1.2.2'
|
26
|
+
|
27
|
+
s.add_development_dependency 'dkastner-alchemist'
|
28
|
+
s.add_development_dependency 'conversions'
|
29
|
+
s.add_development_dependency 'earth'
|
30
|
+
s.add_development_dependency 'fuzzy_match'
|
31
|
+
s.add_development_dependency 'lock_method'
|
32
|
+
s.add_development_dependency 'minitest'
|
33
|
+
s.add_development_dependency 'minitest-reporters'
|
34
|
+
s.add_development_dependency 'mysql2'
|
35
|
+
s.add_development_dependency 'rake'
|
36
|
+
s.add_development_dependency 'yard'
|
27
37
|
end
|
data/lib/data_miner.rb
CHANGED
@@ -23,6 +23,7 @@ require 'data_miner/step/import'
|
|
23
23
|
require 'data_miner/step/tap'
|
24
24
|
require 'data_miner/step/process'
|
25
25
|
require 'data_miner/run'
|
26
|
+
require 'data_miner/unit_converter'
|
26
27
|
|
27
28
|
# A singleton class that holds global configuration for data mining.
|
28
29
|
#
|
@@ -45,6 +46,23 @@ class DataMiner
|
|
45
46
|
def compress_whitespace(str)
|
46
47
|
str.gsub(INNER_SPACE, ' ').strip
|
47
48
|
end
|
49
|
+
|
50
|
+
# Set the unit converter.
|
51
|
+
#
|
52
|
+
# @note As of 2012-05-30, there are problems with the alchemist gem and the use of the conversions gem instead is recommended.
|
53
|
+
#
|
54
|
+
# @param [Symbol,nil] conversion_library Either +:alchemist+ or +:conversions+
|
55
|
+
#
|
56
|
+
# @return [nil]
|
57
|
+
def unit_converter=(conversion_library)
|
58
|
+
@unit_converter = UnitConverter.load conversion_library
|
59
|
+
nil
|
60
|
+
end
|
61
|
+
|
62
|
+
# @return [#convert,nil] The user-selected unit converter or nil.
|
63
|
+
def unit_converter
|
64
|
+
@unit_converter
|
65
|
+
end
|
48
66
|
end
|
49
67
|
|
50
68
|
INNER_SPACE = /[ ]+/
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'conversions'
|
2
|
-
|
3
1
|
class DataMiner
|
4
2
|
# A mapping between a local model column and a remote data source column.
|
5
3
|
#
|
@@ -17,13 +15,23 @@ class DataMiner
|
|
17
15
|
if (invalid_option_keys = options.keys - VALID_OPTIONS).any?
|
18
16
|
errors << %{Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence}}
|
19
17
|
end
|
20
|
-
|
18
|
+
units_options = options.select { |k, _| k.to_s.include?('units') }
|
19
|
+
if units_options.any? and DataMiner.unit_converter.nil?
|
20
|
+
errors << %{You must set DataMiner.unit_converter to :alchemist or :conversions if you wish to convert units}
|
21
|
+
end
|
22
|
+
if units_options.any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
|
21
23
|
errors << %{#{units_options.inspect} is not a valid set of units definitions. Please supply a set like #{VALID_UNIT_DEFINITION_SETS.map(&:inspect).to_sentence}".}
|
22
24
|
end
|
23
25
|
errors
|
24
26
|
end
|
25
27
|
end
|
26
|
-
|
28
|
+
|
29
|
+
def number_column?
|
30
|
+
return @number_column_query[0] if @number_column_query.is_a?(Array)
|
31
|
+
@number_column_query = [model.columns_hash[name.to_s].number?]
|
32
|
+
@number_column_query[0]
|
33
|
+
end
|
34
|
+
|
27
35
|
VALID_OPTIONS = [
|
28
36
|
:from_units,
|
29
37
|
:to_units,
|
@@ -47,12 +55,12 @@ class DataMiner
|
|
47
55
|
]
|
48
56
|
|
49
57
|
VALID_UNIT_DEFINITION_SETS = [
|
50
|
-
[:units],
|
51
|
-
[:from_units, :to_units],
|
52
|
-
[:units_field_name],
|
53
|
-
[:units_field_name, :to_units],
|
54
|
-
[:units_field_number],
|
55
|
-
[:units_field_number, :to_units],
|
58
|
+
[:units], # no conversion
|
59
|
+
[:from_units, :to_units], # yes
|
60
|
+
[:units_field_name], # no
|
61
|
+
[:units_field_name, :to_units], # yes
|
62
|
+
[:units_field_number], # no
|
63
|
+
[:units_field_number, :to_units], # yes
|
56
64
|
]
|
57
65
|
|
58
66
|
DEFAULT_SPLIT_PATTERN = /\s+/
|
@@ -111,14 +119,15 @@ class DataMiner
|
|
111
119
|
# @return [Hash]
|
112
120
|
attr_reader :split
|
113
121
|
|
114
|
-
# Final units. May invoke a conversion using https://
|
122
|
+
# Final units. May invoke a conversion using https://rubygems.org/gems/alchemist
|
115
123
|
#
|
116
124
|
# If a local column named +[name]_units+ exists, it will be populated with this value.
|
117
125
|
#
|
118
126
|
# @return [Symbol]
|
119
127
|
attr_reader :to_units
|
120
128
|
|
121
|
-
# Initial units. May invoke a conversion using https://
|
129
|
+
# Initial units. May invoke a conversion using a conversion gem like https://rubygems.org/gems/alchemist
|
130
|
+
# Be sure to set DataMiner.unit_converter
|
122
131
|
# @return [Symbol]
|
123
132
|
attr_reader :from_units
|
124
133
|
|
@@ -186,6 +195,8 @@ class DataMiner
|
|
186
195
|
@overwrite = options.fetch :overwrite, DEFAULT_OVERWRITE
|
187
196
|
@units_field_name = options[:units_field_name]
|
188
197
|
@units_field_number = options[:units_field_number]
|
198
|
+
@convert_boolean = (@from_units.present? or (@to_units.present? and (@units_field_name.present? or @units_field_number.present?)))
|
199
|
+
@persist_units_boolean = (@to_units.present? or @units_field_name.present? or @units_field_number.present?)
|
189
200
|
@dictionary_mutex = ::Mutex.new
|
190
201
|
end
|
191
202
|
|
@@ -211,7 +222,7 @@ class DataMiner
|
|
211
222
|
currently_nil = new_value.nil?
|
212
223
|
end
|
213
224
|
|
214
|
-
if not currently_nil and
|
225
|
+
if not currently_nil and persist_units? and (final_to_units = (to_units || read_units(remote_row)))
|
215
226
|
local_record.send "#{name}_units=", final_to_units
|
216
227
|
end
|
217
228
|
end
|
@@ -244,6 +255,21 @@ class DataMiner
|
|
244
255
|
return value
|
245
256
|
end
|
246
257
|
value = value.to_s
|
258
|
+
if number_column?
|
259
|
+
period_position = value.rindex '.'
|
260
|
+
comma_position = value.rindex ','
|
261
|
+
# assume that ',' is a thousands separator and '.' is a decimal point unless we have evidence to the contrary
|
262
|
+
if period_position and comma_position and comma_position > period_position
|
263
|
+
# uncommon euro style 1.000,53
|
264
|
+
value = value.delete('.').gsub(',', '.')
|
265
|
+
elsif comma_position and comma_position > (value.length - 4)
|
266
|
+
# uncommon euro style 1000,53
|
267
|
+
value = value.gsub(',', '.')
|
268
|
+
elsif comma_position
|
269
|
+
# more common 1,000[.00] style - still don't want commas
|
270
|
+
value = value.delete(',')
|
271
|
+
end
|
272
|
+
end
|
247
273
|
if chars
|
248
274
|
value = value[chars]
|
249
275
|
end
|
@@ -252,7 +278,7 @@ class DataMiner
|
|
252
278
|
keep = split.fetch :keep, DEFAULT_SPLIT_KEEP
|
253
279
|
value = value.to_s.split(pattern)[keep].to_s
|
254
280
|
end
|
255
|
-
if value.blank? and (not
|
281
|
+
if value.blank? and (not text_column? or nullify_blank_strings)
|
256
282
|
return
|
257
283
|
end
|
258
284
|
value = DataMiner.compress_whitespace value
|
@@ -260,12 +286,7 @@ class DataMiner
|
|
260
286
|
value = DataMiner.upcase value
|
261
287
|
end
|
262
288
|
if convert?
|
263
|
-
|
264
|
-
final_to_units = to_units || read_units(row)
|
265
|
-
if final_from_units.blank? or final_to_units.blank?
|
266
|
-
raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
|
267
|
-
end
|
268
|
-
value = value.to_f.convert final_from_units, final_to_units
|
289
|
+
value = convert_units value, row
|
269
290
|
end
|
270
291
|
if sprintf
|
271
292
|
if sprintf.end_with?('f')
|
@@ -281,6 +302,16 @@ class DataMiner
|
|
281
302
|
value
|
282
303
|
end
|
283
304
|
|
305
|
+
# @private
|
306
|
+
def convert_units(value, row)
|
307
|
+
final_from_units = from_units || read_units(row)
|
308
|
+
final_to_units = to_units || read_units(row)
|
309
|
+
unless final_from_units and final_to_units
|
310
|
+
raise RuntimeError, "[data_miner] Missing units: from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
|
311
|
+
end
|
312
|
+
DataMiner.unit_converter.convert value, final_from_units, final_to_units
|
313
|
+
end
|
314
|
+
|
284
315
|
# @private
|
285
316
|
def refresh
|
286
317
|
@dictionary = nil
|
@@ -292,10 +323,10 @@ class DataMiner
|
|
292
323
|
step.model
|
293
324
|
end
|
294
325
|
|
295
|
-
def
|
296
|
-
return @
|
297
|
-
@
|
298
|
-
@
|
326
|
+
def text_column?
|
327
|
+
return @text_column_query[0] if @text_column_query.is_a?(Array)
|
328
|
+
@text_column_query = [model.columns_hash[name.to_s].text?]
|
329
|
+
@text_column_query[0]
|
299
330
|
end
|
300
331
|
|
301
332
|
def static?
|
@@ -307,11 +338,11 @@ class DataMiner
|
|
307
338
|
end
|
308
339
|
|
309
340
|
def convert?
|
310
|
-
|
341
|
+
@convert_boolean
|
311
342
|
end
|
312
343
|
|
313
|
-
def
|
314
|
-
|
344
|
+
def persist_units?
|
345
|
+
@persist_units_boolean
|
315
346
|
end
|
316
347
|
|
317
348
|
def read_units(row)
|
data/lib/data_miner/version.rb
CHANGED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe DataMiner::Attribute do
|
4
|
+
before do
|
5
|
+
DataMiner.unit_converter = :alchemist
|
6
|
+
end
|
7
|
+
|
8
|
+
describe '#convert?' do
|
9
|
+
it 'returns true if from_units is set' do
|
10
|
+
attribute = DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
|
11
|
+
assert attribute.send(:convert?)
|
12
|
+
end
|
13
|
+
it 'returns true if to_units and units_field_name are set' do
|
14
|
+
attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar', :to_units => :kilograms
|
15
|
+
assert attribute.send(:convert?)
|
16
|
+
end
|
17
|
+
it 'returns true if to_units and units_field_number are set' do
|
18
|
+
attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 3, :to_units => :kilograms
|
19
|
+
assert attribute.send(:convert?)
|
20
|
+
end
|
21
|
+
it 'returns false if units_field_name only is set' do
|
22
|
+
attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar'
|
23
|
+
refute attribute.send(:convert?)
|
24
|
+
end
|
25
|
+
it 'returns false if units_field_number only is set' do
|
26
|
+
attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 'bar'
|
27
|
+
refute attribute.send(:convert?)
|
28
|
+
end
|
29
|
+
it 'raises if no converter and units are used' do
|
30
|
+
DataMiner.unit_converter = nil
|
31
|
+
lambda {
|
32
|
+
DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
|
33
|
+
}.must_raise ArgumentError, /unit_converter/
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe 'DataMiner::UnitConverter::Alchemist' do
|
4
|
+
before do
|
5
|
+
DataMiner.unit_converter = :alchemist
|
6
|
+
end
|
7
|
+
|
8
|
+
describe '#convert' do
|
9
|
+
it 'converts a value from one unit to another' do
|
10
|
+
DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe 'DataMiner::UnitConverter::Conversions' do
|
4
|
+
before do
|
5
|
+
#DataMiner.unit_converter = :conversions
|
6
|
+
end
|
7
|
+
|
8
|
+
describe '#convert' do
|
9
|
+
it 'converts a value from one unit to another' do
|
10
|
+
# can't load both alchemist and conversions in same test run
|
11
|
+
# see test/test_unit_conversion for coverage of this adapter
|
12
|
+
#DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
data/test/helper.rb
CHANGED
@@ -11,11 +11,6 @@ require 'minitest/reporters'
|
|
11
11
|
MiniTest::Unit.runner = MiniTest::SuiteRunner.new
|
12
12
|
MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
13
13
|
|
14
|
-
cmd = %{mysql -u root -ppassword -e "DROP DATABASE data_miner_test; CREATE DATABASE data_miner_test CHARSET utf8"}
|
15
|
-
$stderr.puts "Running `#{cmd}`..."
|
16
|
-
system cmd
|
17
|
-
$stderr.puts "Done."
|
18
|
-
|
19
14
|
require 'active_record'
|
20
15
|
require 'logger'
|
21
16
|
ActiveRecord::Base.logger = Logger.new $stderr
|
@@ -31,68 +26,26 @@ ActiveRecord::Base.establish_connection(
|
|
31
26
|
ActiveRecord::Base.mass_assignment_sanitizer = :strict
|
32
27
|
|
33
28
|
require 'data_miner'
|
34
|
-
DataMiner::Run.auto_upgrade!
|
35
|
-
DataMiner::Run::ColumnStatistic.auto_upgrade!
|
36
|
-
DataMiner::Run.clear_locks
|
37
29
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
30
|
+
def init_database(unit_converter = :conversions)
|
31
|
+
cmd = %{mysql -u root -ppassword -e "DROP DATABASE data_miner_test; CREATE DATABASE data_miner_test CHARSET utf8"}
|
32
|
+
$stderr.puts "Running `#{cmd}`..."
|
33
|
+
system cmd
|
34
|
+
$stderr.puts "Done."
|
43
35
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
col :breed_id
|
48
|
-
col :color_id
|
49
|
-
col :age, :type => :integer
|
50
|
-
col :age_units
|
51
|
-
col :weight, :type => :float
|
52
|
-
col :weight_units
|
53
|
-
col :height, :type => :integer
|
54
|
-
col :height_units
|
55
|
-
col :favorite_food
|
56
|
-
col :command_phrase
|
57
|
-
belongs_to :breed
|
58
|
-
data_miner do
|
59
|
-
process :auto_upgrade!
|
60
|
-
process :run_data_miner_on_parent_associations!
|
61
|
-
import("A list of pets", :url => "file://#{PETS}") do
|
62
|
-
key :name
|
63
|
-
store :age, :units_field_name => 'age_units'
|
64
|
-
store :breed_id, :field_name => :breed, :nullify_blank_strings => true
|
65
|
-
store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
|
66
|
-
store :weight, :from_units => :pounds, :to_units => :kilograms
|
67
|
-
store :favorite_food, :nullify_blank_strings => true
|
68
|
-
store :command_phrase
|
69
|
-
store :height, :units => :centimetres
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
36
|
+
DataMiner::Run.auto_upgrade!
|
37
|
+
DataMiner::Run::ColumnStatistic.auto_upgrade!
|
38
|
+
DataMiner::Run.clear_locks
|
73
39
|
|
74
|
-
|
75
|
-
class << self
|
76
|
-
def update_average_age!
|
77
|
-
# make sure pet is populated
|
78
|
-
Pet.run_data_miner!
|
79
|
-
update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
|
80
|
-
end
|
81
|
-
end
|
82
|
-
self.primary_key = "name"
|
83
|
-
col :name
|
84
|
-
col :average_age, :type => :float
|
85
|
-
data_miner do
|
86
|
-
process :auto_upgrade!
|
87
|
-
import("A list of breeds", :url => "file://#{BREEDS}") do
|
88
|
-
key :name, :field_name => 'Breed name'
|
89
|
-
end
|
90
|
-
process :update_average_age!
|
91
|
-
end
|
40
|
+
DataMiner.unit_converter = unit_converter
|
92
41
|
end
|
93
42
|
|
94
|
-
|
95
|
-
|
96
|
-
|
43
|
+
def init_models
|
44
|
+
require 'support/breed'
|
45
|
+
require 'support/pet'
|
46
|
+
Pet.auto_upgrade!
|
97
47
|
|
98
|
-
|
48
|
+
ActiveRecord::Base.descendants.each do |model|
|
49
|
+
model.attr_accessible nil
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
BREEDS = File.expand_path('../breeds.xls', __FILE__)
|
2
|
+
|
3
|
+
class Breed < ActiveRecord::Base
|
4
|
+
class << self
|
5
|
+
def update_average_age!
|
6
|
+
# make sure pet is populated
|
7
|
+
Pet.run_data_miner!
|
8
|
+
update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
|
9
|
+
end
|
10
|
+
end
|
11
|
+
self.primary_key = "name"
|
12
|
+
col :name
|
13
|
+
col :average_age, :type => :float
|
14
|
+
data_miner do
|
15
|
+
process :auto_upgrade!
|
16
|
+
import("A list of breeds", :url => "file://#{BREEDS}") do
|
17
|
+
key :name, :field_name => 'Breed name'
|
18
|
+
end
|
19
|
+
process :update_average_age!
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe 'DataMiner with Alchemist' do
|
4
|
+
before do
|
5
|
+
init_database(:alchemist)
|
6
|
+
init_models
|
7
|
+
Pet.run_data_miner!
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'converts convertible units' do
|
11
|
+
Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to.kilograms.to_f
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
require 'conversions'
|
4
|
+
Conversions.register :years, :years, 1
|
5
|
+
|
6
|
+
describe 'DataMiner with Conversions' do
|
7
|
+
before do
|
8
|
+
init_database(:conversions)
|
9
|
+
init_models
|
10
|
+
Pet.run_data_miner!
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'converts convertible units' do
|
14
|
+
Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to(:kilograms)
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class MyPet < ActiveRecord::Base
|
4
|
+
PETS = File.expand_path('../pets.csv', __FILE__)
|
5
|
+
COLOR_DICTIONARY_ENGLISH = File.expand_path('../pet_color_dictionary.en.csv', __FILE__)
|
6
|
+
|
7
|
+
self.primary_key = "name"
|
8
|
+
col :name
|
9
|
+
col :color_id
|
10
|
+
col :age, :type => :integer
|
11
|
+
col :age_units
|
12
|
+
col :weight, :type => :float
|
13
|
+
col :weight_units
|
14
|
+
col :height, :type => :integer
|
15
|
+
col :height_units
|
16
|
+
col :favorite_food
|
17
|
+
col :command_phrase
|
18
|
+
|
19
|
+
data_miner do
|
20
|
+
process :auto_upgrade!
|
21
|
+
import("A list of pets", :url => "file://#{PETS}") do
|
22
|
+
key :name
|
23
|
+
store :age
|
24
|
+
store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
|
25
|
+
store :weight
|
26
|
+
store :favorite_food, :nullify_blank_strings => true
|
27
|
+
store :command_phrase
|
28
|
+
store :height, :units => :centimetres
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe 'DataMiner with Conversions' do
|
34
|
+
it 'happens when DataMiner.unit_converter is nil' do
|
35
|
+
DataMiner.unit_converter.must_be_nil
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'converts convertible units' do
|
39
|
+
init_database(nil)
|
40
|
+
MyPet.run_data_miner!
|
41
|
+
MyPet.find('Pierre').weight.must_equal 4.4
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'raises an error if conversions are attempted' do
|
45
|
+
init_database(nil)
|
46
|
+
lambda do
|
47
|
+
init_models
|
48
|
+
Pet.run_data_miner!
|
49
|
+
end.must_raise DataMiner::Attribute::NoConverterSet
|
50
|
+
end
|
51
|
+
end
|
data/test/support/pet.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
PETS = File.expand_path('../pets.csv', __FILE__)
|
2
|
+
PETS_FUNNY = File.expand_path('../pets_funny.csv', __FILE__)
|
3
|
+
COLOR_DICTIONARY_ENGLISH = File.expand_path('../pet_color_dictionary.en.csv', __FILE__)
|
4
|
+
COLOR_DICTIONARY_SPANISH = File.expand_path('../pet_color_dictionary.es.csv', __FILE__)
|
5
|
+
|
6
|
+
class Pet < ActiveRecord::Base
|
7
|
+
self.primary_key = "name"
|
8
|
+
col :name
|
9
|
+
col :breed_id
|
10
|
+
col :color_id
|
11
|
+
col :age, :type => :integer
|
12
|
+
col :age_units
|
13
|
+
col :weight, :type => :float
|
14
|
+
col :weight_units
|
15
|
+
col :height, :type => :float
|
16
|
+
col :height_units
|
17
|
+
col :favorite_food
|
18
|
+
col :command_phrase
|
19
|
+
belongs_to :breed
|
20
|
+
data_miner do
|
21
|
+
process :auto_upgrade!
|
22
|
+
process :run_data_miner_on_parent_associations!
|
23
|
+
import("A list of pets", :url => "file://#{PETS}") do
|
24
|
+
key :name
|
25
|
+
store :age, :units_field_name => 'age_units'
|
26
|
+
store :breed_id, :field_name => :breed, :nullify_blank_strings => true
|
27
|
+
store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
|
28
|
+
store :weight, :from_units => :pounds, :to_units => :kilograms
|
29
|
+
store :favorite_food, :nullify_blank_strings => true
|
30
|
+
store :command_phrase
|
31
|
+
store :height, :units => :millimetres
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/test/support/pets.csv
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
|
2
|
-
Pierre,Tabby,GO,4,years,4.4,
|
3
|
-
Jerry,Beagle,BR/BL,5,years,10,
|
4
|
-
Amigo,Spanish Lizarto,GR/BU,17,years," ",
|
5
|
-
Johnny,Beagle,BR/BL,2,years,20,
|
2
|
+
Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
|
3
|
+
Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
|
4
|
+
Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
|
5
|
+
Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
|
6
6
|
Nemo,,,,,,,,
|
data/test/test_data_miner.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
require 'helper'
|
3
|
+
init_database
|
4
|
+
init_models
|
3
5
|
|
4
6
|
describe DataMiner do
|
5
7
|
describe "when used to import example data about pets" do
|
@@ -65,15 +67,22 @@ describe DataMiner do
|
|
65
67
|
Breed.run_data_miner!
|
66
68
|
Breed.find('Beagle').average_age.must_equal((5+2)/2.0)
|
67
69
|
end
|
70
|
+
it "properly interprets numbers using comma or period separators" do
|
71
|
+
Pet.run_data_miner!
|
72
|
+
Pet.find('Pierre').height.must_equal 3000.5
|
73
|
+
Pet.find('Jerry').height.must_equal 3000.0
|
74
|
+
Pet.find('Amigo').height.must_equal 300.5
|
75
|
+
Pet.find('Johnny').height.must_equal 4000.0
|
76
|
+
end
|
68
77
|
it "performs unit conversions" do
|
69
78
|
Pet.run_data_miner!
|
70
|
-
Pet.find('Pierre').weight.must_be_close_to
|
79
|
+
Pet.find('Pierre').weight.must_be_close_to 1.9958 # 4.4 pounds in kilograms
|
71
80
|
end
|
72
81
|
it "sets units" do
|
73
82
|
Pet.run_data_miner!
|
74
83
|
Pet.find('Pierre').age_units.must_equal 'years'
|
75
84
|
Pet.find('Pierre').weight_units.must_equal 'kilograms'
|
76
|
-
Pet.find('Pierre').height_units.must_equal '
|
85
|
+
Pet.find('Pierre').height_units.must_equal 'millimetres'
|
77
86
|
end
|
78
87
|
it "always nullifies numeric columns when blank/nil is the input" do
|
79
88
|
Pet.run_data_miner!
|
data/test/test_earth_import.rb
CHANGED
data/test/test_earth_tap.rb
CHANGED
data/test/test_safety.rb
CHANGED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
describe 'DataMiner unit conversion' do
|
4
|
+
it "blows up if you don't specify a converter" do
|
5
|
+
output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_without_unit_converter.rb', __FILE__)}`
|
6
|
+
refute $?.success?, output
|
7
|
+
end
|
8
|
+
it 'can convert with alchemist' do
|
9
|
+
output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_alchemist.rb', __FILE__)}`
|
10
|
+
assert $?.success?, output
|
11
|
+
end
|
12
|
+
it 'can convert with conversions' do
|
13
|
+
output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_conversions.rb', __FILE__)}`
|
14
|
+
assert $?.success?, output
|
15
|
+
end
|
16
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,16 +11,16 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-
|
14
|
+
date: 2012-06-11 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
|
-
name:
|
17
|
+
name: aasm
|
18
18
|
requirement: !ruby/object:Gem::Requirement
|
19
19
|
none: false
|
20
20
|
requirements:
|
21
21
|
- - ! '>='
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: '0'
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
26
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -28,7 +28,23 @@ dependencies:
|
|
28
28
|
requirements:
|
29
29
|
- - ! '>='
|
30
30
|
- !ruby/object:Gem::Version
|
31
|
-
version:
|
31
|
+
version: '0'
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: active_record_inline_schema
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
none: false
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
32
48
|
- !ruby/object:Gem::Dependency
|
33
49
|
name: activerecord
|
34
50
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,13 +78,13 @@ dependencies:
|
|
62
78
|
- !ruby/object:Gem::Version
|
63
79
|
version: 2.3.4
|
64
80
|
- !ruby/object:Gem::Dependency
|
65
|
-
name:
|
81
|
+
name: errata
|
66
82
|
requirement: !ruby/object:Gem::Requirement
|
67
83
|
none: false
|
68
84
|
requirements:
|
69
85
|
- - ! '>='
|
70
86
|
- !ruby/object:Gem::Version
|
71
|
-
version: 1.
|
87
|
+
version: 1.0.1
|
72
88
|
type: :runtime
|
73
89
|
prerelease: false
|
74
90
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -76,15 +92,15 @@ dependencies:
|
|
76
92
|
requirements:
|
77
93
|
- - ! '>='
|
78
94
|
- !ruby/object:Gem::Version
|
79
|
-
version: 1.
|
95
|
+
version: 1.0.1
|
80
96
|
- !ruby/object:Gem::Dependency
|
81
|
-
name:
|
97
|
+
name: remote_table
|
82
98
|
requirement: !ruby/object:Gem::Requirement
|
83
99
|
none: false
|
84
100
|
requirements:
|
85
101
|
- - ! '>='
|
86
102
|
- !ruby/object:Gem::Version
|
87
|
-
version: 1.
|
103
|
+
version: 1.2.2
|
88
104
|
type: :runtime
|
89
105
|
prerelease: false
|
90
106
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -92,16 +108,16 @@ dependencies:
|
|
92
108
|
requirements:
|
93
109
|
- - ! '>='
|
94
110
|
- !ruby/object:Gem::Version
|
95
|
-
version: 1.
|
111
|
+
version: 1.2.2
|
96
112
|
- !ruby/object:Gem::Dependency
|
97
|
-
name:
|
113
|
+
name: dkastner-alchemist
|
98
114
|
requirement: !ruby/object:Gem::Requirement
|
99
115
|
none: false
|
100
116
|
requirements:
|
101
117
|
- - ! '>='
|
102
118
|
- !ruby/object:Gem::Version
|
103
119
|
version: '0'
|
104
|
-
type: :
|
120
|
+
type: :development
|
105
121
|
prerelease: false
|
106
122
|
version_requirements: !ruby/object:Gem::Requirement
|
107
123
|
none: false
|
@@ -110,14 +126,142 @@ dependencies:
|
|
110
126
|
- !ruby/object:Gem::Version
|
111
127
|
version: '0'
|
112
128
|
- !ruby/object:Gem::Dependency
|
113
|
-
name:
|
129
|
+
name: conversions
|
114
130
|
requirement: !ruby/object:Gem::Requirement
|
115
131
|
none: false
|
116
132
|
requirements:
|
117
133
|
- - ! '>='
|
118
134
|
- !ruby/object:Gem::Version
|
119
135
|
version: '0'
|
120
|
-
type: :
|
136
|
+
type: :development
|
137
|
+
prerelease: false
|
138
|
+
version_requirements: !ruby/object:Gem::Requirement
|
139
|
+
none: false
|
140
|
+
requirements:
|
141
|
+
- - ! '>='
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: '0'
|
144
|
+
- !ruby/object:Gem::Dependency
|
145
|
+
name: earth
|
146
|
+
requirement: !ruby/object:Gem::Requirement
|
147
|
+
none: false
|
148
|
+
requirements:
|
149
|
+
- - ! '>='
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: '0'
|
152
|
+
type: :development
|
153
|
+
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
none: false
|
156
|
+
requirements:
|
157
|
+
- - ! '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
- !ruby/object:Gem::Dependency
|
161
|
+
name: fuzzy_match
|
162
|
+
requirement: !ruby/object:Gem::Requirement
|
163
|
+
none: false
|
164
|
+
requirements:
|
165
|
+
- - ! '>='
|
166
|
+
- !ruby/object:Gem::Version
|
167
|
+
version: '0'
|
168
|
+
type: :development
|
169
|
+
prerelease: false
|
170
|
+
version_requirements: !ruby/object:Gem::Requirement
|
171
|
+
none: false
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
176
|
+
- !ruby/object:Gem::Dependency
|
177
|
+
name: lock_method
|
178
|
+
requirement: !ruby/object:Gem::Requirement
|
179
|
+
none: false
|
180
|
+
requirements:
|
181
|
+
- - ! '>='
|
182
|
+
- !ruby/object:Gem::Version
|
183
|
+
version: '0'
|
184
|
+
type: :development
|
185
|
+
prerelease: false
|
186
|
+
version_requirements: !ruby/object:Gem::Requirement
|
187
|
+
none: false
|
188
|
+
requirements:
|
189
|
+
- - ! '>='
|
190
|
+
- !ruby/object:Gem::Version
|
191
|
+
version: '0'
|
192
|
+
- !ruby/object:Gem::Dependency
|
193
|
+
name: minitest
|
194
|
+
requirement: !ruby/object:Gem::Requirement
|
195
|
+
none: false
|
196
|
+
requirements:
|
197
|
+
- - ! '>='
|
198
|
+
- !ruby/object:Gem::Version
|
199
|
+
version: '0'
|
200
|
+
type: :development
|
201
|
+
prerelease: false
|
202
|
+
version_requirements: !ruby/object:Gem::Requirement
|
203
|
+
none: false
|
204
|
+
requirements:
|
205
|
+
- - ! '>='
|
206
|
+
- !ruby/object:Gem::Version
|
207
|
+
version: '0'
|
208
|
+
- !ruby/object:Gem::Dependency
|
209
|
+
name: minitest-reporters
|
210
|
+
requirement: !ruby/object:Gem::Requirement
|
211
|
+
none: false
|
212
|
+
requirements:
|
213
|
+
- - ! '>='
|
214
|
+
- !ruby/object:Gem::Version
|
215
|
+
version: '0'
|
216
|
+
type: :development
|
217
|
+
prerelease: false
|
218
|
+
version_requirements: !ruby/object:Gem::Requirement
|
219
|
+
none: false
|
220
|
+
requirements:
|
221
|
+
- - ! '>='
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
version: '0'
|
224
|
+
- !ruby/object:Gem::Dependency
|
225
|
+
name: mysql2
|
226
|
+
requirement: !ruby/object:Gem::Requirement
|
227
|
+
none: false
|
228
|
+
requirements:
|
229
|
+
- - ! '>='
|
230
|
+
- !ruby/object:Gem::Version
|
231
|
+
version: '0'
|
232
|
+
type: :development
|
233
|
+
prerelease: false
|
234
|
+
version_requirements: !ruby/object:Gem::Requirement
|
235
|
+
none: false
|
236
|
+
requirements:
|
237
|
+
- - ! '>='
|
238
|
+
- !ruby/object:Gem::Version
|
239
|
+
version: '0'
|
240
|
+
- !ruby/object:Gem::Dependency
|
241
|
+
name: rake
|
242
|
+
requirement: !ruby/object:Gem::Requirement
|
243
|
+
none: false
|
244
|
+
requirements:
|
245
|
+
- - ! '>='
|
246
|
+
- !ruby/object:Gem::Version
|
247
|
+
version: '0'
|
248
|
+
type: :development
|
249
|
+
prerelease: false
|
250
|
+
version_requirements: !ruby/object:Gem::Requirement
|
251
|
+
none: false
|
252
|
+
requirements:
|
253
|
+
- - ! '>='
|
254
|
+
- !ruby/object:Gem::Version
|
255
|
+
version: '0'
|
256
|
+
- !ruby/object:Gem::Dependency
|
257
|
+
name: yard
|
258
|
+
requirement: !ruby/object:Gem::Requirement
|
259
|
+
none: false
|
260
|
+
requirements:
|
261
|
+
- - ! '>='
|
262
|
+
- !ruby/object:Gem::Version
|
263
|
+
version: '0'
|
264
|
+
type: :development
|
121
265
|
prerelease: false
|
122
266
|
version_requirements: !ruby/object:Gem::Requirement
|
123
267
|
none: false
|
@@ -153,9 +297,20 @@ files:
|
|
153
297
|
- lib/data_miner/step/import.rb
|
154
298
|
- lib/data_miner/step/process.rb
|
155
299
|
- lib/data_miner/step/tap.rb
|
300
|
+
- lib/data_miner/unit_converter.rb
|
301
|
+
- lib/data_miner/unit_converter/alchemist.rb
|
302
|
+
- lib/data_miner/unit_converter/conversions.rb
|
156
303
|
- lib/data_miner/version.rb
|
304
|
+
- test/data_miner/test_attribute.rb
|
305
|
+
- test/data_miner/unit_converter/test_alchemist.rb
|
306
|
+
- test/data_miner/unit_converter/test_conversions.rb
|
157
307
|
- test/helper.rb
|
308
|
+
- test/support/breed.rb
|
158
309
|
- test/support/breeds.xls
|
310
|
+
- test/support/data_miner_with_alchemist.rb
|
311
|
+
- test/support/data_miner_with_conversions.rb
|
312
|
+
- test/support/data_miner_without_unit_converter.rb
|
313
|
+
- test/support/pet.rb
|
159
314
|
- test/support/pet_color_dictionary.en.csv
|
160
315
|
- test/support/pet_color_dictionary.es.csv
|
161
316
|
- test/support/pets.csv
|
@@ -165,6 +320,7 @@ files:
|
|
165
320
|
- test/test_earth_import.rb
|
166
321
|
- test/test_earth_tap.rb
|
167
322
|
- test/test_safety.rb
|
323
|
+
- test/test_unit_conversion.rb
|
168
324
|
homepage: https://github.com/seamusabshere/data_miner
|
169
325
|
licenses: []
|
170
326
|
post_install_message:
|
@@ -185,14 +341,22 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
185
341
|
version: '0'
|
186
342
|
requirements: []
|
187
343
|
rubyforge_project: data_miner
|
188
|
-
rubygems_version: 1.8.
|
344
|
+
rubygems_version: 1.8.24
|
189
345
|
signing_key:
|
190
346
|
specification_version: 3
|
191
347
|
summary: Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import
|
192
348
|
XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.
|
193
349
|
test_files:
|
350
|
+
- test/data_miner/test_attribute.rb
|
351
|
+
- test/data_miner/unit_converter/test_alchemist.rb
|
352
|
+
- test/data_miner/unit_converter/test_conversions.rb
|
194
353
|
- test/helper.rb
|
354
|
+
- test/support/breed.rb
|
195
355
|
- test/support/breeds.xls
|
356
|
+
- test/support/data_miner_with_alchemist.rb
|
357
|
+
- test/support/data_miner_with_conversions.rb
|
358
|
+
- test/support/data_miner_without_unit_converter.rb
|
359
|
+
- test/support/pet.rb
|
196
360
|
- test/support/pet_color_dictionary.en.csv
|
197
361
|
- test/support/pet_color_dictionary.es.csv
|
198
362
|
- test/support/pets.csv
|
@@ -202,4 +366,5 @@ test_files:
|
|
202
366
|
- test/test_earth_import.rb
|
203
367
|
- test/test_earth_tap.rb
|
204
368
|
- test/test_safety.rb
|
369
|
+
- test/test_unit_conversion.rb
|
205
370
|
has_rdoc:
|