data_miner 2.1.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
1
+ 2.2.0 / 2012-06-11
2
+
3
+ * Breaking changes
4
+
5
+ * You must set DataMiner.unit_converter to :alchemist or :conversions if you wish to convert units
6
+
7
+ * Enhancements
8
+
9
+ * Swappable unit conversion libraries [@dkastner]
10
+ * Intelligent parsing of commas and periods in number fields [@ihough]
11
+
1
12
  2.1.2 / 2012-05-22
2
13
 
3
14
  * Breaking changes
data/Gemfile CHANGED
@@ -2,15 +2,8 @@ source :rubygems
2
2
 
3
3
  gemspec
4
4
 
5
- # development dependencies
6
- gem 'fuzzy_match'
7
- gem 'minitest'
8
- gem 'minitest-reporters'
9
- gem 'mysql2'
10
- gem 'rake'
11
- gem 'yard'
12
- gem 'earth'
13
- gem 'lock_method'
5
+ gem 'conversions'
6
+
14
7
  if RUBY_VERSION >= '1.9'
15
8
  gem 'unicode_utils'
16
9
  end
data/data_miner.gemspec CHANGED
@@ -17,11 +17,21 @@ Gem::Specification.new do |s|
17
17
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
18
  s.require_paths = ["lib"]
19
19
 
20
- s.add_runtime_dependency 'remote_table', '>=1.2.2'
20
+ s.add_runtime_dependency 'aasm'
21
+ s.add_runtime_dependency 'active_record_inline_schema'
21
22
  s.add_runtime_dependency 'activerecord', '>=2.3.4'
22
23
  s.add_runtime_dependency 'activesupport', '>=2.3.4'
23
- s.add_runtime_dependency 'conversions', '>=1.4.4'
24
24
  s.add_runtime_dependency 'errata', '>=1.0.1'
25
- s.add_runtime_dependency 'active_record_inline_schema'
26
- s.add_runtime_dependency 'aasm'
25
+ s.add_runtime_dependency 'remote_table', '>=1.2.2'
26
+
27
+ s.add_development_dependency 'dkastner-alchemist'
28
+ s.add_development_dependency 'conversions'
29
+ s.add_development_dependency 'earth'
30
+ s.add_development_dependency 'fuzzy_match'
31
+ s.add_development_dependency 'lock_method'
32
+ s.add_development_dependency 'minitest'
33
+ s.add_development_dependency 'minitest-reporters'
34
+ s.add_development_dependency 'mysql2'
35
+ s.add_development_dependency 'rake'
36
+ s.add_development_dependency 'yard'
27
37
  end
data/lib/data_miner.rb CHANGED
@@ -23,6 +23,7 @@ require 'data_miner/step/import'
23
23
  require 'data_miner/step/tap'
24
24
  require 'data_miner/step/process'
25
25
  require 'data_miner/run'
26
+ require 'data_miner/unit_converter'
26
27
 
27
28
  # A singleton class that holds global configuration for data mining.
28
29
  #
@@ -45,6 +46,23 @@ class DataMiner
45
46
  def compress_whitespace(str)
46
47
  str.gsub(INNER_SPACE, ' ').strip
47
48
  end
49
+
50
+ # Set the unit converter.
51
+ #
52
+ # @note As of 2012-05-30, there are problems with the alchemist gem and the use of the conversions gem instead is recommended.
53
+ #
54
+ # @param [Symbol,nil] conversion_library Either +:alchemist+ or +:conversions+
55
+ #
56
+ # @return [nil]
57
+ def unit_converter=(conversion_library)
58
+ @unit_converter = UnitConverter.load conversion_library
59
+ nil
60
+ end
61
+
62
+ # @return [#convert,nil] The user-selected unit converter or nil.
63
+ def unit_converter
64
+ @unit_converter
65
+ end
48
66
  end
49
67
 
50
68
  INNER_SPACE = /[ ]+/
@@ -1,5 +1,3 @@
1
- require 'conversions'
2
-
3
1
  class DataMiner
4
2
  # A mapping between a local model column and a remote data source column.
5
3
  #
@@ -17,13 +15,23 @@ class DataMiner
17
15
  if (invalid_option_keys = options.keys - VALID_OPTIONS).any?
18
16
  errors << %{Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence}}
19
17
  end
20
- if (units_options = options.select { |k, _| k.to_s.include?('units') }).any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
18
+ units_options = options.select { |k, _| k.to_s.include?('units') }
19
+ if units_options.any? and DataMiner.unit_converter.nil?
20
+ errors << %{You must set DataMiner.unit_converter to :alchemist or :conversions if you wish to convert units}
21
+ end
22
+ if units_options.any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
21
23
  errors << %{#{units_options.inspect} is not a valid set of units definitions. Please supply a set like #{VALID_UNIT_DEFINITION_SETS.map(&:inspect).to_sentence}".}
22
24
  end
23
25
  errors
24
26
  end
25
27
  end
26
-
28
+
29
+ def number_column?
30
+ return @number_column_query[0] if @number_column_query.is_a?(Array)
31
+ @number_column_query = [model.columns_hash[name.to_s].number?]
32
+ @number_column_query[0]
33
+ end
34
+
27
35
  VALID_OPTIONS = [
28
36
  :from_units,
29
37
  :to_units,
@@ -47,12 +55,12 @@ class DataMiner
47
55
  ]
48
56
 
49
57
  VALID_UNIT_DEFINITION_SETS = [
50
- [:units],
51
- [:from_units, :to_units],
52
- [:units_field_name],
53
- [:units_field_name, :to_units],
54
- [:units_field_number],
55
- [:units_field_number, :to_units],
58
+ [:units], # no conversion
59
+ [:from_units, :to_units], # yes
60
+ [:units_field_name], # no
61
+ [:units_field_name, :to_units], # yes
62
+ [:units_field_number], # no
63
+ [:units_field_number, :to_units], # yes
56
64
  ]
57
65
 
58
66
  DEFAULT_SPLIT_PATTERN = /\s+/
@@ -111,14 +119,15 @@ class DataMiner
111
119
  # @return [Hash]
112
120
  attr_reader :split
113
121
 
114
- # Final units. May invoke a conversion using https://github.com/seamusabshere/conversions
122
+ # Final units. May invoke a conversion using the library selected via DataMiner.unit_converter (for example https://rubygems.org/gems/alchemist)
115
123
  #
116
124
  # If a local column named +[name]_units+ exists, it will be populated with this value.
117
125
  #
118
126
  # @return [Symbol]
119
127
  attr_reader :to_units
120
128
 
121
- # Initial units. May invoke a conversion using https://github.com/seamusabshere/conversions
129
+ # Initial units. May invoke a conversion using a conversion gem like https://rubygems.org/gems/alchemist
130
+ # Be sure to set DataMiner.unit_converter
122
131
  # @return [Symbol]
123
132
  attr_reader :from_units
124
133
 
@@ -186,6 +195,8 @@ class DataMiner
186
195
  @overwrite = options.fetch :overwrite, DEFAULT_OVERWRITE
187
196
  @units_field_name = options[:units_field_name]
188
197
  @units_field_number = options[:units_field_number]
198
+ @convert_boolean = (@from_units.present? or (@to_units.present? and (@units_field_name.present? or @units_field_number.present?)))
199
+ @persist_units_boolean = (@to_units.present? or @units_field_name.present? or @units_field_number.present?)
189
200
  @dictionary_mutex = ::Mutex.new
190
201
  end
191
202
 
@@ -211,7 +222,7 @@ class DataMiner
211
222
  currently_nil = new_value.nil?
212
223
  end
213
224
 
214
- if not currently_nil and units? and (final_to_units = (to_units || read_units(remote_row)))
225
+ if not currently_nil and persist_units? and (final_to_units = (to_units || read_units(remote_row)))
215
226
  local_record.send "#{name}_units=", final_to_units
216
227
  end
217
228
  end
@@ -244,6 +255,21 @@ class DataMiner
244
255
  return value
245
256
  end
246
257
  value = value.to_s
258
+ if number_column?
259
+ period_position = value.rindex '.'
260
+ comma_position = value.rindex ','
261
+ # assume that ',' is a thousands separator and '.' is a decimal point unless we have evidence to the contrary
262
+ if period_position and comma_position and comma_position > period_position
263
+ # uncommon euro style 1.000,53
264
+ value = value.delete('.').gsub(',', '.')
265
+ elsif comma_position and comma_position > (value.length - 4)
266
+ # uncommon euro style 1000,53
267
+ value = value.gsub(',', '.')
268
+ elsif comma_position
269
+ # more common 1,000[.00] style - still don't want commas
270
+ value = value.delete(',')
271
+ end
272
+ end
247
273
  if chars
248
274
  value = value[chars]
249
275
  end
@@ -252,7 +278,7 @@ class DataMiner
252
278
  keep = split.fetch :keep, DEFAULT_SPLIT_KEEP
253
279
  value = value.to_s.split(pattern)[keep].to_s
254
280
  end
255
- if value.blank? and (not stringlike_column? or nullify_blank_strings)
281
+ if value.blank? and (not text_column? or nullify_blank_strings)
256
282
  return
257
283
  end
258
284
  value = DataMiner.compress_whitespace value
@@ -260,12 +286,7 @@ class DataMiner
260
286
  value = DataMiner.upcase value
261
287
  end
262
288
  if convert?
263
- final_from_units = from_units || read_units(row)
264
- final_to_units = to_units || read_units(row)
265
- if final_from_units.blank? or final_to_units.blank?
266
- raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
267
- end
268
- value = value.to_f.convert final_from_units, final_to_units
289
+ value = convert_units value, row
269
290
  end
270
291
  if sprintf
271
292
  if sprintf.end_with?('f')
@@ -281,6 +302,16 @@ class DataMiner
281
302
  value
282
303
  end
283
304
 
305
+ # @private
306
+ def convert_units(value, row)
307
+ final_from_units = from_units || read_units(row)
308
+ final_to_units = to_units || read_units(row)
309
+ unless final_from_units and final_to_units
310
+ raise RuntimeError, "[data_miner] Missing units: from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
311
+ end
312
+ DataMiner.unit_converter.convert value, final_from_units, final_to_units
313
+ end
314
+
284
315
  # @private
285
316
  def refresh
286
317
  @dictionary = nil
@@ -292,10 +323,10 @@ class DataMiner
292
323
  step.model
293
324
  end
294
325
 
295
- def stringlike_column?
296
- return @stringlike_column_query[0] if @stringlike_column_query.is_a?(::Array)
297
- @stringlike_column_query = [model.columns_hash[name.to_s].type == :string]
298
- @stringlike_column_query[0]
326
+ def text_column?
327
+ return @text_column_query[0] if @text_column_query.is_a?(Array)
328
+ @text_column_query = [model.columns_hash[name.to_s].text?]
329
+ @text_column_query[0]
299
330
  end
300
331
 
301
332
  def static?
@@ -307,11 +338,11 @@ class DataMiner
307
338
  end
308
339
 
309
340
  def convert?
310
- from_units.present? or units_field_name.present? or units_field_number.present?
341
+ @convert_boolean
311
342
  end
312
343
 
313
- def units?
314
- to_units.present? or units_field_name.present? or units_field_number.present?
344
+ def persist_units?
345
+ @persist_units_boolean
315
346
  end
316
347
 
317
348
  def read_units(row)
@@ -0,0 +1,12 @@
1
+ class DataMiner
2
+ class UnitConverter
3
+ class << self
4
+ def load(type)
5
+ if type
6
+ require "data_miner/unit_converter/#{type}"
7
+ const_get(type.to_s.camelcase).new
8
+ end
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,11 @@
1
+ require 'alchemist'
2
+
3
+ class DataMiner
4
+ class UnitConverter
5
+ class Alchemist < UnitConverter
6
+ def convert(value, from, to)
7
+ value.to_f.send(from).to.send(to)
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,11 @@
1
+ require 'conversions'
2
+
3
+ class DataMiner
4
+ class UnitConverter
5
+ class Conversions < UnitConverter
6
+ def convert(value, from, to)
7
+ value.to_f.convert from, to
8
+ end
9
+ end
10
+ end
11
+ end
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '2.1.2'
2
+ VERSION = '2.2.0'
3
3
  end
@@ -0,0 +1,36 @@
1
+ require 'helper'
2
+
3
+ describe DataMiner::Attribute do
4
+ before do
5
+ DataMiner.unit_converter = :alchemist
6
+ end
7
+
8
+ describe '#convert?' do
9
+ it 'returns true if from_units is set' do
10
+ attribute = DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
11
+ assert attribute.send(:convert?)
12
+ end
13
+ it 'returns true if to_units and units_field_name are set' do
14
+ attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar', :to_units => :kilograms
15
+ assert attribute.send(:convert?)
16
+ end
17
+ it 'returns true if to_units and units_field_number are set' do
18
+ attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 3, :to_units => :kilograms
19
+ assert attribute.send(:convert?)
20
+ end
21
+ it 'returns false if units_field_name only is set' do
22
+ attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar'
23
+ refute attribute.send(:convert?)
24
+ end
25
+ it 'returns false if units_field_number only is set' do
26
+ attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 'bar'
27
+ refute attribute.send(:convert?)
28
+ end
29
+ it 'raises if no converter and units are used' do
30
+ DataMiner.unit_converter = nil
31
+ lambda {
32
+ DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
33
+ }.must_raise ArgumentError, /unit_converter/
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,13 @@
1
+ require 'helper'
2
+
3
+ describe 'DataMiner::UnitConverter::Alchemist' do
4
+ before do
5
+ DataMiner.unit_converter = :alchemist
6
+ end
7
+
8
+ describe '#convert' do
9
+ it 'converts a value from one unit to another' do
10
+ DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,15 @@
1
+ require 'helper'
2
+
3
+ describe 'DataMiner::UnitConverter::Conversions' do
4
+ before do
5
+ #DataMiner.unit_converter = :conversions
6
+ end
7
+
8
+ describe '#convert' do
9
+ it 'converts a value from one unit to another' do
10
+ # can't load both alchemist and conversions in same test run
11
+ # see test/test_unit_conversion for coverage of this adapter
12
+ #DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
13
+ end
14
+ end
15
+ end
data/test/helper.rb CHANGED
@@ -11,11 +11,6 @@ require 'minitest/reporters'
11
11
  MiniTest::Unit.runner = MiniTest::SuiteRunner.new
12
12
  MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
13
13
 
14
- cmd = %{mysql -u root -ppassword -e "DROP DATABASE data_miner_test; CREATE DATABASE data_miner_test CHARSET utf8"}
15
- $stderr.puts "Running `#{cmd}`..."
16
- system cmd
17
- $stderr.puts "Done."
18
-
19
14
  require 'active_record'
20
15
  require 'logger'
21
16
  ActiveRecord::Base.logger = Logger.new $stderr
@@ -31,68 +26,26 @@ ActiveRecord::Base.establish_connection(
31
26
  ActiveRecord::Base.mass_assignment_sanitizer = :strict
32
27
 
33
28
  require 'data_miner'
34
- DataMiner::Run.auto_upgrade!
35
- DataMiner::Run::ColumnStatistic.auto_upgrade!
36
- DataMiner::Run.clear_locks
37
29
 
38
- PETS = File.expand_path('../support/pets.csv', __FILE__)
39
- PETS_FUNNY = File.expand_path('../support/pets_funny.csv', __FILE__)
40
- COLOR_DICTIONARY_ENGLISH = File.expand_path('../support/pet_color_dictionary.en.csv', __FILE__)
41
- COLOR_DICTIONARY_SPANISH = File.expand_path('../support/pet_color_dictionary.es.csv', __FILE__)
42
- BREEDS = File.expand_path('../support/breeds.xls', __FILE__)
30
+ def init_database(unit_converter = :conversions)
31
+ cmd = %{mysql -u root -ppassword -e "DROP DATABASE data_miner_test; CREATE DATABASE data_miner_test CHARSET utf8"}
32
+ $stderr.puts "Running `#{cmd}`..."
33
+ system cmd
34
+ $stderr.puts "Done."
43
35
 
44
- class Pet < ActiveRecord::Base
45
- self.primary_key = "name"
46
- col :name
47
- col :breed_id
48
- col :color_id
49
- col :age, :type => :integer
50
- col :age_units
51
- col :weight, :type => :float
52
- col :weight_units
53
- col :height, :type => :integer
54
- col :height_units
55
- col :favorite_food
56
- col :command_phrase
57
- belongs_to :breed
58
- data_miner do
59
- process :auto_upgrade!
60
- process :run_data_miner_on_parent_associations!
61
- import("A list of pets", :url => "file://#{PETS}") do
62
- key :name
63
- store :age, :units_field_name => 'age_units'
64
- store :breed_id, :field_name => :breed, :nullify_blank_strings => true
65
- store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
66
- store :weight, :from_units => :pounds, :to_units => :kilograms
67
- store :favorite_food, :nullify_blank_strings => true
68
- store :command_phrase
69
- store :height, :units => :centimetres
70
- end
71
- end
72
- end
36
+ DataMiner::Run.auto_upgrade!
37
+ DataMiner::Run::ColumnStatistic.auto_upgrade!
38
+ DataMiner::Run.clear_locks
73
39
 
74
- class Breed < ActiveRecord::Base
75
- class << self
76
- def update_average_age!
77
- # make sure pet is populated
78
- Pet.run_data_miner!
79
- update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
80
- end
81
- end
82
- self.primary_key = "name"
83
- col :name
84
- col :average_age, :type => :float
85
- data_miner do
86
- process :auto_upgrade!
87
- import("A list of breeds", :url => "file://#{BREEDS}") do
88
- key :name, :field_name => 'Breed name'
89
- end
90
- process :update_average_age!
91
- end
40
+ DataMiner.unit_converter = unit_converter
92
41
  end
93
42
 
94
- ActiveRecord::Base.descendants.each do |model|
95
- model.attr_accessible nil
96
- end
43
+ def init_models
44
+ require 'support/breed'
45
+ require 'support/pet'
46
+ Pet.auto_upgrade!
97
47
 
98
- Pet.auto_upgrade!
48
+ ActiveRecord::Base.descendants.each do |model|
49
+ model.attr_accessible nil
50
+ end
51
+ end
@@ -0,0 +1,21 @@
1
+ BREEDS = File.expand_path('../breeds.xls', __FILE__)
2
+
3
+ class Breed < ActiveRecord::Base
4
+ class << self
5
+ def update_average_age!
6
+ # make sure pet is populated
7
+ Pet.run_data_miner!
8
+ update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
9
+ end
10
+ end
11
+ self.primary_key = "name"
12
+ col :name
13
+ col :average_age, :type => :float
14
+ data_miner do
15
+ process :auto_upgrade!
16
+ import("A list of breeds", :url => "file://#{BREEDS}") do
17
+ key :name, :field_name => 'Breed name'
18
+ end
19
+ process :update_average_age!
20
+ end
21
+ end
@@ -0,0 +1,13 @@
1
+ require 'helper'
2
+
3
+ describe 'DataMiner with Alchemist' do
4
+ before do
5
+ init_database(:alchemist)
6
+ init_models
7
+ Pet.run_data_miner!
8
+ end
9
+
10
+ it 'converts convertible units' do
11
+ Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to.kilograms.to_f
12
+ end
13
+ end
@@ -0,0 +1,16 @@
1
+ require 'helper'
2
+
3
+ require 'conversions'
4
+ Conversions.register :years, :years, 1
5
+
6
+ describe 'DataMiner with Conversions' do
7
+ before do
8
+ init_database(:conversions)
9
+ init_models
10
+ Pet.run_data_miner!
11
+ end
12
+
13
+ it 'converts convertible units' do
14
+ Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to(:kilograms)
15
+ end
16
+ end
@@ -0,0 +1,51 @@
1
+ require 'helper'
2
+
3
+ class MyPet < ActiveRecord::Base
4
+ PETS = File.expand_path('../pets.csv', __FILE__)
5
+ COLOR_DICTIONARY_ENGLISH = File.expand_path('../pet_color_dictionary.en.csv', __FILE__)
6
+
7
+ self.primary_key = "name"
8
+ col :name
9
+ col :color_id
10
+ col :age, :type => :integer
11
+ col :age_units
12
+ col :weight, :type => :float
13
+ col :weight_units
14
+ col :height, :type => :integer
15
+ col :height_units
16
+ col :favorite_food
17
+ col :command_phrase
18
+
19
+ data_miner do
20
+ process :auto_upgrade!
21
+ import("A list of pets", :url => "file://#{PETS}") do
22
+ key :name
23
+ store :age
24
+ store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
25
+ store :weight
26
+ store :favorite_food, :nullify_blank_strings => true
27
+ store :command_phrase
28
+ store :height, :units => :centimetres
29
+ end
30
+ end
31
+ end
32
+
33
+ describe 'DataMiner with Conversions' do
34
+ it 'happens when DataMiner.unit_converter is nil' do
35
+ DataMiner.unit_converter.must_be_nil
36
+ end
37
+
38
+ it 'converts convertible units' do
39
+ init_database(nil)
40
+ MyPet.run_data_miner!
41
+ MyPet.find('Pierre').weight.must_equal 4.4
42
+ end
43
+
44
+ it 'raises an error if conversions are attempted' do
45
+ init_database(nil)
46
+ lambda do
47
+ init_models
48
+ Pet.run_data_miner!
49
+ end.must_raise DataMiner::Attribute::NoConverterSet
50
+ end
51
+ end
@@ -0,0 +1,34 @@
1
+ PETS = File.expand_path('../pets.csv', __FILE__)
2
+ PETS_FUNNY = File.expand_path('../pets_funny.csv', __FILE__)
3
+ COLOR_DICTIONARY_ENGLISH = File.expand_path('../pet_color_dictionary.en.csv', __FILE__)
4
+ COLOR_DICTIONARY_SPANISH = File.expand_path('../pet_color_dictionary.es.csv', __FILE__)
5
+
6
+ class Pet < ActiveRecord::Base
7
+ self.primary_key = "name"
8
+ col :name
9
+ col :breed_id
10
+ col :color_id
11
+ col :age, :type => :integer
12
+ col :age_units
13
+ col :weight, :type => :float
14
+ col :weight_units
15
+ col :height, :type => :float
16
+ col :height_units
17
+ col :favorite_food
18
+ col :command_phrase
19
+ belongs_to :breed
20
+ data_miner do
21
+ process :auto_upgrade!
22
+ process :run_data_miner_on_parent_associations!
23
+ import("A list of pets", :url => "file://#{PETS}") do
24
+ key :name
25
+ store :age, :units_field_name => 'age_units'
26
+ store :breed_id, :field_name => :breed, :nullify_blank_strings => true
27
+ store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
28
+ store :weight, :from_units => :pounds, :to_units => :kilograms
29
+ store :favorite_food, :nullify_blank_strings => true
30
+ store :command_phrase
31
+ store :height, :units => :millimetres
32
+ end
33
+ end
34
+ end
@@ -1,6 +1,6 @@
1
1
  name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
2
- Pierre,Tabby,GO,4,years,4.4,30,tomato,"eh"
3
- Jerry,Beagle,BR/BL,5,years,10,30,cheese,"che"
4
- Amigo,Spanish Lizarto,GR/BU,17,years," ",3,crickets," "
5
- Johnny,Beagle,BR/BL,2,years,20,45," ",
2
+ Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
3
+ Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
4
+ Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
5
+ Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
6
6
  Nemo,,,,,,,,
@@ -1,5 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'helper'
3
+ init_database
4
+ init_models
3
5
 
4
6
  describe DataMiner do
5
7
  describe "when used to import example data about pets" do
@@ -65,15 +67,22 @@ describe DataMiner do
65
67
  Breed.run_data_miner!
66
68
  Breed.find('Beagle').average_age.must_equal((5+2)/2.0)
67
69
  end
70
+ it "properly interprets numbers using comma or period separators" do
71
+ Pet.run_data_miner!
72
+ Pet.find('Pierre').height.must_equal 3000.5
73
+ Pet.find('Jerry').height.must_equal 3000.0
74
+ Pet.find('Amigo').height.must_equal 300.5
75
+ Pet.find('Johnny').height.must_equal 4000.0
76
+ end
68
77
  it "performs unit conversions" do
69
78
  Pet.run_data_miner!
70
- Pet.find('Pierre').weight.must_be_close_to(4.4.pounds.to(:kilograms), 0.00001)
79
+ Pet.find('Pierre').weight.must_be_close_to 1.9958 # 4.4 pounds in kilograms
71
80
  end
72
81
  it "sets units" do
73
82
  Pet.run_data_miner!
74
83
  Pet.find('Pierre').age_units.must_equal 'years'
75
84
  Pet.find('Pierre').weight_units.must_equal 'kilograms'
76
- Pet.find('Pierre').height_units.must_equal 'centimetres'
85
+ Pet.find('Pierre').height_units.must_equal 'millimetres'
77
86
  end
78
87
  it "always nullifies numeric columns when blank/nil is the input" do
79
88
  Pet.run_data_miner!
@@ -1,5 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'helper'
3
+ init_database
4
+ init_models
3
5
 
4
6
  describe DataMiner::Run::ColumnStatistic do
5
7
  describe "when advanced statistics are enabled" do
@@ -1,5 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'helper'
3
+ init_database
4
+ init_models
3
5
  require 'earth'
4
6
 
5
7
  # use earth, which has a plethora of real-world data_miner blocks
@@ -1,5 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'helper'
3
+ init_database
4
+ init_models
3
5
  require 'earth'
4
6
 
5
7
  # use earth, which has a plethora of real-world data_miner blocks
data/test/test_safety.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'helper'
3
+ init_database
4
+ init_models
3
5
  require 'earth'
4
6
 
5
7
  require 'lock_method'
@@ -0,0 +1,16 @@
1
+ require 'helper'
2
+
3
+ describe 'DataMiner unit conversion' do
4
+ it "blows up if you don't specify a converter" do
5
+ output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_without_unit_converter.rb', __FILE__)}`
6
+ refute $?.success?, output
7
+ end
8
+ it 'can convert with alchemist' do
9
+ output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_alchemist.rb', __FILE__)}`
10
+ assert $?.success?, output
11
+ end
12
+ it 'can convert with conversions' do
13
+ output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_conversions.rb', __FILE__)}`
14
+ assert $?.success?, output
15
+ end
16
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.2
4
+ version: 2.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,16 +11,16 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-05-22 00:00:00.000000000 Z
14
+ date: 2012-06-11 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
- name: remote_table
17
+ name: aasm
18
18
  requirement: !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
22
22
  - !ruby/object:Gem::Version
23
- version: 1.2.2
23
+ version: '0'
24
24
  type: :runtime
25
25
  prerelease: false
26
26
  version_requirements: !ruby/object:Gem::Requirement
@@ -28,7 +28,23 @@ dependencies:
28
28
  requirements:
29
29
  - - ! '>='
30
30
  - !ruby/object:Gem::Version
31
- version: 1.2.2
31
+ version: '0'
32
+ - !ruby/object:Gem::Dependency
33
+ name: active_record_inline_schema
34
+ requirement: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
32
48
  - !ruby/object:Gem::Dependency
33
49
  name: activerecord
34
50
  requirement: !ruby/object:Gem::Requirement
@@ -62,13 +78,13 @@ dependencies:
62
78
  - !ruby/object:Gem::Version
63
79
  version: 2.3.4
64
80
  - !ruby/object:Gem::Dependency
65
- name: conversions
81
+ name: errata
66
82
  requirement: !ruby/object:Gem::Requirement
67
83
  none: false
68
84
  requirements:
69
85
  - - ! '>='
70
86
  - !ruby/object:Gem::Version
71
- version: 1.4.4
87
+ version: 1.0.1
72
88
  type: :runtime
73
89
  prerelease: false
74
90
  version_requirements: !ruby/object:Gem::Requirement
@@ -76,15 +92,15 @@ dependencies:
76
92
  requirements:
77
93
  - - ! '>='
78
94
  - !ruby/object:Gem::Version
79
- version: 1.4.4
95
+ version: 1.0.1
80
96
  - !ruby/object:Gem::Dependency
81
- name: errata
97
+ name: remote_table
82
98
  requirement: !ruby/object:Gem::Requirement
83
99
  none: false
84
100
  requirements:
85
101
  - - ! '>='
86
102
  - !ruby/object:Gem::Version
87
- version: 1.0.1
103
+ version: 1.2.2
88
104
  type: :runtime
89
105
  prerelease: false
90
106
  version_requirements: !ruby/object:Gem::Requirement
@@ -92,16 +108,16 @@ dependencies:
92
108
  requirements:
93
109
  - - ! '>='
94
110
  - !ruby/object:Gem::Version
95
- version: 1.0.1
111
+ version: 1.2.2
96
112
  - !ruby/object:Gem::Dependency
97
- name: active_record_inline_schema
113
+ name: dkastner-alchemist
98
114
  requirement: !ruby/object:Gem::Requirement
99
115
  none: false
100
116
  requirements:
101
117
  - - ! '>='
102
118
  - !ruby/object:Gem::Version
103
119
  version: '0'
104
- type: :runtime
120
+ type: :development
105
121
  prerelease: false
106
122
  version_requirements: !ruby/object:Gem::Requirement
107
123
  none: false
@@ -110,14 +126,142 @@ dependencies:
110
126
  - !ruby/object:Gem::Version
111
127
  version: '0'
112
128
  - !ruby/object:Gem::Dependency
113
- name: aasm
129
+ name: conversions
114
130
  requirement: !ruby/object:Gem::Requirement
115
131
  none: false
116
132
  requirements:
117
133
  - - ! '>='
118
134
  - !ruby/object:Gem::Version
119
135
  version: '0'
120
- type: :runtime
136
+ type: :development
137
+ prerelease: false
138
+ version_requirements: !ruby/object:Gem::Requirement
139
+ none: false
140
+ requirements:
141
+ - - ! '>='
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ - !ruby/object:Gem::Dependency
145
+ name: earth
146
+ requirement: !ruby/object:Gem::Requirement
147
+ none: false
148
+ requirements:
149
+ - - ! '>='
150
+ - !ruby/object:Gem::Version
151
+ version: '0'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ none: false
156
+ requirements:
157
+ - - ! '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ - !ruby/object:Gem::Dependency
161
+ name: fuzzy_match
162
+ requirement: !ruby/object:Gem::Requirement
163
+ none: false
164
+ requirements:
165
+ - - ! '>='
166
+ - !ruby/object:Gem::Version
167
+ version: '0'
168
+ type: :development
169
+ prerelease: false
170
+ version_requirements: !ruby/object:Gem::Requirement
171
+ none: false
172
+ requirements:
173
+ - - ! '>='
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ - !ruby/object:Gem::Dependency
177
+ name: lock_method
178
+ requirement: !ruby/object:Gem::Requirement
179
+ none: false
180
+ requirements:
181
+ - - ! '>='
182
+ - !ruby/object:Gem::Version
183
+ version: '0'
184
+ type: :development
185
+ prerelease: false
186
+ version_requirements: !ruby/object:Gem::Requirement
187
+ none: false
188
+ requirements:
189
+ - - ! '>='
190
+ - !ruby/object:Gem::Version
191
+ version: '0'
192
+ - !ruby/object:Gem::Dependency
193
+ name: minitest
194
+ requirement: !ruby/object:Gem::Requirement
195
+ none: false
196
+ requirements:
197
+ - - ! '>='
198
+ - !ruby/object:Gem::Version
199
+ version: '0'
200
+ type: :development
201
+ prerelease: false
202
+ version_requirements: !ruby/object:Gem::Requirement
203
+ none: false
204
+ requirements:
205
+ - - ! '>='
206
+ - !ruby/object:Gem::Version
207
+ version: '0'
208
+ - !ruby/object:Gem::Dependency
209
+ name: minitest-reporters
210
+ requirement: !ruby/object:Gem::Requirement
211
+ none: false
212
+ requirements:
213
+ - - ! '>='
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ none: false
220
+ requirements:
221
+ - - ! '>='
222
+ - !ruby/object:Gem::Version
223
+ version: '0'
224
+ - !ruby/object:Gem::Dependency
225
+ name: mysql2
226
+ requirement: !ruby/object:Gem::Requirement
227
+ none: false
228
+ requirements:
229
+ - - ! '>='
230
+ - !ruby/object:Gem::Version
231
+ version: '0'
232
+ type: :development
233
+ prerelease: false
234
+ version_requirements: !ruby/object:Gem::Requirement
235
+ none: false
236
+ requirements:
237
+ - - ! '>='
238
+ - !ruby/object:Gem::Version
239
+ version: '0'
240
+ - !ruby/object:Gem::Dependency
241
+ name: rake
242
+ requirement: !ruby/object:Gem::Requirement
243
+ none: false
244
+ requirements:
245
+ - - ! '>='
246
+ - !ruby/object:Gem::Version
247
+ version: '0'
248
+ type: :development
249
+ prerelease: false
250
+ version_requirements: !ruby/object:Gem::Requirement
251
+ none: false
252
+ requirements:
253
+ - - ! '>='
254
+ - !ruby/object:Gem::Version
255
+ version: '0'
256
+ - !ruby/object:Gem::Dependency
257
+ name: yard
258
+ requirement: !ruby/object:Gem::Requirement
259
+ none: false
260
+ requirements:
261
+ - - ! '>='
262
+ - !ruby/object:Gem::Version
263
+ version: '0'
264
+ type: :development
121
265
  prerelease: false
122
266
  version_requirements: !ruby/object:Gem::Requirement
123
267
  none: false
@@ -153,9 +297,20 @@ files:
153
297
  - lib/data_miner/step/import.rb
154
298
  - lib/data_miner/step/process.rb
155
299
  - lib/data_miner/step/tap.rb
300
+ - lib/data_miner/unit_converter.rb
301
+ - lib/data_miner/unit_converter/alchemist.rb
302
+ - lib/data_miner/unit_converter/conversions.rb
156
303
  - lib/data_miner/version.rb
304
+ - test/data_miner/test_attribute.rb
305
+ - test/data_miner/unit_converter/test_alchemist.rb
306
+ - test/data_miner/unit_converter/test_conversions.rb
157
307
  - test/helper.rb
308
+ - test/support/breed.rb
158
309
  - test/support/breeds.xls
310
+ - test/support/data_miner_with_alchemist.rb
311
+ - test/support/data_miner_with_conversions.rb
312
+ - test/support/data_miner_without_unit_converter.rb
313
+ - test/support/pet.rb
159
314
  - test/support/pet_color_dictionary.en.csv
160
315
  - test/support/pet_color_dictionary.es.csv
161
316
  - test/support/pets.csv
@@ -165,6 +320,7 @@ files:
165
320
  - test/test_earth_import.rb
166
321
  - test/test_earth_tap.rb
167
322
  - test/test_safety.rb
323
+ - test/test_unit_conversion.rb
168
324
  homepage: https://github.com/seamusabshere/data_miner
169
325
  licenses: []
170
326
  post_install_message:
@@ -185,14 +341,22 @@ required_rubygems_version: !ruby/object:Gem::Requirement
185
341
  version: '0'
186
342
  requirements: []
187
343
  rubyforge_project: data_miner
188
- rubygems_version: 1.8.21
344
+ rubygems_version: 1.8.24
189
345
  signing_key:
190
346
  specification_version: 3
191
347
  summary: Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import
192
348
  XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.
193
349
  test_files:
350
+ - test/data_miner/test_attribute.rb
351
+ - test/data_miner/unit_converter/test_alchemist.rb
352
+ - test/data_miner/unit_converter/test_conversions.rb
194
353
  - test/helper.rb
354
+ - test/support/breed.rb
195
355
  - test/support/breeds.xls
356
+ - test/support/data_miner_with_alchemist.rb
357
+ - test/support/data_miner_with_conversions.rb
358
+ - test/support/data_miner_without_unit_converter.rb
359
+ - test/support/pet.rb
196
360
  - test/support/pet_color_dictionary.en.csv
197
361
  - test/support/pet_color_dictionary.es.csv
198
362
  - test/support/pets.csv
@@ -202,4 +366,5 @@ test_files:
202
366
  - test/test_earth_import.rb
203
367
  - test/test_earth_tap.rb
204
368
  - test/test_safety.rb
369
+ - test/test_unit_conversion.rb
205
370
  has_rdoc: