data_miner 2.1.2 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,14 @@
1
+ 2.2.0 / 2012-06-11
2
+
3
+ * Breaking changes
4
+
5
+ * You must set DataMiner.unit_converter to :alchemist or :conversions if you wish to convert units
6
+
7
+ * Enhancements
8
+
9
+ * Swappable unit conversion libraries [@dkastner]
10
+ * Intelligent parsing of commas and periods in number fields [@ihough]
11
+
1
12
  2.1.2 / 2012-05-22
2
13
 
3
14
  * Breaking changes
data/Gemfile CHANGED
@@ -2,15 +2,8 @@ source :rubygems
2
2
 
3
3
  gemspec
4
4
 
5
- # development dependencies
6
- gem 'fuzzy_match'
7
- gem 'minitest'
8
- gem 'minitest-reporters'
9
- gem 'mysql2'
10
- gem 'rake'
11
- gem 'yard'
12
- gem 'earth'
13
- gem 'lock_method'
5
+ gem 'conversions'
6
+
14
7
  if RUBY_VERSION >= '1.9'
15
8
  gem 'unicode_utils'
16
9
  end
data/data_miner.gemspec CHANGED
@@ -17,11 +17,21 @@ Gem::Specification.new do |s|
17
17
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
18
  s.require_paths = ["lib"]
19
19
 
20
- s.add_runtime_dependency 'remote_table', '>=1.2.2'
20
+ s.add_runtime_dependency 'aasm'
21
+ s.add_runtime_dependency 'active_record_inline_schema'
21
22
  s.add_runtime_dependency 'activerecord', '>=2.3.4'
22
23
  s.add_runtime_dependency 'activesupport', '>=2.3.4'
23
- s.add_runtime_dependency 'conversions', '>=1.4.4'
24
24
  s.add_runtime_dependency 'errata', '>=1.0.1'
25
- s.add_runtime_dependency 'active_record_inline_schema'
26
- s.add_runtime_dependency 'aasm'
25
+ s.add_runtime_dependency 'remote_table', '>=1.2.2'
26
+
27
+ s.add_development_dependency 'dkastner-alchemist'
28
+ s.add_development_dependency 'conversions'
29
+ s.add_development_dependency 'earth'
30
+ s.add_development_dependency 'fuzzy_match'
31
+ s.add_development_dependency 'lock_method'
32
+ s.add_development_dependency 'minitest'
33
+ s.add_development_dependency 'minitest-reporters'
34
+ s.add_development_dependency 'mysql2'
35
+ s.add_development_dependency 'rake'
36
+ s.add_development_dependency 'yard'
27
37
  end
data/lib/data_miner.rb CHANGED
@@ -23,6 +23,7 @@ require 'data_miner/step/import'
23
23
  require 'data_miner/step/tap'
24
24
  require 'data_miner/step/process'
25
25
  require 'data_miner/run'
26
+ require 'data_miner/unit_converter'
26
27
 
27
28
  # A singleton class that holds global configuration for data mining.
28
29
  #
@@ -45,6 +46,23 @@ class DataMiner
45
46
  def compress_whitespace(str)
46
47
  str.gsub(INNER_SPACE, ' ').strip
47
48
  end
49
+
50
+ # Set the unit converter.
51
+ #
52
+ # @note As of 2012-05-30, the alchemist gem has known problems; using the conversions gem instead is recommended.
53
+ #
54
+ # @param [Symbol,nil] conversion_library Either +:alchemist+ or +:conversions+, or +nil+ to leave no converter set
55
+ #
56
+ # @return [nil]
57
+ def unit_converter=(conversion_library)
58
+ @unit_converter = UnitConverter.load conversion_library
59
+ nil
60
+ end
61
+
62
+ # @return [#convert,nil] The user-selected unit converter or nil.
63
+ def unit_converter
64
+ @unit_converter
65
+ end
48
66
  end
49
67
 
50
68
  INNER_SPACE = /[ ]+/
@@ -1,5 +1,3 @@
1
- require 'conversions'
2
-
3
1
  class DataMiner
4
2
  # A mapping between a local model column and a remote data source column.
5
3
  #
@@ -17,13 +15,23 @@ class DataMiner
17
15
  if (invalid_option_keys = options.keys - VALID_OPTIONS).any?
18
16
  errors << %{Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence}}
19
17
  end
20
- if (units_options = options.select { |k, _| k.to_s.include?('units') }).any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
18
+ units_options = options.select { |k, _| k.to_s.include?('units') }
19
+ if units_options.any? and DataMiner.unit_converter.nil?
20
+ errors << %{You must set DataMiner.unit_converter to :alchemist or :conversions if you wish to convert units}
21
+ end
22
+ if units_options.any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
21
23
  errors << %{#{units_options.inspect} is not a valid set of units definitions. Please supply a set like #{VALID_UNIT_DEFINITION_SETS.map(&:inspect).to_sentence}".}
22
24
  end
23
25
  errors
24
26
  end
25
27
  end
26
-
28
+
29
+ def number_column?
30
+ return @number_column_query[0] if @number_column_query.is_a?(Array)
31
+ @number_column_query = [model.columns_hash[name.to_s].number?]
32
+ @number_column_query[0]
33
+ end
34
+
27
35
  VALID_OPTIONS = [
28
36
  :from_units,
29
37
  :to_units,
@@ -47,12 +55,12 @@ class DataMiner
47
55
  ]
48
56
 
49
57
  VALID_UNIT_DEFINITION_SETS = [
50
- [:units],
51
- [:from_units, :to_units],
52
- [:units_field_name],
53
- [:units_field_name, :to_units],
54
- [:units_field_number],
55
- [:units_field_number, :to_units],
58
+ [:units], # no conversion
59
+ [:from_units, :to_units], # yes
60
+ [:units_field_name], # no
61
+ [:units_field_name, :to_units], # yes
62
+ [:units_field_number], # no
63
+ [:units_field_number, :to_units], # yes
56
64
  ]
57
65
 
58
66
  DEFAULT_SPLIT_PATTERN = /\s+/
@@ -111,14 +119,15 @@ class DataMiner
111
119
  # @return [Hash]
112
120
  attr_reader :split
113
121
 
114
- # Final units. May invoke a conversion using https://github.com/seamusabshere/conversions
122
+ # Final units. May invoke a conversion using the library selected via DataMiner.unit_converter (e.g. https://rubygems.org/gems/alchemist)
115
123
  #
116
124
  # If a local column named +[name]_units+ exists, it will be populated with this value.
117
125
  #
118
126
  # @return [Symbol]
119
127
  attr_reader :to_units
120
128
 
121
- # Initial units. May invoke a conversion using https://github.com/seamusabshere/conversions
129
+ # Initial units. May invoke a conversion using a conversion gem like https://rubygems.org/gems/alchemist
130
+ # Be sure to set DataMiner.unit_converter
122
131
  # @return [Symbol]
123
132
  attr_reader :from_units
124
133
 
@@ -186,6 +195,8 @@ class DataMiner
186
195
  @overwrite = options.fetch :overwrite, DEFAULT_OVERWRITE
187
196
  @units_field_name = options[:units_field_name]
188
197
  @units_field_number = options[:units_field_number]
198
+ @convert_boolean = (@from_units.present? or (@to_units.present? and (@units_field_name.present? or @units_field_number.present?)))
199
+ @persist_units_boolean = (@to_units.present? or @units_field_name.present? or @units_field_number.present?)
189
200
  @dictionary_mutex = ::Mutex.new
190
201
  end
191
202
 
@@ -211,7 +222,7 @@ class DataMiner
211
222
  currently_nil = new_value.nil?
212
223
  end
213
224
 
214
- if not currently_nil and units? and (final_to_units = (to_units || read_units(remote_row)))
225
+ if not currently_nil and persist_units? and (final_to_units = (to_units || read_units(remote_row)))
215
226
  local_record.send "#{name}_units=", final_to_units
216
227
  end
217
228
  end
@@ -244,6 +255,21 @@ class DataMiner
244
255
  return value
245
256
  end
246
257
  value = value.to_s
258
+ if number_column?
259
+ period_position = value.rindex '.'
260
+ comma_position = value.rindex ','
261
+ # assume that ',' is a thousands separator and '.' is a decimal point unless we have evidence to the contrary
262
+ if period_position and comma_position and comma_position > period_position
263
+ # uncommon euro style 1.000,53
264
+ value = value.delete('.').gsub(',', '.')
265
+ elsif comma_position and comma_position > (value.length - 4)
266
+ # uncommon euro style 1000,53
267
+ value = value.gsub(',', '.')
268
+ elsif comma_position
269
+ # more common 1,000[.00] style - still don't want commas
270
+ value = value.delete(',')
271
+ end
272
+ end
247
273
  if chars
248
274
  value = value[chars]
249
275
  end
@@ -252,7 +278,7 @@ class DataMiner
252
278
  keep = split.fetch :keep, DEFAULT_SPLIT_KEEP
253
279
  value = value.to_s.split(pattern)[keep].to_s
254
280
  end
255
- if value.blank? and (not stringlike_column? or nullify_blank_strings)
281
+ if value.blank? and (not text_column? or nullify_blank_strings)
256
282
  return
257
283
  end
258
284
  value = DataMiner.compress_whitespace value
@@ -260,12 +286,7 @@ class DataMiner
260
286
  value = DataMiner.upcase value
261
287
  end
262
288
  if convert?
263
- final_from_units = from_units || read_units(row)
264
- final_to_units = to_units || read_units(row)
265
- if final_from_units.blank? or final_to_units.blank?
266
- raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
267
- end
268
- value = value.to_f.convert final_from_units, final_to_units
289
+ value = convert_units value, row
269
290
  end
270
291
  if sprintf
271
292
  if sprintf.end_with?('f')
@@ -281,6 +302,16 @@ class DataMiner
281
302
  value
282
303
  end
283
304
 
305
+ # @private
306
+ def convert_units(value, row)
307
+ final_from_units = from_units || read_units(row)
308
+ final_to_units = to_units || read_units(row)
309
+ unless final_from_units and final_to_units
310
+ raise RuntimeError, "[data_miner] Missing units: from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
311
+ end
312
+ DataMiner.unit_converter.convert value, final_from_units, final_to_units
313
+ end
314
+
284
315
  # @private
285
316
  def refresh
286
317
  @dictionary = nil
@@ -292,10 +323,10 @@ class DataMiner
292
323
  step.model
293
324
  end
294
325
 
295
- def stringlike_column?
296
- return @stringlike_column_query[0] if @stringlike_column_query.is_a?(::Array)
297
- @stringlike_column_query = [model.columns_hash[name.to_s].type == :string]
298
- @stringlike_column_query[0]
326
+ def text_column?
327
+ return @text_column_query[0] if @text_column_query.is_a?(Array)
328
+ @text_column_query = [model.columns_hash[name.to_s].text?]
329
+ @text_column_query[0]
299
330
  end
300
331
 
301
332
  def static?
@@ -307,11 +338,11 @@ class DataMiner
307
338
  end
308
339
 
309
340
  def convert?
310
- from_units.present? or units_field_name.present? or units_field_number.present?
341
+ @convert_boolean
311
342
  end
312
343
 
313
- def units?
314
- to_units.present? or units_field_name.present? or units_field_number.present?
344
+ def persist_units?
345
+ @persist_units_boolean
315
346
  end
316
347
 
317
348
  def read_units(row)
@@ -0,0 +1,12 @@
1
+ class DataMiner
2
+ class UnitConverter
3
+ class << self
4
+ def load(type)
5
+ if type
6
+ require "data_miner/unit_converter/#{type}"
7
+ const_get(type.to_s.camelcase).new
8
+ end
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,11 @@
1
+ require 'alchemist'
2
+
3
+ class DataMiner
4
+ class UnitConverter
5
+ class Alchemist < UnitConverter
6
+ def convert(value, from, to)
7
+ value.to_f.send(from).to.send(to)
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,11 @@
1
+ require 'conversions'
2
+
3
+ class DataMiner
4
+ class UnitConverter
5
+ class Conversions < UnitConverter
6
+ def convert(value, from, to)
7
+ value.to_f.convert from, to
8
+ end
9
+ end
10
+ end
11
+ end
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '2.1.2'
2
+ VERSION = '2.2.0'
3
3
  end
@@ -0,0 +1,36 @@
1
+ require 'helper'
2
+
3
+ describe DataMiner::Attribute do
4
+ before do
5
+ DataMiner.unit_converter = :alchemist
6
+ end
7
+
8
+ describe '#convert?' do
9
+ it 'returns true if from_units is set' do
10
+ attribute = DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
11
+ assert attribute.send(:convert?)
12
+ end
13
+ it 'returns true if to_units and units_field_name are set' do
14
+ attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar', :to_units => :kilograms
15
+ assert attribute.send(:convert?)
16
+ end
17
+ it 'returns true if to_units and units_field_number are set' do
18
+ attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 3, :to_units => :kilograms
19
+ assert attribute.send(:convert?)
20
+ end
21
+ it 'returns false if units_field_name only is set' do
22
+ attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar'
23
+ refute attribute.send(:convert?)
24
+ end
25
+ it 'returns false if units_field_number only is set' do
26
+ attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 'bar'
27
+ refute attribute.send(:convert?)
28
+ end
29
+ it 'raises if no converter and units are used' do
30
+ DataMiner.unit_converter = nil
31
+ lambda {
32
+ DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
33
+ }.must_raise ArgumentError, /unit_converter/
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,13 @@
1
+ require 'helper'
2
+
3
+ describe 'DataMiner::UnitConverter::Alchemist' do
4
+ before do
5
+ DataMiner.unit_converter = :alchemist
6
+ end
7
+
8
+ describe '#convert' do
9
+ it 'converts a value from one unit to another' do
10
+ DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,15 @@
1
+ require 'helper'
2
+
3
+ describe 'DataMiner::UnitConverter::Conversions' do
4
+ before do
5
+ #DataMiner.unit_converter = :conversions
6
+ end
7
+
8
+ describe '#convert' do
9
+ it 'converts a value from one unit to another' do
10
+ # can't load both alchemist and conversions in same test run
11
+ # see test/test_unit_conversion for coverage of this adapter
12
+ #DataMiner.unit_converter.convert 3.5, :kilograms, :pounds
13
+ end
14
+ end
15
+ end
data/test/helper.rb CHANGED
@@ -11,11 +11,6 @@ require 'minitest/reporters'
11
11
  MiniTest::Unit.runner = MiniTest::SuiteRunner.new
12
12
  MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
13
13
 
14
- cmd = %{mysql -u root -ppassword -e "DROP DATABASE data_miner_test; CREATE DATABASE data_miner_test CHARSET utf8"}
15
- $stderr.puts "Running `#{cmd}`..."
16
- system cmd
17
- $stderr.puts "Done."
18
-
19
14
  require 'active_record'
20
15
  require 'logger'
21
16
  ActiveRecord::Base.logger = Logger.new $stderr
@@ -31,68 +26,26 @@ ActiveRecord::Base.establish_connection(
31
26
  ActiveRecord::Base.mass_assignment_sanitizer = :strict
32
27
 
33
28
  require 'data_miner'
34
- DataMiner::Run.auto_upgrade!
35
- DataMiner::Run::ColumnStatistic.auto_upgrade!
36
- DataMiner::Run.clear_locks
37
29
 
38
- PETS = File.expand_path('../support/pets.csv', __FILE__)
39
- PETS_FUNNY = File.expand_path('../support/pets_funny.csv', __FILE__)
40
- COLOR_DICTIONARY_ENGLISH = File.expand_path('../support/pet_color_dictionary.en.csv', __FILE__)
41
- COLOR_DICTIONARY_SPANISH = File.expand_path('../support/pet_color_dictionary.es.csv', __FILE__)
42
- BREEDS = File.expand_path('../support/breeds.xls', __FILE__)
30
+ def init_database(unit_converter = :conversions)
31
+ cmd = %{mysql -u root -ppassword -e "DROP DATABASE data_miner_test; CREATE DATABASE data_miner_test CHARSET utf8"}
32
+ $stderr.puts "Running `#{cmd}`..."
33
+ system cmd
34
+ $stderr.puts "Done."
43
35
 
44
- class Pet < ActiveRecord::Base
45
- self.primary_key = "name"
46
- col :name
47
- col :breed_id
48
- col :color_id
49
- col :age, :type => :integer
50
- col :age_units
51
- col :weight, :type => :float
52
- col :weight_units
53
- col :height, :type => :integer
54
- col :height_units
55
- col :favorite_food
56
- col :command_phrase
57
- belongs_to :breed
58
- data_miner do
59
- process :auto_upgrade!
60
- process :run_data_miner_on_parent_associations!
61
- import("A list of pets", :url => "file://#{PETS}") do
62
- key :name
63
- store :age, :units_field_name => 'age_units'
64
- store :breed_id, :field_name => :breed, :nullify_blank_strings => true
65
- store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
66
- store :weight, :from_units => :pounds, :to_units => :kilograms
67
- store :favorite_food, :nullify_blank_strings => true
68
- store :command_phrase
69
- store :height, :units => :centimetres
70
- end
71
- end
72
- end
36
+ DataMiner::Run.auto_upgrade!
37
+ DataMiner::Run::ColumnStatistic.auto_upgrade!
38
+ DataMiner::Run.clear_locks
73
39
 
74
- class Breed < ActiveRecord::Base
75
- class << self
76
- def update_average_age!
77
- # make sure pet is populated
78
- Pet.run_data_miner!
79
- update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
80
- end
81
- end
82
- self.primary_key = "name"
83
- col :name
84
- col :average_age, :type => :float
85
- data_miner do
86
- process :auto_upgrade!
87
- import("A list of breeds", :url => "file://#{BREEDS}") do
88
- key :name, :field_name => 'Breed name'
89
- end
90
- process :update_average_age!
91
- end
40
+ DataMiner.unit_converter = unit_converter
92
41
  end
93
42
 
94
- ActiveRecord::Base.descendants.each do |model|
95
- model.attr_accessible nil
96
- end
43
+ def init_models
44
+ require 'support/breed'
45
+ require 'support/pet'
46
+ Pet.auto_upgrade!
97
47
 
98
- Pet.auto_upgrade!
48
+ ActiveRecord::Base.descendants.each do |model|
49
+ model.attr_accessible nil
50
+ end
51
+ end
@@ -0,0 +1,21 @@
1
+ BREEDS = File.expand_path('../breeds.xls', __FILE__)
2
+
3
+ class Breed < ActiveRecord::Base
4
+ class << self
5
+ def update_average_age!
6
+ # make sure pet is populated
7
+ Pet.run_data_miner!
8
+ update_all %{breeds.average_age = (SELECT AVG(pets.age) FROM pets WHERE pets.breed_id = breeds.name)}
9
+ end
10
+ end
11
+ self.primary_key = "name"
12
+ col :name
13
+ col :average_age, :type => :float
14
+ data_miner do
15
+ process :auto_upgrade!
16
+ import("A list of breeds", :url => "file://#{BREEDS}") do
17
+ key :name, :field_name => 'Breed name'
18
+ end
19
+ process :update_average_age!
20
+ end
21
+ end
@@ -0,0 +1,13 @@
1
+ require 'helper'
2
+
3
+ describe 'DataMiner with Alchemist' do
4
+ before do
5
+ init_database(:alchemist)
6
+ init_models
7
+ Pet.run_data_miner!
8
+ end
9
+
10
+ it 'converts convertible units' do
11
+ Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to.kilograms.to_f
12
+ end
13
+ end
@@ -0,0 +1,16 @@
1
+ require 'helper'
2
+
3
+ require 'conversions'
4
+ Conversions.register :years, :years, 1
5
+
6
+ describe 'DataMiner with Conversions' do
7
+ before do
8
+ init_database(:conversions)
9
+ init_models
10
+ Pet.run_data_miner!
11
+ end
12
+
13
+ it 'converts convertible units' do
14
+ Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to(:kilograms)
15
+ end
16
+ end
@@ -0,0 +1,51 @@
1
+ require 'helper'
2
+
3
+ class MyPet < ActiveRecord::Base
4
+ PETS = File.expand_path('../pets.csv', __FILE__)
5
+ COLOR_DICTIONARY_ENGLISH = File.expand_path('../pet_color_dictionary.en.csv', __FILE__)
6
+
7
+ self.primary_key = "name"
8
+ col :name
9
+ col :color_id
10
+ col :age, :type => :integer
11
+ col :age_units
12
+ col :weight, :type => :float
13
+ col :weight_units
14
+ col :height, :type => :integer
15
+ col :height_units
16
+ col :favorite_food
17
+ col :command_phrase
18
+
19
+ data_miner do
20
+ process :auto_upgrade!
21
+ import("A list of pets", :url => "file://#{PETS}") do
22
+ key :name
23
+ store :age
24
+ store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
25
+ store :weight
26
+ store :favorite_food, :nullify_blank_strings => true
27
+ store :command_phrase
28
+ store :height, :units => :centimetres
29
+ end
30
+ end
31
+ end
32
+
33
+ describe 'DataMiner with Conversions' do
34
+ it 'happens when DataMiner.unit_converter is nil' do
35
+ DataMiner.unit_converter.must_be_nil
36
+ end
37
+
38
+ it 'converts convertible units' do
39
+ init_database(nil)
40
+ MyPet.run_data_miner!
41
+ MyPet.find('Pierre').weight.must_equal 4.4
42
+ end
43
+
44
+ it 'raises an error if conversions are attempted' do
45
+ init_database(nil)
46
+ lambda do
47
+ init_models
48
+ Pet.run_data_miner!
49
+ end.must_raise DataMiner::Attribute::NoConverterSet
50
+ end
51
+ end
@@ -0,0 +1,34 @@
1
+ PETS = File.expand_path('../pets.csv', __FILE__)
2
+ PETS_FUNNY = File.expand_path('../pets_funny.csv', __FILE__)
3
+ COLOR_DICTIONARY_ENGLISH = File.expand_path('../pet_color_dictionary.en.csv', __FILE__)
4
+ COLOR_DICTIONARY_SPANISH = File.expand_path('../pet_color_dictionary.es.csv', __FILE__)
5
+
6
+ class Pet < ActiveRecord::Base
7
+ self.primary_key = "name"
8
+ col :name
9
+ col :breed_id
10
+ col :color_id
11
+ col :age, :type => :integer
12
+ col :age_units
13
+ col :weight, :type => :float
14
+ col :weight_units
15
+ col :height, :type => :float
16
+ col :height_units
17
+ col :favorite_food
18
+ col :command_phrase
19
+ belongs_to :breed
20
+ data_miner do
21
+ process :auto_upgrade!
22
+ process :run_data_miner_on_parent_associations!
23
+ import("A list of pets", :url => "file://#{PETS}") do
24
+ key :name
25
+ store :age, :units_field_name => 'age_units'
26
+ store :breed_id, :field_name => :breed, :nullify_blank_strings => true
27
+ store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
28
+ store :weight, :from_units => :pounds, :to_units => :kilograms
29
+ store :favorite_food, :nullify_blank_strings => true
30
+ store :command_phrase
31
+ store :height, :units => :millimetres
32
+ end
33
+ end
34
+ end
@@ -1,6 +1,6 @@
1
1
  name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
2
- Pierre,Tabby,GO,4,years,4.4,30,tomato,"eh"
3
- Jerry,Beagle,BR/BL,5,years,10,30,cheese,"che"
4
- Amigo,Spanish Lizarto,GR/BU,17,years," ",3,crickets," "
5
- Johnny,Beagle,BR/BL,2,years,20,45," ",
2
+ Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
3
+ Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
4
+ Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
5
+ Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
6
6
  Nemo,,,,,,,,
@@ -1,5 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'helper'
3
+ init_database
4
+ init_models
3
5
 
4
6
  describe DataMiner do
5
7
  describe "when used to import example data about pets" do
@@ -65,15 +67,22 @@ describe DataMiner do
65
67
  Breed.run_data_miner!
66
68
  Breed.find('Beagle').average_age.must_equal((5+2)/2.0)
67
69
  end
70
+ it "properly interprets numbers using comma or period separators" do
71
+ Pet.run_data_miner!
72
+ Pet.find('Pierre').height.must_equal 3000.5
73
+ Pet.find('Jerry').height.must_equal 3000.0
74
+ Pet.find('Amigo').height.must_equal 300.5
75
+ Pet.find('Johnny').height.must_equal 4000.0
76
+ end
68
77
  it "performs unit conversions" do
69
78
  Pet.run_data_miner!
70
- Pet.find('Pierre').weight.must_be_close_to(4.4.pounds.to(:kilograms), 0.00001)
79
+ Pet.find('Pierre').weight.must_be_close_to 1.9958 # 4.4 pounds in kilograms
71
80
  end
72
81
  it "sets units" do
73
82
  Pet.run_data_miner!
74
83
  Pet.find('Pierre').age_units.must_equal 'years'
75
84
  Pet.find('Pierre').weight_units.must_equal 'kilograms'
76
- Pet.find('Pierre').height_units.must_equal 'centimetres'
85
+ Pet.find('Pierre').height_units.must_equal 'millimetres'
77
86
  end
78
87
  it "always nullifies numeric columns when blank/nil is the input" do
79
88
  Pet.run_data_miner!
@@ -1,5 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'helper'
3
+ init_database
4
+ init_models
3
5
 
4
6
  describe DataMiner::Run::ColumnStatistic do
5
7
  describe "when advanced statistics are enabled" do
@@ -1,5 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'helper'
3
+ init_database
4
+ init_models
3
5
  require 'earth'
4
6
 
5
7
  # use earth, which has a plethora of real-world data_miner blocks
@@ -1,5 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'helper'
3
+ init_database
4
+ init_models
3
5
  require 'earth'
4
6
 
5
7
  # use earth, which has a plethora of real-world data_miner blocks
data/test/test_safety.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  require 'helper'
3
+ init_database
4
+ init_models
3
5
  require 'earth'
4
6
 
5
7
  require 'lock_method'
@@ -0,0 +1,16 @@
1
+ require 'helper'
2
+
3
+ describe 'DataMiner unit conversion' do
4
+ it "blows up if you don't specify a converter" do
5
+ output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_without_unit_converter.rb', __FILE__)}`
6
+ refute $?.success?, output
7
+ end
8
+ it 'can convert with alchemist' do
9
+ output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_alchemist.rb', __FILE__)}`
10
+ assert $?.success?, output
11
+ end
12
+ it 'can convert with conversions' do
13
+ output = `ruby -I#{File.dirname(__FILE__)} #{File.expand_path('../support/data_miner_with_conversions.rb', __FILE__)}`
14
+ assert $?.success?, output
15
+ end
16
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.2
4
+ version: 2.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,16 +11,16 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-05-22 00:00:00.000000000 Z
14
+ date: 2012-06-11 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
- name: remote_table
17
+ name: aasm
18
18
  requirement: !ruby/object:Gem::Requirement
19
19
  none: false
20
20
  requirements:
21
21
  - - ! '>='
22
22
  - !ruby/object:Gem::Version
23
- version: 1.2.2
23
+ version: '0'
24
24
  type: :runtime
25
25
  prerelease: false
26
26
  version_requirements: !ruby/object:Gem::Requirement
@@ -28,7 +28,23 @@ dependencies:
28
28
  requirements:
29
29
  - - ! '>='
30
30
  - !ruby/object:Gem::Version
31
- version: 1.2.2
31
+ version: '0'
32
+ - !ruby/object:Gem::Dependency
33
+ name: active_record_inline_schema
34
+ requirement: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
32
48
  - !ruby/object:Gem::Dependency
33
49
  name: activerecord
34
50
  requirement: !ruby/object:Gem::Requirement
@@ -62,13 +78,13 @@ dependencies:
62
78
  - !ruby/object:Gem::Version
63
79
  version: 2.3.4
64
80
  - !ruby/object:Gem::Dependency
65
- name: conversions
81
+ name: errata
66
82
  requirement: !ruby/object:Gem::Requirement
67
83
  none: false
68
84
  requirements:
69
85
  - - ! '>='
70
86
  - !ruby/object:Gem::Version
71
- version: 1.4.4
87
+ version: 1.0.1
72
88
  type: :runtime
73
89
  prerelease: false
74
90
  version_requirements: !ruby/object:Gem::Requirement
@@ -76,15 +92,15 @@ dependencies:
76
92
  requirements:
77
93
  - - ! '>='
78
94
  - !ruby/object:Gem::Version
79
- version: 1.4.4
95
+ version: 1.0.1
80
96
  - !ruby/object:Gem::Dependency
81
- name: errata
97
+ name: remote_table
82
98
  requirement: !ruby/object:Gem::Requirement
83
99
  none: false
84
100
  requirements:
85
101
  - - ! '>='
86
102
  - !ruby/object:Gem::Version
87
- version: 1.0.1
103
+ version: 1.2.2
88
104
  type: :runtime
89
105
  prerelease: false
90
106
  version_requirements: !ruby/object:Gem::Requirement
@@ -92,16 +108,16 @@ dependencies:
92
108
  requirements:
93
109
  - - ! '>='
94
110
  - !ruby/object:Gem::Version
95
- version: 1.0.1
111
+ version: 1.2.2
96
112
  - !ruby/object:Gem::Dependency
97
- name: active_record_inline_schema
113
+ name: dkastner-alchemist
98
114
  requirement: !ruby/object:Gem::Requirement
99
115
  none: false
100
116
  requirements:
101
117
  - - ! '>='
102
118
  - !ruby/object:Gem::Version
103
119
  version: '0'
104
- type: :runtime
120
+ type: :development
105
121
  prerelease: false
106
122
  version_requirements: !ruby/object:Gem::Requirement
107
123
  none: false
@@ -110,14 +126,142 @@ dependencies:
110
126
  - !ruby/object:Gem::Version
111
127
  version: '0'
112
128
  - !ruby/object:Gem::Dependency
113
- name: aasm
129
+ name: conversions
114
130
  requirement: !ruby/object:Gem::Requirement
115
131
  none: false
116
132
  requirements:
117
133
  - - ! '>='
118
134
  - !ruby/object:Gem::Version
119
135
  version: '0'
120
- type: :runtime
136
+ type: :development
137
+ prerelease: false
138
+ version_requirements: !ruby/object:Gem::Requirement
139
+ none: false
140
+ requirements:
141
+ - - ! '>='
142
+ - !ruby/object:Gem::Version
143
+ version: '0'
144
+ - !ruby/object:Gem::Dependency
145
+ name: earth
146
+ requirement: !ruby/object:Gem::Requirement
147
+ none: false
148
+ requirements:
149
+ - - ! '>='
150
+ - !ruby/object:Gem::Version
151
+ version: '0'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ none: false
156
+ requirements:
157
+ - - ! '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ - !ruby/object:Gem::Dependency
161
+ name: fuzzy_match
162
+ requirement: !ruby/object:Gem::Requirement
163
+ none: false
164
+ requirements:
165
+ - - ! '>='
166
+ - !ruby/object:Gem::Version
167
+ version: '0'
168
+ type: :development
169
+ prerelease: false
170
+ version_requirements: !ruby/object:Gem::Requirement
171
+ none: false
172
+ requirements:
173
+ - - ! '>='
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ - !ruby/object:Gem::Dependency
177
+ name: lock_method
178
+ requirement: !ruby/object:Gem::Requirement
179
+ none: false
180
+ requirements:
181
+ - - ! '>='
182
+ - !ruby/object:Gem::Version
183
+ version: '0'
184
+ type: :development
185
+ prerelease: false
186
+ version_requirements: !ruby/object:Gem::Requirement
187
+ none: false
188
+ requirements:
189
+ - - ! '>='
190
+ - !ruby/object:Gem::Version
191
+ version: '0'
192
+ - !ruby/object:Gem::Dependency
193
+ name: minitest
194
+ requirement: !ruby/object:Gem::Requirement
195
+ none: false
196
+ requirements:
197
+ - - ! '>='
198
+ - !ruby/object:Gem::Version
199
+ version: '0'
200
+ type: :development
201
+ prerelease: false
202
+ version_requirements: !ruby/object:Gem::Requirement
203
+ none: false
204
+ requirements:
205
+ - - ! '>='
206
+ - !ruby/object:Gem::Version
207
+ version: '0'
208
+ - !ruby/object:Gem::Dependency
209
+ name: minitest-reporters
210
+ requirement: !ruby/object:Gem::Requirement
211
+ none: false
212
+ requirements:
213
+ - - ! '>='
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ none: false
220
+ requirements:
221
+ - - ! '>='
222
+ - !ruby/object:Gem::Version
223
+ version: '0'
224
+ - !ruby/object:Gem::Dependency
225
+ name: mysql2
226
+ requirement: !ruby/object:Gem::Requirement
227
+ none: false
228
+ requirements:
229
+ - - ! '>='
230
+ - !ruby/object:Gem::Version
231
+ version: '0'
232
+ type: :development
233
+ prerelease: false
234
+ version_requirements: !ruby/object:Gem::Requirement
235
+ none: false
236
+ requirements:
237
+ - - ! '>='
238
+ - !ruby/object:Gem::Version
239
+ version: '0'
240
+ - !ruby/object:Gem::Dependency
241
+ name: rake
242
+ requirement: !ruby/object:Gem::Requirement
243
+ none: false
244
+ requirements:
245
+ - - ! '>='
246
+ - !ruby/object:Gem::Version
247
+ version: '0'
248
+ type: :development
249
+ prerelease: false
250
+ version_requirements: !ruby/object:Gem::Requirement
251
+ none: false
252
+ requirements:
253
+ - - ! '>='
254
+ - !ruby/object:Gem::Version
255
+ version: '0'
256
+ - !ruby/object:Gem::Dependency
257
+ name: yard
258
+ requirement: !ruby/object:Gem::Requirement
259
+ none: false
260
+ requirements:
261
+ - - ! '>='
262
+ - !ruby/object:Gem::Version
263
+ version: '0'
264
+ type: :development
121
265
  prerelease: false
122
266
  version_requirements: !ruby/object:Gem::Requirement
123
267
  none: false
@@ -153,9 +297,20 @@ files:
153
297
  - lib/data_miner/step/import.rb
154
298
  - lib/data_miner/step/process.rb
155
299
  - lib/data_miner/step/tap.rb
300
+ - lib/data_miner/unit_converter.rb
301
+ - lib/data_miner/unit_converter/alchemist.rb
302
+ - lib/data_miner/unit_converter/conversions.rb
156
303
  - lib/data_miner/version.rb
304
+ - test/data_miner/test_attribute.rb
305
+ - test/data_miner/unit_converter/test_alchemist.rb
306
+ - test/data_miner/unit_converter/test_conversions.rb
157
307
  - test/helper.rb
308
+ - test/support/breed.rb
158
309
  - test/support/breeds.xls
310
+ - test/support/data_miner_with_alchemist.rb
311
+ - test/support/data_miner_with_conversions.rb
312
+ - test/support/data_miner_without_unit_converter.rb
313
+ - test/support/pet.rb
159
314
  - test/support/pet_color_dictionary.en.csv
160
315
  - test/support/pet_color_dictionary.es.csv
161
316
  - test/support/pets.csv
@@ -165,6 +320,7 @@ files:
165
320
  - test/test_earth_import.rb
166
321
  - test/test_earth_tap.rb
167
322
  - test/test_safety.rb
323
+ - test/test_unit_conversion.rb
168
324
  homepage: https://github.com/seamusabshere/data_miner
169
325
  licenses: []
170
326
  post_install_message:
@@ -185,14 +341,22 @@ required_rubygems_version: !ruby/object:Gem::Requirement
185
341
  version: '0'
186
342
  requirements: []
187
343
  rubyforge_project: data_miner
188
- rubygems_version: 1.8.21
344
+ rubygems_version: 1.8.24
189
345
  signing_key:
190
346
  specification_version: 3
191
347
  summary: Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import
192
348
  XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.
193
349
  test_files:
350
+ - test/data_miner/test_attribute.rb
351
+ - test/data_miner/unit_converter/test_alchemist.rb
352
+ - test/data_miner/unit_converter/test_conversions.rb
194
353
  - test/helper.rb
354
+ - test/support/breed.rb
195
355
  - test/support/breeds.xls
356
+ - test/support/data_miner_with_alchemist.rb
357
+ - test/support/data_miner_with_conversions.rb
358
+ - test/support/data_miner_without_unit_converter.rb
359
+ - test/support/pet.rb
196
360
  - test/support/pet_color_dictionary.en.csv
197
361
  - test/support/pet_color_dictionary.es.csv
198
362
  - test/support/pets.csv
@@ -202,4 +366,5 @@ test_files:
202
366
  - test/test_earth_import.rb
203
367
  - test/test_earth_tap.rb
204
368
  - test/test_safety.rb
369
+ - test/test_unit_conversion.rb
205
370
  has_rdoc: