data_miner 1.3.8 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/CHANGELOG +42 -0
  2. data/Gemfile +19 -3
  3. data/README.rdoc +3 -3
  4. data/Rakefile +13 -15
  5. data/data_miner.gemspec +4 -15
  6. data/lib/data_miner.rb +69 -70
  7. data/lib/data_miner/active_record_extensions.rb +17 -22
  8. data/lib/data_miner/attribute.rb +176 -179
  9. data/lib/data_miner/dictionary.rb +38 -31
  10. data/lib/data_miner/run.rb +49 -18
  11. data/lib/data_miner/script.rb +116 -0
  12. data/lib/data_miner/step.rb +5 -0
  13. data/lib/data_miner/step/import.rb +74 -0
  14. data/lib/data_miner/step/process.rb +34 -0
  15. data/lib/data_miner/step/tap.rb +134 -0
  16. data/lib/data_miner/version.rb +1 -1
  17. data/test/helper.rb +26 -24
  18. data/test/support/breeds.xls +0 -0
  19. data/test/support/pet_color_dictionary.en.csv +5 -0
  20. data/test/support/pet_color_dictionary.es.csv +5 -0
  21. data/test/support/pets.csv +5 -0
  22. data/test/support/pets_funny.csv +4 -0
  23. data/test/test_data_miner.rb +103 -0
  24. data/test/test_earth_import.rb +25 -0
  25. data/test/test_earth_tap.rb +25 -0
  26. data/test/test_safety.rb +43 -0
  27. metadata +72 -78
  28. data/.document +0 -5
  29. data/lib/data_miner/config.rb +0 -124
  30. data/lib/data_miner/import.rb +0 -93
  31. data/lib/data_miner/process.rb +0 -38
  32. data/lib/data_miner/tap.rb +0 -143
  33. data/test/support/aircraft.rb +0 -102
  34. data/test/support/airport.rb +0 -16
  35. data/test/support/automobile_fuel_type.rb +0 -40
  36. data/test/support/automobile_variant.rb +0 -362
  37. data/test/support/country.rb +0 -15
  38. data/test/support/test_database.rb +0 -311
  39. data/test/test_data_miner_attribute.rb +0 -111
  40. data/test/test_data_miner_process.rb +0 -18
  41. data/test/test_old_syntax.rb +0 -825
  42. data/test/test_tap.rb +0 -21
data/CHANGELOG CHANGED
@@ -1,3 +1,45 @@
1
+ 2.0.1 / 2012-04-18
2
+
3
+ * Enhancements
4
+
5
+ * DataMiner.run -> DataMiner.perform
6
+ * Some basic tests that don't rely on Earth
7
+
8
+ * Bug fixes
9
+
10
+ * Fix the "call stack" - the thing that keeps infinite loops from occurring
11
+ * Make sure sources get refreshed every time you re-run data miner
12
+ * Make sure dictionaries get refreshed every time you re-run data miner
13
+
14
+ 2.0.0 / 2012-04-17
15
+
16
+ * Breaking changes
17
+
18
+ * Renamed data_miner_config to data_miner_script (etc. for class/method naming)
19
+ * Simplify DataMiner.run arguments
20
+ was: DataMiner.run(:resource_names => ['Country'])
21
+ now: DataMiner.run(['Country'])
22
+ * Rename "resources" to "models"
23
+ was: DataMiner.resource_names
24
+ now: DataMiner.model_names
25
+ * Expect procs instead of lambdas (because they are just instance-eval'ed now; Blockenspiel is no longer guessing where to find methods)
26
+ was: :synthesize => lambda { class_method }
27
+ now: :synthesize => proc { Klass.class_method }
28
+ * Use UnicodeUtils to correctly upcase and downcase
29
+ * Use throw/catch instead of exceptions to signal that a step should stop successfully
30
+ was: DataMiner::Succeed
31
+ now: throw :data_miner_succeed
32
+ * Import steps no longer accept deprecated :table => RemoteTable or :errata => Errata options
33
+ * DataMiner::Run structure has changed (in addition to other internals)
34
+
35
+ * Enhancements
36
+
37
+ * Easy to modify data miner scripts using DataMiner::Script#{append|prepend|append_once|prepend_once}
38
+ * DRYer codebase
39
+ * No longer depends on Blockenspiel
40
+ * Uses UnixUtils instead of its own spawning code
41
+ * Should be threadsafe (no more autoload, has mutexes, more careful/fewer singletons, etc.)
42
+
1
43
  1.1.0
2
44
  * fixed dependency issues
3
45
  1.0.0
data/Gemfile CHANGED
@@ -1,4 +1,20 @@
1
- source "http://rubygems.org"
1
+ source :rubygems
2
2
 
3
- # Specify your gem's dependencies in data_miner.gemspec
4
- gemspec :path => '.'
3
+ gemspec
4
+
5
+ # development dependencies
6
+ gem 'fuzzy_match'
7
+ gem 'minitest'
8
+ gem 'minitest-reporters'
9
+ gem 'mysql2'
10
+ gem 'rake'
11
+ gem 'yard'
12
+ gem 'earth'
13
+ if RUBY_VERSION >= '1.9'
14
+ gem 'unicode_utils'
15
+ end
16
+ # if RUBY_VERSION >= '1.9'
17
+ # gem 'ruby-debug19' # replace with debugger?
18
+ # else
19
+ # gem 'ruby-debug'
20
+ # end
@@ -9,7 +9,7 @@ Programmatically import useful data into your ActiveRecord models.
9
9
  You define <tt>data_miner</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
10
10
 
11
11
  class Country < ActiveRecord::Base
12
- set_primary_key :iso_3166_code
12
+ self.primary_key = :iso_3166_code
13
13
 
14
14
  data_miner do
15
15
  import 'the official ISO country list',
@@ -62,7 +62,7 @@ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA
62
62
  # Tell ActiveRecord that we want to use a string primary key.
63
63
  # This makes it easier to repeatedly truncate and re-import this
64
64
  # table without breaking associations.
65
- set_primary_key :icao_code
65
+ self.primary_key = :icao_code
66
66
 
67
67
  # Use the mini_record-compat gem to define the database schema in-line.
68
68
  # It will destructively and automatically add/remove columns.
@@ -268,7 +268,7 @@ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA
268
268
  update_all "weighting = (#{segments.project(segments[:passengers].sum).where(aircraft[:bts_aircraft_type_code].eq(segments[:bts_aircraft_type_code])).to_sql})"
269
269
  end
270
270
 
271
- # And finally re-run the import of resources that depend on this resource.
271
+ # And finally re-run the import of resources that depend on this model.
272
272
  # Don't worry about calling Aircraft.run_data_miner! at the top of AircraftManufacturer's data_miner block;
273
273
  # that's the right way to do dependencies. It won't get called twice in the same run.
274
274
  [ AircraftManufacturer ].each do |synthetic_resource|
data/Rakefile CHANGED
@@ -1,27 +1,25 @@
1
- require 'bundler'
2
- Bundler::GemHelper.install_tasks
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
3
 
4
4
  require 'rake'
5
5
  require 'rake/testtask'
6
6
  Rake::TestTask.new(:test) do |test|
7
- test.libs << 'lib' << 'test'
7
+ test.libs << 'test'
8
8
  test.pattern = 'test/**/test_*.rb'
9
9
  test.verbose = true
10
10
  end
11
11
 
12
- task :default => :test
13
-
14
- begin
15
- require 'rake/rdoctask'
16
- Rake::RDocTask.new do |rdoc|
17
- rdoc.rdoc_dir = 'rdoc'
18
- rdoc.title = 'data_miner'
19
- rdoc.options << '--line-numbers' << '--inline-source'
20
- rdoc.rdoc_files.include('README*')
21
- rdoc.rdoc_files.include('lib/**/*.rb')
12
+ task :test_separately do
13
+ Dir[File.expand_path('../test/**/test_*.rb', __FILE__)].each do |path|
14
+ system "rake test TEST=#{path}"
22
15
  end
23
- rescue LoadError
24
- puts "Rdoc is not available"
16
+ end
17
+
18
+ task :default => :test_separately
19
+
20
+ require 'yard'
21
+ YARD::Rake::YardocTask.new do |y|
22
+ y.options << '--no-private'
25
23
  end
26
24
 
27
25
  gemspec = eval(File.read(Dir["*.gemspec"].first))
@@ -1,11 +1,9 @@
1
1
  # -*- encoding: utf-8 -*-
2
- $:.push File.expand_path("../lib", __FILE__)
3
- require "data_miner/version"
2
+ require File.expand_path("../lib/data_miner/version", __FILE__)
4
3
 
5
4
  Gem::Specification.new do |s|
6
5
  s.name = "data_miner"
7
6
  s.version = DataMiner::VERSION
8
- s.platform = Gem::Platform::RUBY
9
7
  s.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner"]
10
8
  s.email = ["seamus@abshere.net"]
11
9
  s.homepage = "https://github.com/seamusabshere/data_miner"
@@ -23,17 +21,8 @@ Gem::Specification.new do |s|
23
21
  s.add_runtime_dependency 'activerecord', '>=2.3.4'
24
22
  s.add_runtime_dependency 'activesupport', '>=2.3.4'
25
23
  s.add_runtime_dependency 'conversions', '>=1.4.4'
26
- s.add_runtime_dependency 'blockenspiel', '>=0.3.2'
27
24
  s.add_runtime_dependency 'errata', '>=1.0.1'
28
- s.add_development_dependency 'mini_record-compat'
29
- s.add_development_dependency 'loose_tight_dictionary', ">=0.0.5"
30
- s.add_development_dependency 'test-unit'
31
- s.add_development_dependency 'shoulda'
32
- s.add_development_dependency 'mysql'
33
- s.add_development_dependency 'rake'
34
- # if RUBY_VERSION >= '1.9'
35
- # s.add_development_dependency 'ruby-debug19'
36
- # else
37
- # s.add_development_dependency 'ruby-debug'
38
- # end
25
+ s.add_runtime_dependency 'active_record_inline_schema'
26
+ s.add_runtime_dependency 'aasm'
27
+ s.add_runtime_dependency 'lock_method', '>=0.5.1'
39
28
  end
@@ -1,91 +1,90 @@
1
+ require 'singleton'
2
+ require 'set'
1
3
  require 'active_support'
2
4
  require 'active_support/version'
3
- %w{
4
- active_support/core_ext/array/conversions
5
- active_support/core_ext/string/access
6
- active_support/core_ext/string/multibyte
7
- }.each do |active_support_3_requirement|
8
- require active_support_3_requirement
9
- end if ::ActiveSupport::VERSION::MAJOR == 3
5
+ if ::ActiveSupport::VERSION::MAJOR >= 3
6
+ require 'active_support/core_ext'
7
+ end
8
+ require 'active_record'
9
+ if RUBY_VERSION >= '1.9'
10
+ begin
11
+ require 'unicode_utils/downcase'
12
+ rescue LoadError
13
+ Kernel.warn '[data_miner] You may wish to include unicode_utils in your Gemfile to improve accuracy of downcasing'
14
+ end
15
+ end
10
16
 
11
- require 'singleton'
17
+ require 'data_miner/active_record_extensions'
18
+ require 'data_miner/attribute'
19
+ require 'data_miner/script'
20
+ require 'data_miner/dictionary'
21
+ require 'data_miner/step'
22
+ require 'data_miner/step/import'
23
+ require 'data_miner/step/tap'
24
+ require 'data_miner/step/process'
25
+ require 'data_miner/run'
12
26
 
13
27
  class DataMiner
14
- include ::Singleton
15
-
16
- class MissingHashColumn < StandardError; end
17
- class Finish < StandardError; end
18
- class Skip < StandardError; end
19
-
20
- autoload :ActiveRecordExtensions, 'data_miner/active_record_extensions'
21
- autoload :Attribute, 'data_miner/attribute'
22
- autoload :Config, 'data_miner/config'
23
- autoload :Dictionary, 'data_miner/dictionary'
24
- autoload :Import, 'data_miner/import'
25
- autoload :Tap, 'data_miner/tap'
26
- autoload :Process, 'data_miner/process'
27
- autoload :Run, 'data_miner/run'
28
-
29
28
  class << self
29
+ delegate :perform, :to => :instance
30
+ delegate :run, :to => :instance
30
31
  delegate :logger, :to => :instance
31
32
  delegate :logger=, :to => :instance
32
- delegate :run, :to => :instance
33
- delegate :resource_names, :to => :instance
34
- end
35
-
36
- # http://avdi.org/devblog/2009/07/14/recursively-symbolize-keys/
37
- def self.recursively_stringify_keys(hash)
38
- hash.inject(::Hash.new) do |result, (key, value)|
39
- new_key = case key
40
- when ::Symbol then key.to_s
41
- else key
42
- end
43
- new_value = case value
44
- when ::Hash then ::DataMiner.recursively_stringify_keys(value)
45
- else value
46
- end
47
- result[new_key] = new_value
48
- result
33
+ delegate :model_names, :to => :instance
34
+
35
+ # @private
36
+ def downcase(str)
37
+ defined?(::UnicodeUtils) ? ::UnicodeUtils.downcase(str) : str.downcase
38
+ end
39
+
40
+ # @private
41
+ def upcase(str)
42
+ defined?(::UnicodeUtils) ? ::UnicodeUtils.upcase(str) : str.upcase
43
+ end
44
+
45
+ # @private
46
+ def compress_whitespace(str)
47
+ str.gsub(INNER_SPACE, ' ').strip
49
48
  end
50
49
  end
51
-
50
+
51
+ MUTEX = ::Mutex.new
52
+ INNER_SPACE = /[ ]+/
53
+
54
+ include ::Singleton
55
+
52
56
  attr_writer :logger
53
- def logger
54
- return @logger if @logger
55
- if defined?(::Rails)
56
- @logger = ::Rails.logger
57
- elsif defined?(::ActiveRecord) and active_record_logger = ::ActiveRecord::Base.logger
58
- @logger = active_record_logger
59
- else
60
- require 'logger'
61
- @logger = ::Logger.new $stderr
57
+
58
+ def perform(model_names = DataMiner.model_names)
59
+ Script.uniq do
60
+ model_names.each do |model_name|
61
+ model_name.constantize.run_data_miner!
62
+ end
62
63
  end
63
64
  end
64
65
 
65
- def resource_names
66
- @resource_names ||= []
67
- end
66
+ # legacy
67
+ alias :run :perform
68
68
 
69
- def call_stack
70
- @call_stack ||= []
71
- end
72
-
73
- # Mine data. Defaults to all resource_names touched by DataMiner.
74
- #
75
- # Options
76
- # * <tt>:resource_names</tt>: array of resource (class) names to mine
77
- def run(options = {})
78
- options = options.dup
79
- options.stringify_keys!
80
- options['preserve_call_stack_between_runs'] = true
81
- resource_names.each do |resource_name|
82
- if options['resource_names'].blank? or options['resource_names'].include?(resource_name)
83
- resource_name.constantize.data_miner_config.run options
69
+ def logger
70
+ @logger || MUTEX.synchronize do
71
+ @logger ||= if defined?(::Rails)
72
+ ::Rails.logger
73
+ elsif defined?(::ActiveRecord) and active_record_logger = ::ActiveRecord::Base.logger
74
+ active_record_logger
75
+ else
76
+ require 'logger'
77
+ ::Logger.new $stderr
84
78
  end
85
79
  end
86
- call_stack.clear
87
80
  end
81
+
82
+ def model_names
83
+ @model_names || MUTEX.synchronize do
84
+ @model_names ||= ::Set.new
85
+ end
86
+ end
87
+
88
88
  end
89
89
 
90
- require 'active_record'
91
90
  ::ActiveRecord::Base.extend ::DataMiner::ActiveRecordExtensions
@@ -1,43 +1,38 @@
1
1
  require 'active_record'
2
- require 'blockenspiel'
2
+ require 'lock_method'
3
3
 
4
4
  class DataMiner
5
5
  module ActiveRecordExtensions
6
- def data_miner_config
7
- @data_miner_config ||= ::DataMiner::Config.new self
8
- end
9
-
10
- def data_miner_config=(config)
11
- @data_miner_config = config
6
+ MUTEX = ::Mutex.new
7
+
8
+ def data_miner_script
9
+ @data_miner_script || MUTEX.synchronize do
10
+ @data_miner_script ||= DataMiner::Script.new(self)
11
+ end
12
12
  end
13
13
 
14
14
  def data_miner_runs
15
- ::DataMiner::Run.scoped :conditions => { :resource_name => name }
15
+ DataMiner::Run.scoped :conditions => { :model_name => name }
16
16
  end
17
17
 
18
- def run_data_miner!(options = {})
19
- data_miner_config.run options
18
+ def run_data_miner!
19
+ data_miner_script.perform
20
20
  end
21
21
 
22
22
  def run_data_miner_on_parent_associations!
23
- reflect_on_all_associations(:belongs_to).each do |assoc|
24
- next if assoc.options[:polymorphic]
25
- assoc.klass.run_data_miner!
23
+ reflect_on_all_associations(:belongs_to).reject do |assoc|
24
+ assoc.options[:polymorphic]
25
+ end.each do |non_polymorphic_belongs_to_assoc|
26
+ non_polymorphic_belongs_to_assoc.klass.run_data_miner!
26
27
  end
27
28
  end
28
29
 
29
30
  def data_miner(options = {}, &blk)
30
- ::DataMiner.instance.resource_names.push name unless ::DataMiner.instance.resource_names.include?(name)
31
-
31
+ DataMiner.model_names.add name
32
32
  unless options[:append]
33
- self.data_miner_config = ::DataMiner::Config.new self
33
+ @data_miner_script = nil
34
34
  end
35
-
36
- ::Blockenspiel.invoke blk, data_miner_config
37
-
38
- data_miner_config.after_invoke
35
+ data_miner_script.append_block blk
39
36
  end
40
37
  end
41
38
  end
42
-
43
-
@@ -2,12 +2,20 @@ require 'conversions'
2
2
 
3
3
  class DataMiner
4
4
  class Attribute
5
- attr_reader :step
6
- attr_reader :name
7
- attr_reader :options
8
-
9
- def resource
10
- step.resource
5
+ class << self
6
+ def check_options(options)
7
+ errors = []
8
+ if options[:dictionary].is_a?(Dictionary)
9
+ errors << %{:dictionary must be a Hash of options}
10
+ end
11
+ if (invalid_option_keys = options.keys - VALID_OPTIONS).any?
12
+ errors << %{Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence}}
13
+ end
14
+ if (units_options = options.select { |k, _| k.to_s.include?('units') }).any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
15
+ errors << %{#{units_options.inspect} is not a valid set of units definitions. Please supply a set like #{VALID_UNIT_DEFINITION_SETS.map(&:inspect).to_sentence}".}
16
+ end
17
+ errors
18
+ end
11
19
  end
12
20
 
13
21
  VALID_OPTIONS = %w{
@@ -29,28 +37,114 @@ class DataMiner
29
37
  field_number
30
38
  chars
31
39
  synthesize
32
- }
40
+ }.map(&:to_sym)
33
41
 
34
- def initialize(step, name, options = {})
35
- @options = ::DataMiner.recursively_stringify_keys options
42
+ VALID_UNIT_DEFINITION_SETS = [
43
+ [:units],
44
+ [:from_units, :to_units],
45
+ [:units_field_name],
46
+ [:units_field_name, :to_units],
47
+ [:units_field_number],
48
+ [:units_field_number, :to_units],
49
+ ]
50
+
51
+ DEFAULT_SPLIT = /\s+/
52
+ DEFAULT_KEEP = 0
53
+ DEFAULT_DELIMITER = ', '
54
+ DEFAULT_NULLIFY = false
55
+ DEFAULT_UPCASE = false
56
+ DEFAULT_OVERWRITE = true
57
+
58
+ attr_reader :step
59
+ attr_reader :name
60
+ attr_reader :synthesize
61
+ attr_reader :matcher
62
+ attr_reader :field_number
63
+ attr_reader :field_name
64
+ # For use when joining a range of field numbers
65
+ attr_reader :delimiter
66
+ attr_reader :chars
67
+ attr_reader :split
68
+ attr_reader :to_units
69
+ attr_reader :from_units
70
+ attr_reader :units_field_number
71
+ attr_reader :units_field_name
72
+ attr_reader :sprintf
73
+ attr_reader :static
36
74
 
75
+ def initialize(step, name, options = {})
76
+ options = options.symbolize_keys
77
+ if (errors = Attribute.check_options(options)).any?
78
+ raise ::ArgumentError, %{[data_miner] Errors on #{inspect}: #{errors.join(';')}}
79
+ end
37
80
  @step = step
38
81
  @name = name
39
-
40
- invalid_option_keys = @options.keys.select { |k| not VALID_OPTIONS.include? k }
41
- raise "Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence} (#{inspect})" if invalid_option_keys.any?
82
+ @synthesize = options[:synthesize]
83
+ if @dictionary_boolean = options.has_key?(:dictionary)
84
+ @dictionary_options = options[:dictionary]
85
+ end
86
+ @matcher = options[:matcher].is_a?(::String) ? options[:matcher].constantize.new : options[:matcher]
87
+ if @static_boolean = options.has_key?(:static)
88
+ @static = options[:static]
89
+ end
90
+ @field_number = options[:field_number]
91
+ @field_name = options.fetch(:field_name, name).to_sym
92
+ @delimiter = options.fetch :delimiter, DEFAULT_DELIMITER
93
+ @chars = options[:chars]
94
+ if split = options[:split]
95
+ @split = split.symbolize_keys
96
+ end
97
+ @nullify_boolean = options.fetch :nullify, DEFAULT_NULLIFY
98
+ @upcase_boolean = options.fetch :upcase, DEFAULT_UPCASE
99
+ @from_units = options[:from_units]
100
+ @to_units = options[:to_units] || options[:units]
101
+ @sprintf = options[:sprintf]
102
+ @overwrite_boolean = options.fetch :overwrite, DEFAULT_OVERWRITE
103
+ @units_field_name = options[:units_field_name]
104
+ @units_field_number = options[:units_field_number]
105
+ @dictionary_mutex = ::Mutex.new
42
106
  end
43
-
44
- def inspect
45
- %{#<DataMiner::Attribute(#{resource}##{name})>}
107
+
108
+ def model
109
+ step.model
110
+ end
111
+
112
+ def static?
113
+ @static_boolean
114
+ end
115
+
116
+ def nullify?
117
+ @nullify_boolean
46
118
  end
47
119
 
48
- def value_in_dictionary(str)
49
- dictionary.lookup str
120
+ def upcase?
121
+ @upcase_boolean
50
122
  end
51
-
52
- def value_in_source(row)
53
- value = if wants_static?
123
+
124
+ def dictionary?
125
+ @dictionary_boolean
126
+ end
127
+
128
+ def convert?
129
+ from_units.present? or units_field_name.present? or units_field_number.present?
130
+ end
131
+
132
+ def units?
133
+ to_units.present? or units_field_name.present? or units_field_number.present?
134
+ end
135
+
136
+ def overwrite?
137
+ @overwrite_boolean
138
+ end
139
+
140
+ def read(row)
141
+ if matcher and matched_row = matcher.match(row)
142
+ return matched_row
143
+ end
144
+ if synthesize
145
+ return synthesize.call(row)
146
+ end
147
+ value = if static?
54
148
  static
55
149
  elsif field_number
56
150
  if field_number.is_a?(::Range)
@@ -58,180 +152,83 @@ class DataMiner
58
152
  else
59
153
  row[field_number]
60
154
  end
61
- elsif field_name == 'row_hash'
155
+ elsif field_name == :row_hash
62
156
  row.row_hash
63
157
  elsif row.is_a?(::Hash) or row.is_a?(::ActiveSupport::OrderedHash)
64
- row[field_name]
158
+ row[field_name.to_s] # remote_table hash keys are always strings
159
+ end
160
+ if value.nil?
161
+ return
162
+ end
163
+ if value.is_a? ::ActiveRecord::Base
164
+ return value
65
165
  end
66
- return nil if value.nil?
67
- return value if value.is_a?(::ActiveRecord::Base) # escape valve for parsers that look up associations directly
68
166
  value = value.to_s
69
- value = value[chars] if wants_chars?
70
- value = do_split(value) if wants_split?
71
- value.gsub! /[ ]+/, ' '
72
- value.strip!
73
- return nil if value.blank? and wants_nullification?
74
- value.upcase! if wants_upcase?
75
- value = do_convert row, value if wants_conversion?
76
- value = do_sprintf value if wants_sprintf?
77
- value
78
- end
79
-
80
- def match_row(row)
81
- matcher.match row
82
- end
83
-
84
- def value_from_row(row)
85
- return match_row row if wants_matcher?
86
- value = value_in_source row
87
- return value if value.is_a? ::ActiveRecord::Base # carry through trapdoor
88
- value = value_in_dictionary value if wants_dictionary?
89
- value = synthesize.call(row) if wants_synthesize?
90
- value
91
- end
92
-
93
- def set_record_from_row(record, row)
94
- return false if !wants_overwriting? and !record.send(name).nil?
95
- record.send "#{name}=", value_from_row(row)
96
- if wants_units?
97
- unit = (to_units || unit_from_source(row)).to_s
98
- unit = nil if unit.blank? and wants_nullification?
99
- record.send "#{name}_units=", unit
167
+ if chars
168
+ value = value[chars]
100
169
  end
101
- end
102
-
103
- def unit_from_source(row)
104
- row[units_field_name || units_field_number].to_s.strip.underscore.to_sym
105
- end
106
-
107
- def do_convert(row, value)
108
- unless wants_units?
109
- raise ::RuntimeError, "[data_miner] If you use 'from_units', you need to set 'to_units' (#{inspect})"
170
+ if split
171
+ pattern = split.fetch :pattern, DEFAULT_SPLIT
172
+ keep = split.fetch :keep, DEFAULT_KEEP
173
+ value = value.to_s.split(pattern)[keep].to_s
110
174
  end
111
- final_from_units = (from_units || unit_from_source(row))
112
- final_to_units = (to_units || unit_from_source(row))
113
- if final_from_units.blank? or final_to_units.blank?
114
- raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
175
+ value = DataMiner.compress_whitespace value
176
+ if nullify? and value.blank?
177
+ return
115
178
  end
116
- value.to_f.convert final_from_units, final_to_units
117
- end
118
-
119
- def do_sprintf(value)
120
- if /\%[0-9\.]*f/.match sprintf
121
- value = value.to_f
122
- elsif /\%[0-9\.]*d/.match sprintf
123
- value = value.to_i
179
+ if upcase?
180
+ value = DataMiner.upcase value
124
181
  end
125
- sprintf % value
126
- end
127
-
128
- def do_split(value)
129
- pattern = split_options['pattern'] || /\s+/ # default is split on whitespace
130
- keep = split_options['keep'] || 0 # default is keep first element
131
- value.to_s.split(pattern)[keep].to_s
132
- end
133
-
134
- def column_type
135
- resource.columns_hash[name.to_s].type
136
- end
137
-
138
- # Our wants and needs :)
139
- def wants_split?
140
- split_options.present?
141
- end
142
- def wants_sprintf?
143
- sprintf.present?
144
- end
145
- def wants_upcase?
146
- upcase.present?
147
- end
148
- def wants_static?
149
- options.has_key? 'static'
150
- end
151
- def wants_nullification?
152
- nullify == true
153
- end
154
- def wants_chars?
155
- chars.present?
156
- end
157
- def wants_synthesize?
158
- synthesize.is_a?(::Proc)
159
- end
160
- def wants_overwriting?
161
- overwrite != false
162
- end
163
- def wants_conversion?
164
- from_units.present? or units_field_name.present? or units_field_number.present?
165
- end
166
- def wants_units?
167
- to_units.present? or units_field_name.present? or units_field_number.present?
168
- end
169
- def wants_dictionary?
170
- options['dictionary'].present?
171
- end
172
- def wants_matcher?
173
- options['matcher'].present?
182
+ if convert?
183
+ final_from_units = from_units || read_units(row)
184
+ final_to_units = to_units || read_units(row)
185
+ if final_from_units.blank? or final_to_units.blank?
186
+ raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
187
+ end
188
+ value = value.to_f.convert final_from_units, final_to_units
189
+ end
190
+ if sprintf
191
+ if sprintf.end_with?('f')
192
+ value = value.to_f
193
+ elsif sprintf.end_with?('d')
194
+ value = value.to_i
195
+ end
196
+ value = sprintf % value
197
+ end
198
+ if dictionary?
199
+ value = dictionary.lookup(value)
200
+ end
201
+ value
174
202
  end
175
203
 
176
- # Options that always have values
177
- def field_name
178
- (options['field_name'] || name).to_s
179
- end
180
- def delimiter
181
- (options['delimiter'] || ', ')
182
- end
183
-
184
- # Options that can't be referred to by their names
185
- def split_options
186
- options['split']
187
- end
188
-
189
- def from_units
190
- options['from_units']
191
- end
192
- def to_units
193
- options['to_units'] || options['units']
194
- end
195
- def sprintf
196
- options['sprintf']
197
- end
198
- def nullify
199
- options['nullify']
200
- end
201
- def overwrite
202
- options['overwrite']
203
- end
204
- def upcase
205
- options['upcase']
206
- end
207
- def units_field_name
208
- options['units_field_name']
209
- end
210
- def units_field_number
211
- options['units_field_number']
212
- end
213
- def field_number
214
- options['field_number']
215
- end
216
- def chars
217
- options['chars']
218
- end
219
- def synthesize
220
- options['synthesize']
221
- end
222
- def static
223
- options['static']
204
+ def set_from_row(target, row)
205
+ if overwrite? or target.send(name).nil?
206
+ target.send "#{name}=", read(row)
207
+ end
208
+ if units? and ((final_to_units = (to_units || read_units(row))) or nullify?)
209
+ target.send "#{name}_units=", final_to_units
210
+ end
224
211
  end
225
- # must be cleared before every run! (because it relies on remote data)
212
+
226
213
  def dictionary
227
- @dictionary ||= (options['dictionary'].is_a?(Dictionary) ? options['dictionary'] : Dictionary.new(options['dictionary']))
214
+ @dictionary || @dictionary_mutex.synchronize do
215
+ @dictionary ||= Dictionary.new(@dictionary_options)
216
+ end
228
217
  end
229
- def matcher
230
- @matcher ||= (options['matcher'].is_a?(::String) ? options['matcher'].constantize.new : options['matcher'])
218
+
219
+ def refresh
220
+ @dictionary = nil
231
221
  end
232
-
222
+
223
+ private
224
+
225
+ def read_units(row)
226
+ if units = row[units_field_name || units_field_number]
227
+ DataMiner.compress_whitespace(units).underscore.to_sym
228
+ end
229
+ end
230
+
233
231
  def free
234
- @dictionary.free if @dictionary.is_a?(Dictionary)
235
232
  @dictionary = nil
236
233
  end
237
234
  end