data_miner 1.3.8 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. data/CHANGELOG +42 -0
  2. data/Gemfile +19 -3
  3. data/README.rdoc +3 -3
  4. data/Rakefile +13 -15
  5. data/data_miner.gemspec +4 -15
  6. data/lib/data_miner.rb +69 -70
  7. data/lib/data_miner/active_record_extensions.rb +17 -22
  8. data/lib/data_miner/attribute.rb +176 -179
  9. data/lib/data_miner/dictionary.rb +38 -31
  10. data/lib/data_miner/run.rb +49 -18
  11. data/lib/data_miner/script.rb +116 -0
  12. data/lib/data_miner/step.rb +5 -0
  13. data/lib/data_miner/step/import.rb +74 -0
  14. data/lib/data_miner/step/process.rb +34 -0
  15. data/lib/data_miner/step/tap.rb +134 -0
  16. data/lib/data_miner/version.rb +1 -1
  17. data/test/helper.rb +26 -24
  18. data/test/support/breeds.xls +0 -0
  19. data/test/support/pet_color_dictionary.en.csv +5 -0
  20. data/test/support/pet_color_dictionary.es.csv +5 -0
  21. data/test/support/pets.csv +5 -0
  22. data/test/support/pets_funny.csv +4 -0
  23. data/test/test_data_miner.rb +103 -0
  24. data/test/test_earth_import.rb +25 -0
  25. data/test/test_earth_tap.rb +25 -0
  26. data/test/test_safety.rb +43 -0
  27. metadata +72 -78
  28. data/.document +0 -5
  29. data/lib/data_miner/config.rb +0 -124
  30. data/lib/data_miner/import.rb +0 -93
  31. data/lib/data_miner/process.rb +0 -38
  32. data/lib/data_miner/tap.rb +0 -143
  33. data/test/support/aircraft.rb +0 -102
  34. data/test/support/airport.rb +0 -16
  35. data/test/support/automobile_fuel_type.rb +0 -40
  36. data/test/support/automobile_variant.rb +0 -362
  37. data/test/support/country.rb +0 -15
  38. data/test/support/test_database.rb +0 -311
  39. data/test/test_data_miner_attribute.rb +0 -111
  40. data/test/test_data_miner_process.rb +0 -18
  41. data/test/test_old_syntax.rb +0 -825
  42. data/test/test_tap.rb +0 -21
data/CHANGELOG CHANGED
@@ -1,3 +1,45 @@
1
+ 2.0.1 / 2012-04-18
2
+
3
+ * Enhancements
4
+
5
+ * DataMiner.run -> DataMiner.perform
6
+ * Some basic tests that don't rely on Earth
7
+
8
+ * Bug fixes
9
+
10
+ * Fix the "call stack" - the thing that keeps infinite loops from occurring
11
+ * Make sure sources get refreshed every time you re-run data miner
12
+ * Make sure dictionaries " " " " " " "
13
+
14
+ 2.0.0 / 2012-04-17
15
+
16
+ * Breaking changes
17
+
18
+ * Renamed data_miner_config to data_miner_script (etc. for class/method naming)
19
+ * Simplify DataMiner.run arguments
20
+ was: DataMiner.run(:resource_names => ['Country'])
21
+ now: DataMiner.run(['Country'])
22
+ * Rename "resources" to "models"
23
+ was: DataMiner.resource_names
24
+ now: DataMiner.model_names
25
+ * Expect procs instead of lambdas (because they are just instance-eval'ed now, Blockenspiel is no longer guessing where to find methods)
26
+ was: :synthesize => lambda { class_method }
27
+ now: :synthesize => proc { Klass.class_method }
28
+ * Use UnicodeUtils to correctly upcase and downcase
29
+ * Use throw/catch instead of exceptions to signal to force a step to stop successfully
30
+ was: DataMiner::Succeed
31
+ now: throw :data_miner_succeed
32
+ * Import steps no longer accept deprecated :table => RemoteTable or :errata => Errata options
33
+ * DataMiner::Run structure has changed (in addition to other internals)
34
+
35
+ * Enhancements
36
+
37
+ * Easy to modify data miner scripts using DataMiner::Script#{append|prepend|append_once|prepend_once}
38
+ * DRYer codebase
39
+ * No longer depends on Blockenspiel
40
+ * Uses UnixUtils instead of its own spawning code
41
+ * Should be threadsafe (no more autoload, has mutexes, more careful/fewer singletons, etc.)
42
+
1
43
  1.1.0
2
44
  * fixed dependency issues
3
45
  1.0.0
data/Gemfile CHANGED
@@ -1,4 +1,20 @@
1
- source "http://rubygems.org"
1
+ source :rubygems
2
2
 
3
- # Specify your gem's dependencies in data_miner.gemspec
4
- gemspec :path => '.'
3
+ gemspec
4
+
5
+ # development dependencies
6
+ gem 'fuzzy_match'
7
+ gem 'minitest'
8
+ gem 'minitest-reporters'
9
+ gem 'mysql2'
10
+ gem 'rake'
11
+ gem 'yard'
12
+ gem 'earth'
13
+ if RUBY_VERSION >= '1.9'
14
+ gem 'unicode_utils'
15
+ end
16
+ # if RUBY_VERSION >= '1.9'
17
+ # gem 'ruby-debug19' # replace with debugger?
18
+ # else
19
+ # gem 'ruby-debug'
20
+ # end
data/README.rdoc CHANGED
@@ -9,7 +9,7 @@ Programmatically import useful data into your ActiveRecord models.
9
9
  You define <tt>data_miner</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
10
10
 
11
11
  class Country < ActiveRecord::Base
12
- set_primary_key :iso_3166_code
12
+ self.primary_key = :iso_3166_code
13
13
 
14
14
  data_miner do
15
15
  import 'the official ISO country list',
@@ -62,7 +62,7 @@ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA
62
62
  # Tell ActiveRecord that we want to use a string primary key.
63
63
  # This makes it easier to repeatedly truncate and re-import this
64
64
  # table without breaking associations.
65
- set_primary_key :icao_code
65
+ self.primary_key = :icao_code
66
66
 
67
67
  # Use the mini_record-compat gem to define the database schema in-line.
68
68
  # It will destructively and automatically add/remove columns.
@@ -268,7 +268,7 @@ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA
268
268
  update_all "weighting = (#{segments.project(segments[:passengers].sum).where(aircraft[:bts_aircraft_type_code].eq(segments[:bts_aircraft_type_code])).to_sql})"
269
269
  end
270
270
 
271
- # And finally re-run the import of resources that depend on this resource.
271
+ # And finally re-run the import of resources that depend on this model.
272
272
  # Don't worry about calling Aircraft.run_data_miner! at the top of AircraftManufacturer's data_miner block;
273
273
  # that's the right way to do dependencies. It won't get called twice in the same run.
274
274
  [ AircraftManufacturer ].each do |synthetic_resource|
data/Rakefile CHANGED
@@ -1,27 +1,25 @@
1
- require 'bundler'
2
- Bundler::GemHelper.install_tasks
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
3
 
4
4
  require 'rake'
5
5
  require 'rake/testtask'
6
6
  Rake::TestTask.new(:test) do |test|
7
- test.libs << 'lib' << 'test'
7
+ test.libs << 'test'
8
8
  test.pattern = 'test/**/test_*.rb'
9
9
  test.verbose = true
10
10
  end
11
11
 
12
- task :default => :test
13
-
14
- begin
15
- require 'rake/rdoctask'
16
- Rake::RDocTask.new do |rdoc|
17
- rdoc.rdoc_dir = 'rdoc'
18
- rdoc.title = 'data_miner'
19
- rdoc.options << '--line-numbers' << '--inline-source'
20
- rdoc.rdoc_files.include('README*')
21
- rdoc.rdoc_files.include('lib/**/*.rb')
12
+ task :test_separately do
13
+ Dir[File.expand_path('../test/**/test_*.rb', __FILE__)].each do |path|
14
+ system "rake test TEST=#{path}"
22
15
  end
23
- rescue LoadError
24
- puts "Rdoc is not available"
16
+ end
17
+
18
+ task :default => :test_separately
19
+
20
+ require 'yard'
21
+ YARD::Rake::YardocTask.new do |y|
22
+ y.options << '--no-private'
25
23
  end
26
24
 
27
25
  gemspec = eval(File.read(Dir["*.gemspec"].first))
data/data_miner.gemspec CHANGED
@@ -1,11 +1,9 @@
1
1
  # -*- encoding: utf-8 -*-
2
- $:.push File.expand_path("../lib", __FILE__)
3
- require "data_miner/version"
2
+ require File.expand_path("../lib/data_miner/version", __FILE__)
4
3
 
5
4
  Gem::Specification.new do |s|
6
5
  s.name = "data_miner"
7
6
  s.version = DataMiner::VERSION
8
- s.platform = Gem::Platform::RUBY
9
7
  s.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner"]
10
8
  s.email = ["seamus@abshere.net"]
11
9
  s.homepage = "https://github.com/seamusabshere/data_miner"
@@ -23,17 +21,8 @@ Gem::Specification.new do |s|
23
21
  s.add_runtime_dependency 'activerecord', '>=2.3.4'
24
22
  s.add_runtime_dependency 'activesupport', '>=2.3.4'
25
23
  s.add_runtime_dependency 'conversions', '>=1.4.4'
26
- s.add_runtime_dependency 'blockenspiel', '>=0.3.2'
27
24
  s.add_runtime_dependency 'errata', '>=1.0.1'
28
- s.add_development_dependency 'mini_record-compat'
29
- s.add_development_dependency 'loose_tight_dictionary', ">=0.0.5"
30
- s.add_development_dependency 'test-unit'
31
- s.add_development_dependency 'shoulda'
32
- s.add_development_dependency 'mysql'
33
- s.add_development_dependency 'rake'
34
- # if RUBY_VERSION >= '1.9'
35
- # s.add_development_dependency 'ruby-debug19'
36
- # else
37
- # s.add_development_dependency 'ruby-debug'
38
- # end
25
+ s.add_runtime_dependency 'active_record_inline_schema'
26
+ s.add_runtime_dependency 'aasm'
27
+ s.add_runtime_dependency 'lock_method', '>=0.5.1'
39
28
  end
data/lib/data_miner.rb CHANGED
@@ -1,91 +1,90 @@
1
+ require 'singleton'
2
+ require 'set'
1
3
  require 'active_support'
2
4
  require 'active_support/version'
3
- %w{
4
- active_support/core_ext/array/conversions
5
- active_support/core_ext/string/access
6
- active_support/core_ext/string/multibyte
7
- }.each do |active_support_3_requirement|
8
- require active_support_3_requirement
9
- end if ::ActiveSupport::VERSION::MAJOR == 3
5
+ if ::ActiveSupport::VERSION::MAJOR >= 3
6
+ require 'active_support/core_ext'
7
+ end
8
+ require 'active_record'
9
+ if RUBY_VERSION >= '1.9'
10
+ begin
11
+ require 'unicode_utils/downcase'
12
+ rescue LoadError
13
+ Kernel.warn '[data_miner] You may wish to include unicode_utils in your Gemfile to improve accuracy of downcasing'
14
+ end
15
+ end
10
16
 
11
- require 'singleton'
17
+ require 'data_miner/active_record_extensions'
18
+ require 'data_miner/attribute'
19
+ require 'data_miner/script'
20
+ require 'data_miner/dictionary'
21
+ require 'data_miner/step'
22
+ require 'data_miner/step/import'
23
+ require 'data_miner/step/tap'
24
+ require 'data_miner/step/process'
25
+ require 'data_miner/run'
12
26
 
13
27
  class DataMiner
14
- include ::Singleton
15
-
16
- class MissingHashColumn < StandardError; end
17
- class Finish < StandardError; end
18
- class Skip < StandardError; end
19
-
20
- autoload :ActiveRecordExtensions, 'data_miner/active_record_extensions'
21
- autoload :Attribute, 'data_miner/attribute'
22
- autoload :Config, 'data_miner/config'
23
- autoload :Dictionary, 'data_miner/dictionary'
24
- autoload :Import, 'data_miner/import'
25
- autoload :Tap, 'data_miner/tap'
26
- autoload :Process, 'data_miner/process'
27
- autoload :Run, 'data_miner/run'
28
-
29
28
  class << self
29
+ delegate :perform, :to => :instance
30
+ delegate :run, :to => :instance
30
31
  delegate :logger, :to => :instance
31
32
  delegate :logger=, :to => :instance
32
- delegate :run, :to => :instance
33
- delegate :resource_names, :to => :instance
34
- end
35
-
36
- # http://avdi.org/devblog/2009/07/14/recursively-symbolize-keys/
37
- def self.recursively_stringify_keys(hash)
38
- hash.inject(::Hash.new) do |result, (key, value)|
39
- new_key = case key
40
- when ::Symbol then key.to_s
41
- else key
42
- end
43
- new_value = case value
44
- when ::Hash then ::DataMiner.recursively_stringify_keys(value)
45
- else value
46
- end
47
- result[new_key] = new_value
48
- result
33
+ delegate :model_names, :to => :instance
34
+
35
+ # @private
36
+ def downcase(str)
37
+ defined?(::UnicodeUtils) ? ::UnicodeUtils.downcase(str) : str.downcase
38
+ end
39
+
40
+ # @private
41
+ def upcase(str)
42
+ defined?(::UnicodeUtils) ? ::UnicodeUtils.upcase(str) : str.upcase
43
+ end
44
+
45
+ # @private
46
+ def compress_whitespace(str)
47
+ str.gsub(INNER_SPACE, ' ').strip
49
48
  end
50
49
  end
51
-
50
+
51
+ MUTEX = ::Mutex.new
52
+ INNER_SPACE = /[ ]+/
53
+
54
+ include ::Singleton
55
+
52
56
  attr_writer :logger
53
- def logger
54
- return @logger if @logger
55
- if defined?(::Rails)
56
- @logger = ::Rails.logger
57
- elsif defined?(::ActiveRecord) and active_record_logger = ::ActiveRecord::Base.logger
58
- @logger = active_record_logger
59
- else
60
- require 'logger'
61
- @logger = ::Logger.new $stderr
57
+
58
+ def perform(model_names = DataMiner.model_names)
59
+ Script.uniq do
60
+ model_names.each do |model_name|
61
+ model_name.constantize.run_data_miner!
62
+ end
62
63
  end
63
64
  end
64
65
 
65
- def resource_names
66
- @resource_names ||= []
67
- end
66
+ # legacy
67
+ alias :run :perform
68
68
 
69
- def call_stack
70
- @call_stack ||= []
71
- end
72
-
73
- # Mine data. Defaults to all resource_names touched by DataMiner.
74
- #
75
- # Options
76
- # * <tt>:resource_names</tt>: array of resource (class) names to mine
77
- def run(options = {})
78
- options = options.dup
79
- options.stringify_keys!
80
- options['preserve_call_stack_between_runs'] = true
81
- resource_names.each do |resource_name|
82
- if options['resource_names'].blank? or options['resource_names'].include?(resource_name)
83
- resource_name.constantize.data_miner_config.run options
69
+ def logger
70
+ @logger || MUTEX.synchronize do
71
+ @logger ||= if defined?(::Rails)
72
+ ::Rails.logger
73
+ elsif defined?(::ActiveRecord) and active_record_logger = ::ActiveRecord::Base.logger
74
+ active_record_logger
75
+ else
76
+ require 'logger'
77
+ ::Logger.new $stderr
84
78
  end
85
79
  end
86
- call_stack.clear
87
80
  end
81
+
82
+ def model_names
83
+ @model_names || MUTEX.synchronize do
84
+ @model_names ||= ::Set.new
85
+ end
86
+ end
87
+
88
88
  end
89
89
 
90
- require 'active_record'
91
90
  ::ActiveRecord::Base.extend ::DataMiner::ActiveRecordExtensions
data/lib/data_miner/active_record_extensions.rb CHANGED
@@ -1,43 +1,38 @@
1
1
  require 'active_record'
2
- require 'blockenspiel'
2
+ require 'lock_method'
3
3
 
4
4
  class DataMiner
5
5
  module ActiveRecordExtensions
6
- def data_miner_config
7
- @data_miner_config ||= ::DataMiner::Config.new self
8
- end
9
-
10
- def data_miner_config=(config)
11
- @data_miner_config = config
6
+ MUTEX = ::Mutex.new
7
+
8
+ def data_miner_script
9
+ @data_miner_script || MUTEX.synchronize do
10
+ @data_miner_script ||= DataMiner::Script.new(self)
11
+ end
12
12
  end
13
13
 
14
14
  def data_miner_runs
15
- ::DataMiner::Run.scoped :conditions => { :resource_name => name }
15
+ DataMiner::Run.scoped :conditions => { :model_name => name }
16
16
  end
17
17
 
18
- def run_data_miner!(options = {})
19
- data_miner_config.run options
18
+ def run_data_miner!
19
+ data_miner_script.perform
20
20
  end
21
21
 
22
22
  def run_data_miner_on_parent_associations!
23
- reflect_on_all_associations(:belongs_to).each do |assoc|
24
- next if assoc.options[:polymorphic]
25
- assoc.klass.run_data_miner!
23
+ reflect_on_all_associations(:belongs_to).reject do |assoc|
24
+ assoc.options[:polymorphic]
25
+ end.each do |non_polymorphic_belongs_to_assoc|
26
+ non_polymorphic_belongs_to_assoc.klass.run_data_miner!
26
27
  end
27
28
  end
28
29
 
29
30
  def data_miner(options = {}, &blk)
30
- ::DataMiner.instance.resource_names.push name unless ::DataMiner.instance.resource_names.include?(name)
31
-
31
+ DataMiner.model_names.add name
32
32
  unless options[:append]
33
- self.data_miner_config = ::DataMiner::Config.new self
33
+ @data_miner_script = nil
34
34
  end
35
-
36
- ::Blockenspiel.invoke blk, data_miner_config
37
-
38
- data_miner_config.after_invoke
35
+ data_miner_script.append_block blk
39
36
  end
40
37
  end
41
38
  end
42
-
43
-
data/lib/data_miner/attribute.rb CHANGED
@@ -2,12 +2,20 @@ require 'conversions'
2
2
 
3
3
  class DataMiner
4
4
  class Attribute
5
- attr_reader :step
6
- attr_reader :name
7
- attr_reader :options
8
-
9
- def resource
10
- step.resource
5
+ class << self
6
+ def check_options(options)
7
+ errors = []
8
+ if options[:dictionary].is_a?(Dictionary)
9
+ errors << %{:dictionary must be a Hash of options}
10
+ end
11
+ if (invalid_option_keys = options.keys - VALID_OPTIONS).any?
12
+ errors << %{Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence}}
13
+ end
14
+ if (units_options = options.select { |k, _| k.to_s.include?('units') }).any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
15
+ errors << %{#{units_options.inspect} is not a valid set of units definitions. Please supply a set like #{VALID_UNIT_DEFINITION_SETS.map(&:inspect).to_sentence}".}
16
+ end
17
+ errors
18
+ end
11
19
  end
12
20
 
13
21
  VALID_OPTIONS = %w{
@@ -29,28 +37,114 @@ class DataMiner
29
37
  field_number
30
38
  chars
31
39
  synthesize
32
- }
40
+ }.map(&:to_sym)
33
41
 
34
- def initialize(step, name, options = {})
35
- @options = ::DataMiner.recursively_stringify_keys options
42
+ VALID_UNIT_DEFINITION_SETS = [
43
+ [:units],
44
+ [:from_units, :to_units],
45
+ [:units_field_name],
46
+ [:units_field_name, :to_units],
47
+ [:units_field_number],
48
+ [:units_field_number, :to_units],
49
+ ]
50
+
51
+ DEFAULT_SPLIT = /\s+/
52
+ DEFAULT_KEEP = 0
53
+ DEFAULT_DELIMITER = ', '
54
+ DEFAULT_NULLIFY = false
55
+ DEFAULT_UPCASE = false
56
+ DEFAULT_OVERWRITE = true
57
+
58
+ attr_reader :step
59
+ attr_reader :name
60
+ attr_reader :synthesize
61
+ attr_reader :matcher
62
+ attr_reader :field_number
63
+ attr_reader :field_name
64
+ # For use when joining a range of field numbers
65
+ attr_reader :delimiter
66
+ attr_reader :chars
67
+ attr_reader :split
68
+ attr_reader :to_units
69
+ attr_reader :from_units
70
+ attr_reader :units_field_number
71
+ attr_reader :units_field_name
72
+ attr_reader :sprintf
73
+ attr_reader :static
36
74
 
75
+ def initialize(step, name, options = {})
76
+ options = options.symbolize_keys
77
+ if (errors = Attribute.check_options(options)).any?
78
+ raise ::ArgumentError, %{[data_miner] Errors on #{inspect}: #{errors.join(';')}}
79
+ end
37
80
  @step = step
38
81
  @name = name
39
-
40
- invalid_option_keys = @options.keys.select { |k| not VALID_OPTIONS.include? k }
41
- raise "Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence} (#{inspect})" if invalid_option_keys.any?
82
+ @synthesize = options[:synthesize]
83
+ if @dictionary_boolean = options.has_key?(:dictionary)
84
+ @dictionary_options = options[:dictionary]
85
+ end
86
+ @matcher = options[:matcher].is_a?(::String) ? options[:matcher].constantize.new : options[:matcher]
87
+ if @static_boolean = options.has_key?(:static)
88
+ @static = options[:static]
89
+ end
90
+ @field_number = options[:field_number]
91
+ @field_name = options.fetch(:field_name, name).to_sym
92
+ @delimiter = options.fetch :delimiter, DEFAULT_DELIMITER
93
+ @chars = options[:chars]
94
+ if split = options[:split]
95
+ @split = split.symbolize_keys
96
+ end
97
+ @nullify_boolean = options.fetch :nullify, DEFAULT_NULLIFY
98
+ @upcase_boolean = options.fetch :upcase, DEFAULT_UPCASE
99
+ @from_units = options[:from_units]
100
+ @to_units = options[:to_units] || options[:units]
101
+ @sprintf = options[:sprintf]
102
+ @overwrite_boolean = options.fetch :overwrite, DEFAULT_OVERWRITE
103
+ @units_field_name = options[:units_field_name]
104
+ @units_field_number = options[:units_field_number]
105
+ @dictionary_mutex = ::Mutex.new
42
106
  end
43
-
44
- def inspect
45
- %{#<DataMiner::Attribute(#{resource}##{name})>}
107
+
108
+ def model
109
+ step.model
110
+ end
111
+
112
+ def static?
113
+ @static_boolean
114
+ end
115
+
116
+ def nullify?
117
+ @nullify_boolean
46
118
  end
47
119
 
48
- def value_in_dictionary(str)
49
- dictionary.lookup str
120
+ def upcase?
121
+ @upcase_boolean
50
122
  end
51
-
52
- def value_in_source(row)
53
- value = if wants_static?
123
+
124
+ def dictionary?
125
+ @dictionary_boolean
126
+ end
127
+
128
+ def convert?
129
+ from_units.present? or units_field_name.present? or units_field_number.present?
130
+ end
131
+
132
+ def units?
133
+ to_units.present? or units_field_name.present? or units_field_number.present?
134
+ end
135
+
136
+ def overwrite?
137
+ @overwrite_boolean
138
+ end
139
+
140
+ def read(row)
141
+ if matcher and matched_row = matcher.match(row)
142
+ return matched_row
143
+ end
144
+ if synthesize
145
+ return synthesize.call(row)
146
+ end
147
+ value = if static?
54
148
  static
55
149
  elsif field_number
56
150
  if field_number.is_a?(::Range)
@@ -58,180 +152,83 @@ class DataMiner
58
152
  else
59
153
  row[field_number]
60
154
  end
61
- elsif field_name == 'row_hash'
155
+ elsif field_name == :row_hash
62
156
  row.row_hash
63
157
  elsif row.is_a?(::Hash) or row.is_a?(::ActiveSupport::OrderedHash)
64
- row[field_name]
158
+ row[field_name.to_s] # remote_table hash keys are always strings
159
+ end
160
+ if value.nil?
161
+ return
162
+ end
163
+ if value.is_a? ::ActiveRecord::Base
164
+ return value
65
165
  end
66
- return nil if value.nil?
67
- return value if value.is_a?(::ActiveRecord::Base) # escape valve for parsers that look up associations directly
68
166
  value = value.to_s
69
- value = value[chars] if wants_chars?
70
- value = do_split(value) if wants_split?
71
- value.gsub! /[ ]+/, ' '
72
- value.strip!
73
- return nil if value.blank? and wants_nullification?
74
- value.upcase! if wants_upcase?
75
- value = do_convert row, value if wants_conversion?
76
- value = do_sprintf value if wants_sprintf?
77
- value
78
- end
79
-
80
- def match_row(row)
81
- matcher.match row
82
- end
83
-
84
- def value_from_row(row)
85
- return match_row row if wants_matcher?
86
- value = value_in_source row
87
- return value if value.is_a? ::ActiveRecord::Base # carry through trapdoor
88
- value = value_in_dictionary value if wants_dictionary?
89
- value = synthesize.call(row) if wants_synthesize?
90
- value
91
- end
92
-
93
- def set_record_from_row(record, row)
94
- return false if !wants_overwriting? and !record.send(name).nil?
95
- record.send "#{name}=", value_from_row(row)
96
- if wants_units?
97
- unit = (to_units || unit_from_source(row)).to_s
98
- unit = nil if unit.blank? and wants_nullification?
99
- record.send "#{name}_units=", unit
167
+ if chars
168
+ value = value[chars]
100
169
  end
101
- end
102
-
103
- def unit_from_source(row)
104
- row[units_field_name || units_field_number].to_s.strip.underscore.to_sym
105
- end
106
-
107
- def do_convert(row, value)
108
- unless wants_units?
109
- raise ::RuntimeError, "[data_miner] If you use 'from_units', you need to set 'to_units' (#{inspect})"
170
+ if split
171
+ pattern = split.fetch :pattern, DEFAULT_SPLIT
172
+ keep = split.fetch :keep, DEFAULT_KEEP
173
+ value = value.to_s.split(pattern)[keep].to_s
110
174
  end
111
- final_from_units = (from_units || unit_from_source(row))
112
- final_to_units = (to_units || unit_from_source(row))
113
- if final_from_units.blank? or final_to_units.blank?
114
- raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
175
+ value = DataMiner.compress_whitespace value
176
+ if nullify? and value.blank?
177
+ return
115
178
  end
116
- value.to_f.convert final_from_units, final_to_units
117
- end
118
-
119
- def do_sprintf(value)
120
- if /\%[0-9\.]*f/.match sprintf
121
- value = value.to_f
122
- elsif /\%[0-9\.]*d/.match sprintf
123
- value = value.to_i
179
+ if upcase?
180
+ value = DataMiner.upcase value
124
181
  end
125
- sprintf % value
126
- end
127
-
128
- def do_split(value)
129
- pattern = split_options['pattern'] || /\s+/ # default is split on whitespace
130
- keep = split_options['keep'] || 0 # default is keep first element
131
- value.to_s.split(pattern)[keep].to_s
132
- end
133
-
134
- def column_type
135
- resource.columns_hash[name.to_s].type
136
- end
137
-
138
- # Our wants and needs :)
139
- def wants_split?
140
- split_options.present?
141
- end
142
- def wants_sprintf?
143
- sprintf.present?
144
- end
145
- def wants_upcase?
146
- upcase.present?
147
- end
148
- def wants_static?
149
- options.has_key? 'static'
150
- end
151
- def wants_nullification?
152
- nullify == true
153
- end
154
- def wants_chars?
155
- chars.present?
156
- end
157
- def wants_synthesize?
158
- synthesize.is_a?(::Proc)
159
- end
160
- def wants_overwriting?
161
- overwrite != false
162
- end
163
- def wants_conversion?
164
- from_units.present? or units_field_name.present? or units_field_number.present?
165
- end
166
- def wants_units?
167
- to_units.present? or units_field_name.present? or units_field_number.present?
168
- end
169
- def wants_dictionary?
170
- options['dictionary'].present?
171
- end
172
- def wants_matcher?
173
- options['matcher'].present?
182
+ if convert?
183
+ final_from_units = from_units || read_units(row)
184
+ final_to_units = to_units || read_units(row)
185
+ if final_from_units.blank? or final_to_units.blank?
186
+ raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
187
+ end
188
+ value = value.to_f.convert final_from_units, final_to_units
189
+ end
190
+ if sprintf
191
+ if sprintf.end_with?('f')
192
+ value = value.to_f
193
+ elsif sprintf.end_with?('d')
194
+ value = value.to_i
195
+ end
196
+ value = sprintf % value
197
+ end
198
+ if dictionary?
199
+ value = dictionary.lookup(value)
200
+ end
201
+ value
174
202
  end
175
203
 
176
- # Options that always have values
177
- def field_name
178
- (options['field_name'] || name).to_s
179
- end
180
- def delimiter
181
- (options['delimiter'] || ', ')
182
- end
183
-
184
- # Options that can't be referred to by their names
185
- def split_options
186
- options['split']
187
- end
188
-
189
- def from_units
190
- options['from_units']
191
- end
192
- def to_units
193
- options['to_units'] || options['units']
194
- end
195
- def sprintf
196
- options['sprintf']
197
- end
198
- def nullify
199
- options['nullify']
200
- end
201
- def overwrite
202
- options['overwrite']
203
- end
204
- def upcase
205
- options['upcase']
206
- end
207
- def units_field_name
208
- options['units_field_name']
209
- end
210
- def units_field_number
211
- options['units_field_number']
212
- end
213
- def field_number
214
- options['field_number']
215
- end
216
- def chars
217
- options['chars']
218
- end
219
- def synthesize
220
- options['synthesize']
221
- end
222
- def static
223
- options['static']
204
+ def set_from_row(target, row)
205
+ if overwrite? or target.send(name).nil?
206
+ target.send "#{name}=", read(row)
207
+ end
208
+ if units? and ((final_to_units = (to_units || read_units(row))) or nullify?)
209
+ target.send "#{name}_units=", final_to_units
210
+ end
224
211
  end
225
- # must be cleared before every run! (because it relies on remote data)
212
+
226
213
  def dictionary
227
- @dictionary ||= (options['dictionary'].is_a?(Dictionary) ? options['dictionary'] : Dictionary.new(options['dictionary']))
214
+ @dictionary || @dictionary_mutex.synchronize do
215
+ @dictionary ||= Dictionary.new(@dictionary_options)
216
+ end
228
217
  end
229
- def matcher
230
- @matcher ||= (options['matcher'].is_a?(::String) ? options['matcher'].constantize.new : options['matcher'])
218
+
219
+ def refresh
220
+ @dictionary = nil
231
221
  end
232
-
222
+
223
+ private
224
+
225
+ def read_units(row)
226
+ if units = row[units_field_name || units_field_number]
227
+ DataMiner.compress_whitespace(units).underscore.to_sym
228
+ end
229
+ end
230
+
233
231
  def free
234
- @dictionary.free if @dictionary.is_a?(Dictionary)
235
232
  @dictionary = nil
236
233
  end
237
234
  end