data_miner 1.3.8 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +42 -0
- data/Gemfile +19 -3
- data/README.rdoc +3 -3
- data/Rakefile +13 -15
- data/data_miner.gemspec +4 -15
- data/lib/data_miner.rb +69 -70
- data/lib/data_miner/active_record_extensions.rb +17 -22
- data/lib/data_miner/attribute.rb +176 -179
- data/lib/data_miner/dictionary.rb +38 -31
- data/lib/data_miner/run.rb +49 -18
- data/lib/data_miner/script.rb +116 -0
- data/lib/data_miner/step.rb +5 -0
- data/lib/data_miner/step/import.rb +74 -0
- data/lib/data_miner/step/process.rb +34 -0
- data/lib/data_miner/step/tap.rb +134 -0
- data/lib/data_miner/version.rb +1 -1
- data/test/helper.rb +26 -24
- data/test/support/breeds.xls +0 -0
- data/test/support/pet_color_dictionary.en.csv +5 -0
- data/test/support/pet_color_dictionary.es.csv +5 -0
- data/test/support/pets.csv +5 -0
- data/test/support/pets_funny.csv +4 -0
- data/test/test_data_miner.rb +103 -0
- data/test/test_earth_import.rb +25 -0
- data/test/test_earth_tap.rb +25 -0
- data/test/test_safety.rb +43 -0
- metadata +72 -78
- data/.document +0 -5
- data/lib/data_miner/config.rb +0 -124
- data/lib/data_miner/import.rb +0 -93
- data/lib/data_miner/process.rb +0 -38
- data/lib/data_miner/tap.rb +0 -143
- data/test/support/aircraft.rb +0 -102
- data/test/support/airport.rb +0 -16
- data/test/support/automobile_fuel_type.rb +0 -40
- data/test/support/automobile_variant.rb +0 -362
- data/test/support/country.rb +0 -15
- data/test/support/test_database.rb +0 -311
- data/test/test_data_miner_attribute.rb +0 -111
- data/test/test_data_miner_process.rb +0 -18
- data/test/test_old_syntax.rb +0 -825
- data/test/test_tap.rb +0 -21
data/CHANGELOG
CHANGED
@@ -1,3 +1,45 @@
|
|
1
|
+
2.0.1 / 2012-04-18
|
2
|
+
|
3
|
+
* Enhancements
|
4
|
+
|
5
|
+
* DataMiner.run -> DataMiner.perform
|
6
|
+
* Some basic tests that don't rely on Earth
|
7
|
+
|
8
|
+
* Bug fixes
|
9
|
+
|
10
|
+
* Fix the "call stack" - the thing that keeps infinite loops from occurring
|
11
|
+
* Make sure sources get refreshed every time you re-run data miner
|
12
|
+
* Make sure dictionaries get refreshed every time you re-run data miner
|
13
|
+
|
14
|
+
2.0.0 / 2012-04-17
|
15
|
+
|
16
|
+
* Breaking changes
|
17
|
+
|
18
|
+
* Renamed data_miner_config to data_miner_script (etc. for class/method naming)
|
19
|
+
* Simplify DataMiner.run arguments
|
20
|
+
was: DataMiner.run(:resource_names => ['Country'])
|
21
|
+
now: DataMiner.run(['Country'])
|
22
|
+
* Rename "resources" to "models"
|
23
|
+
was: DataMiner.resource_names
|
24
|
+
now: DataMiner.model_names
|
25
|
+
* Expect procs instead of lambdas (because they are just instance-eval'ed now, Blockenspiel is no longer guessing where to find methods)
|
26
|
+
was: :synthesize => lambda { class_method }
|
27
|
+
now: :synthesize => proc { Klass.class_method }
|
28
|
+
* Use UnicodeUtils to correctly upcase and downcase
|
29
|
+
* Use throw/catch instead of exceptions to signal that a step should stop successfully
|
30
|
+
was: DataMiner::Succeed
|
31
|
+
now: throw :data_miner_succeed
|
32
|
+
* Import steps no longer accept deprecated :table => RemoteTable or :errata => Errata options
|
33
|
+
* DataMiner::Run structure has changed (in addition to other internals)
|
34
|
+
|
35
|
+
* Enhancements
|
36
|
+
|
37
|
+
* Easy to modify data miner scripts using DataMiner::Script#{append|prepend|append_once|prepend_once}
|
38
|
+
* DRYer codebase
|
39
|
+
* No longer depends on Blockenspiel
|
40
|
+
* Uses UnixUtils instead of its own spawning code
|
41
|
+
* Should be threadsafe (no more autoload, has mutexes, more careful/fewer singletons, etc.)
|
42
|
+
|
1
43
|
1.1.0
|
2
44
|
* fixed dependency issues
|
3
45
|
1.0.0
|
data/Gemfile
CHANGED
@@ -1,4 +1,20 @@
|
|
1
|
-
source
|
1
|
+
source :rubygems
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
gemspec
|
4
|
+
|
5
|
+
# development dependencies
|
6
|
+
gem 'fuzzy_match'
|
7
|
+
gem 'minitest'
|
8
|
+
gem 'minitest-reporters'
|
9
|
+
gem 'mysql2'
|
10
|
+
gem 'rake'
|
11
|
+
gem 'yard'
|
12
|
+
gem 'earth'
|
13
|
+
if RUBY_VERSION >= '1.9'
|
14
|
+
gem 'unicode_utils'
|
15
|
+
end
|
16
|
+
# if RUBY_VERSION >= '1.9'
|
17
|
+
# gem 'ruby-debug19' # replace with debugger?
|
18
|
+
# else
|
19
|
+
# gem 'ruby-debug'
|
20
|
+
# end
|
data/README.rdoc
CHANGED
@@ -9,7 +9,7 @@ Programmatically import useful data into your ActiveRecord models.
|
|
9
9
|
You define <tt>data_miner</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
|
10
10
|
|
11
11
|
class Country < ActiveRecord::Base
|
12
|
-
|
12
|
+
self.primary_key = :iso_3166_code
|
13
13
|
|
14
14
|
data_miner do
|
15
15
|
import 'the official ISO country list',
|
@@ -62,7 +62,7 @@ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA
|
|
62
62
|
# Tell ActiveRecord that we want to use a string primary key.
|
63
63
|
# This makes it easier to repeatedly truncate and re-import this
|
64
64
|
# table without breaking associations.
|
65
|
-
|
65
|
+
self.primary_key = :icao_code
|
66
66
|
|
67
67
|
# Use the mini_record-compat gem to define the database schema in-line.
|
68
68
|
# It will destructively and automatically add/remove columns.
|
@@ -268,7 +268,7 @@ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA
|
|
268
268
|
update_all "weighting = (#{segments.project(segments[:passengers].sum).where(aircraft[:bts_aircraft_type_code].eq(segments[:bts_aircraft_type_code])).to_sql})"
|
269
269
|
end
|
270
270
|
|
271
|
-
# And finally re-run the import of resources that depend on this
|
271
|
+
# And finally re-run the import of resources that depend on this model.
|
272
272
|
# Don't worry about calling Aircraft.run_data_miner! at the top of AircraftManufacturer's data_miner block;
|
273
273
|
# that's the right way to do dependencies. It won't get called twice in the same run.
|
274
274
|
[ AircraftManufacturer ].each do |synthetic_resource|
|
data/Rakefile
CHANGED
@@ -1,27 +1,25 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
3
|
|
4
4
|
require 'rake'
|
5
5
|
require 'rake/testtask'
|
6
6
|
Rake::TestTask.new(:test) do |test|
|
7
|
-
test.libs << '
|
7
|
+
test.libs << 'test'
|
8
8
|
test.pattern = 'test/**/test_*.rb'
|
9
9
|
test.verbose = true
|
10
10
|
end
|
11
11
|
|
12
|
-
task :
|
13
|
-
|
14
|
-
|
15
|
-
require 'rake/rdoctask'
|
16
|
-
Rake::RDocTask.new do |rdoc|
|
17
|
-
rdoc.rdoc_dir = 'rdoc'
|
18
|
-
rdoc.title = 'data_miner'
|
19
|
-
rdoc.options << '--line-numbers' << '--inline-source'
|
20
|
-
rdoc.rdoc_files.include('README*')
|
21
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
12
|
+
task :test_separately do
|
13
|
+
Dir[File.expand_path('../test/**/test_*.rb', __FILE__)].each do |path|
|
14
|
+
system "rake test TEST=#{path}"
|
22
15
|
end
|
23
|
-
|
24
|
-
|
16
|
+
end
|
17
|
+
|
18
|
+
task :default => :test_separately
|
19
|
+
|
20
|
+
require 'yard'
|
21
|
+
YARD::Rake::YardocTask.new do |y|
|
22
|
+
y.options << '--no-private'
|
25
23
|
end
|
26
24
|
|
27
25
|
gemspec = eval(File.read(Dir["*.gemspec"].first))
|
data/data_miner.gemspec
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
require "data_miner/version"
|
2
|
+
require File.expand_path("../lib/data_miner/version", __FILE__)
|
4
3
|
|
5
4
|
Gem::Specification.new do |s|
|
6
5
|
s.name = "data_miner"
|
7
6
|
s.version = DataMiner::VERSION
|
8
|
-
s.platform = Gem::Platform::RUBY
|
9
7
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner"]
|
10
8
|
s.email = ["seamus@abshere.net"]
|
11
9
|
s.homepage = "https://github.com/seamusabshere/data_miner"
|
@@ -23,17 +21,8 @@ Gem::Specification.new do |s|
|
|
23
21
|
s.add_runtime_dependency 'activerecord', '>=2.3.4'
|
24
22
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
25
23
|
s.add_runtime_dependency 'conversions', '>=1.4.4'
|
26
|
-
s.add_runtime_dependency 'blockenspiel', '>=0.3.2'
|
27
24
|
s.add_runtime_dependency 'errata', '>=1.0.1'
|
28
|
-
s.
|
29
|
-
s.
|
30
|
-
s.
|
31
|
-
s.add_development_dependency 'shoulda'
|
32
|
-
s.add_development_dependency 'mysql'
|
33
|
-
s.add_development_dependency 'rake'
|
34
|
-
# if RUBY_VERSION >= '1.9'
|
35
|
-
# s.add_development_dependency 'ruby-debug19'
|
36
|
-
# else
|
37
|
-
# s.add_development_dependency 'ruby-debug'
|
38
|
-
# end
|
25
|
+
s.add_runtime_dependency 'active_record_inline_schema'
|
26
|
+
s.add_runtime_dependency 'aasm'
|
27
|
+
s.add_runtime_dependency 'lock_method', '>=0.5.1'
|
39
28
|
end
|
data/lib/data_miner.rb
CHANGED
@@ -1,91 +1,90 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
require 'set'
|
1
3
|
require 'active_support'
|
2
4
|
require 'active_support/version'
|
3
|
-
|
4
|
-
active_support/core_ext
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
if ::ActiveSupport::VERSION::MAJOR >= 3
|
6
|
+
require 'active_support/core_ext'
|
7
|
+
end
|
8
|
+
require 'active_record'
|
9
|
+
if RUBY_VERSION >= '1.9'
|
10
|
+
begin
|
11
|
+
require 'unicode_utils/downcase'
|
12
|
+
rescue LoadError
|
13
|
+
Kernel.warn '[data_miner] You may wish to include unicode_utils in your Gemfile to improve accuracy of downcasing'
|
14
|
+
end
|
15
|
+
end
|
10
16
|
|
11
|
-
require '
|
17
|
+
require 'data_miner/active_record_extensions'
|
18
|
+
require 'data_miner/attribute'
|
19
|
+
require 'data_miner/script'
|
20
|
+
require 'data_miner/dictionary'
|
21
|
+
require 'data_miner/step'
|
22
|
+
require 'data_miner/step/import'
|
23
|
+
require 'data_miner/step/tap'
|
24
|
+
require 'data_miner/step/process'
|
25
|
+
require 'data_miner/run'
|
12
26
|
|
13
27
|
class DataMiner
|
14
|
-
include ::Singleton
|
15
|
-
|
16
|
-
class MissingHashColumn < StandardError; end
|
17
|
-
class Finish < StandardError; end
|
18
|
-
class Skip < StandardError; end
|
19
|
-
|
20
|
-
autoload :ActiveRecordExtensions, 'data_miner/active_record_extensions'
|
21
|
-
autoload :Attribute, 'data_miner/attribute'
|
22
|
-
autoload :Config, 'data_miner/config'
|
23
|
-
autoload :Dictionary, 'data_miner/dictionary'
|
24
|
-
autoload :Import, 'data_miner/import'
|
25
|
-
autoload :Tap, 'data_miner/tap'
|
26
|
-
autoload :Process, 'data_miner/process'
|
27
|
-
autoload :Run, 'data_miner/run'
|
28
|
-
|
29
28
|
class << self
|
29
|
+
delegate :perform, :to => :instance
|
30
|
+
delegate :run, :to => :instance
|
30
31
|
delegate :logger, :to => :instance
|
31
32
|
delegate :logger=, :to => :instance
|
32
|
-
delegate :
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
result[new_key] = new_value
|
48
|
-
result
|
33
|
+
delegate :model_names, :to => :instance
|
34
|
+
|
35
|
+
# @private
|
36
|
+
def downcase(str)
|
37
|
+
defined?(::UnicodeUtils) ? ::UnicodeUtils.downcase(str) : str.downcase
|
38
|
+
end
|
39
|
+
|
40
|
+
# @private
|
41
|
+
def upcase(str)
|
42
|
+
defined?(::UnicodeUtils) ? ::UnicodeUtils.upcase(str) : str.upcase
|
43
|
+
end
|
44
|
+
|
45
|
+
# @private
|
46
|
+
def compress_whitespace(str)
|
47
|
+
str.gsub(INNER_SPACE, ' ').strip
|
49
48
|
end
|
50
49
|
end
|
51
|
-
|
50
|
+
|
51
|
+
MUTEX = ::Mutex.new
|
52
|
+
INNER_SPACE = /[ ]+/
|
53
|
+
|
54
|
+
include ::Singleton
|
55
|
+
|
52
56
|
attr_writer :logger
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
else
|
60
|
-
require 'logger'
|
61
|
-
@logger = ::Logger.new $stderr
|
57
|
+
|
58
|
+
def perform(model_names = DataMiner.model_names)
|
59
|
+
Script.uniq do
|
60
|
+
model_names.each do |model_name|
|
61
|
+
model_name.constantize.run_data_miner!
|
62
|
+
end
|
62
63
|
end
|
63
64
|
end
|
64
65
|
|
65
|
-
|
66
|
-
|
67
|
-
end
|
66
|
+
# legacy
|
67
|
+
alias :run :perform
|
68
68
|
|
69
|
-
def
|
70
|
-
@
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
options = options.dup
|
79
|
-
options.stringify_keys!
|
80
|
-
options['preserve_call_stack_between_runs'] = true
|
81
|
-
resource_names.each do |resource_name|
|
82
|
-
if options['resource_names'].blank? or options['resource_names'].include?(resource_name)
|
83
|
-
resource_name.constantize.data_miner_config.run options
|
69
|
+
def logger
|
70
|
+
@logger || MUTEX.synchronize do
|
71
|
+
@logger ||= if defined?(::Rails)
|
72
|
+
::Rails.logger
|
73
|
+
elsif defined?(::ActiveRecord) and active_record_logger = ::ActiveRecord::Base.logger
|
74
|
+
active_record_logger
|
75
|
+
else
|
76
|
+
require 'logger'
|
77
|
+
::Logger.new $stderr
|
84
78
|
end
|
85
79
|
end
|
86
|
-
call_stack.clear
|
87
80
|
end
|
81
|
+
|
82
|
+
def model_names
|
83
|
+
@model_names || MUTEX.synchronize do
|
84
|
+
@model_names ||= ::Set.new
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
88
|
end
|
89
89
|
|
90
|
-
require 'active_record'
|
91
90
|
::ActiveRecord::Base.extend ::DataMiner::ActiveRecordExtensions
|
@@ -1,43 +1,38 @@
|
|
1
1
|
require 'active_record'
|
2
|
-
require '
|
2
|
+
require 'lock_method'
|
3
3
|
|
4
4
|
class DataMiner
|
5
5
|
module ActiveRecordExtensions
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
MUTEX = ::Mutex.new
|
7
|
+
|
8
|
+
def data_miner_script
|
9
|
+
@data_miner_script || MUTEX.synchronize do
|
10
|
+
@data_miner_script ||= DataMiner::Script.new(self)
|
11
|
+
end
|
12
12
|
end
|
13
13
|
|
14
14
|
def data_miner_runs
|
15
|
-
|
15
|
+
DataMiner::Run.scoped :conditions => { :model_name => name }
|
16
16
|
end
|
17
17
|
|
18
|
-
def run_data_miner!
|
19
|
-
|
18
|
+
def run_data_miner!
|
19
|
+
data_miner_script.perform
|
20
20
|
end
|
21
21
|
|
22
22
|
def run_data_miner_on_parent_associations!
|
23
|
-
reflect_on_all_associations(:belongs_to).
|
24
|
-
|
25
|
-
|
23
|
+
reflect_on_all_associations(:belongs_to).reject do |assoc|
|
24
|
+
assoc.options[:polymorphic]
|
25
|
+
end.each do |non_polymorphic_belongs_to_assoc|
|
26
|
+
non_polymorphic_belongs_to_assoc.klass.run_data_miner!
|
26
27
|
end
|
27
28
|
end
|
28
29
|
|
29
30
|
def data_miner(options = {}, &blk)
|
30
|
-
|
31
|
-
|
31
|
+
DataMiner.model_names.add name
|
32
32
|
unless options[:append]
|
33
|
-
|
33
|
+
@data_miner_script = nil
|
34
34
|
end
|
35
|
-
|
36
|
-
::Blockenspiel.invoke blk, data_miner_config
|
37
|
-
|
38
|
-
data_miner_config.after_invoke
|
35
|
+
data_miner_script.append_block blk
|
39
36
|
end
|
40
37
|
end
|
41
38
|
end
|
42
|
-
|
43
|
-
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -2,12 +2,20 @@ require 'conversions'
|
|
2
2
|
|
3
3
|
class DataMiner
|
4
4
|
class Attribute
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
class << self
|
6
|
+
def check_options(options)
|
7
|
+
errors = []
|
8
|
+
if options[:dictionary].is_a?(Dictionary)
|
9
|
+
errors << %{:dictionary must be a Hash of options}
|
10
|
+
end
|
11
|
+
if (invalid_option_keys = options.keys - VALID_OPTIONS).any?
|
12
|
+
errors << %{Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence}}
|
13
|
+
end
|
14
|
+
if (units_options = options.select { |k, _| k.to_s.include?('units') }).any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
|
15
|
+
errors << %{#{units_options.inspect} is not a valid set of units definitions. Please supply a set like #{VALID_UNIT_DEFINITION_SETS.map(&:inspect).to_sentence}".}
|
16
|
+
end
|
17
|
+
errors
|
18
|
+
end
|
11
19
|
end
|
12
20
|
|
13
21
|
VALID_OPTIONS = %w{
|
@@ -29,28 +37,114 @@ class DataMiner
|
|
29
37
|
field_number
|
30
38
|
chars
|
31
39
|
synthesize
|
32
|
-
}
|
40
|
+
}.map(&:to_sym)
|
33
41
|
|
34
|
-
|
35
|
-
|
42
|
+
VALID_UNIT_DEFINITION_SETS = [
|
43
|
+
[:units],
|
44
|
+
[:from_units, :to_units],
|
45
|
+
[:units_field_name],
|
46
|
+
[:units_field_name, :to_units],
|
47
|
+
[:units_field_number],
|
48
|
+
[:units_field_number, :to_units],
|
49
|
+
]
|
50
|
+
|
51
|
+
DEFAULT_SPLIT = /\s+/
|
52
|
+
DEFAULT_KEEP = 0
|
53
|
+
DEFAULT_DELIMITER = ', '
|
54
|
+
DEFAULT_NULLIFY = false
|
55
|
+
DEFAULT_UPCASE = false
|
56
|
+
DEFAULT_OVERWRITE = true
|
57
|
+
|
58
|
+
attr_reader :step
|
59
|
+
attr_reader :name
|
60
|
+
attr_reader :synthesize
|
61
|
+
attr_reader :matcher
|
62
|
+
attr_reader :field_number
|
63
|
+
attr_reader :field_name
|
64
|
+
# For use when joining a range of field numbers
|
65
|
+
attr_reader :delimiter
|
66
|
+
attr_reader :chars
|
67
|
+
attr_reader :split
|
68
|
+
attr_reader :to_units
|
69
|
+
attr_reader :from_units
|
70
|
+
attr_reader :units_field_number
|
71
|
+
attr_reader :units_field_name
|
72
|
+
attr_reader :sprintf
|
73
|
+
attr_reader :static
|
36
74
|
|
75
|
+
def initialize(step, name, options = {})
|
76
|
+
options = options.symbolize_keys
|
77
|
+
if (errors = Attribute.check_options(options)).any?
|
78
|
+
raise ::ArgumentError, %{[data_miner] Errors on #{inspect}: #{errors.join(';')}}
|
79
|
+
end
|
37
80
|
@step = step
|
38
81
|
@name = name
|
39
|
-
|
40
|
-
|
41
|
-
|
82
|
+
@synthesize = options[:synthesize]
|
83
|
+
if @dictionary_boolean = options.has_key?(:dictionary)
|
84
|
+
@dictionary_options = options[:dictionary]
|
85
|
+
end
|
86
|
+
@matcher = options[:matcher].is_a?(::String) ? options[:matcher].constantize.new : options[:matcher]
|
87
|
+
if @static_boolean = options.has_key?(:static)
|
88
|
+
@static = options[:static]
|
89
|
+
end
|
90
|
+
@field_number = options[:field_number]
|
91
|
+
@field_name = options.fetch(:field_name, name).to_sym
|
92
|
+
@delimiter = options.fetch :delimiter, DEFAULT_DELIMITER
|
93
|
+
@chars = options[:chars]
|
94
|
+
if split = options[:split]
|
95
|
+
@split = split.symbolize_keys
|
96
|
+
end
|
97
|
+
@nullify_boolean = options.fetch :nullify, DEFAULT_NULLIFY
|
98
|
+
@upcase_boolean = options.fetch :upcase, DEFAULT_UPCASE
|
99
|
+
@from_units = options[:from_units]
|
100
|
+
@to_units = options[:to_units] || options[:units]
|
101
|
+
@sprintf = options[:sprintf]
|
102
|
+
@overwrite_boolean = options.fetch :overwrite, DEFAULT_OVERWRITE
|
103
|
+
@units_field_name = options[:units_field_name]
|
104
|
+
@units_field_number = options[:units_field_number]
|
105
|
+
@dictionary_mutex = ::Mutex.new
|
42
106
|
end
|
43
|
-
|
44
|
-
def
|
45
|
-
|
107
|
+
|
108
|
+
def model
|
109
|
+
step.model
|
110
|
+
end
|
111
|
+
|
112
|
+
def static?
|
113
|
+
@static_boolean
|
114
|
+
end
|
115
|
+
|
116
|
+
def nullify?
|
117
|
+
@nullify_boolean
|
46
118
|
end
|
47
119
|
|
48
|
-
def
|
49
|
-
|
120
|
+
def upcase?
|
121
|
+
@upcase_boolean
|
50
122
|
end
|
51
|
-
|
52
|
-
def
|
53
|
-
|
123
|
+
|
124
|
+
def dictionary?
|
125
|
+
@dictionary_boolean
|
126
|
+
end
|
127
|
+
|
128
|
+
def convert?
|
129
|
+
from_units.present? or units_field_name.present? or units_field_number.present?
|
130
|
+
end
|
131
|
+
|
132
|
+
def units?
|
133
|
+
to_units.present? or units_field_name.present? or units_field_number.present?
|
134
|
+
end
|
135
|
+
|
136
|
+
def overwrite?
|
137
|
+
@overwrite_boolean
|
138
|
+
end
|
139
|
+
|
140
|
+
def read(row)
|
141
|
+
if matcher and matched_row = matcher.match(row)
|
142
|
+
return matched_row
|
143
|
+
end
|
144
|
+
if synthesize
|
145
|
+
return synthesize.call(row)
|
146
|
+
end
|
147
|
+
value = if static?
|
54
148
|
static
|
55
149
|
elsif field_number
|
56
150
|
if field_number.is_a?(::Range)
|
@@ -58,180 +152,83 @@ class DataMiner
|
|
58
152
|
else
|
59
153
|
row[field_number]
|
60
154
|
end
|
61
|
-
elsif field_name ==
|
155
|
+
elsif field_name == :row_hash
|
62
156
|
row.row_hash
|
63
157
|
elsif row.is_a?(::Hash) or row.is_a?(::ActiveSupport::OrderedHash)
|
64
|
-
row[field_name]
|
158
|
+
row[field_name.to_s] # remote_table hash keys are always strings
|
159
|
+
end
|
160
|
+
if value.nil?
|
161
|
+
return
|
162
|
+
end
|
163
|
+
if value.is_a? ::ActiveRecord::Base
|
164
|
+
return value
|
65
165
|
end
|
66
|
-
return nil if value.nil?
|
67
|
-
return value if value.is_a?(::ActiveRecord::Base) # escape valve for parsers that look up associations directly
|
68
166
|
value = value.to_s
|
69
|
-
|
70
|
-
|
71
|
-
value.gsub! /[ ]+/, ' '
|
72
|
-
value.strip!
|
73
|
-
return nil if value.blank? and wants_nullification?
|
74
|
-
value.upcase! if wants_upcase?
|
75
|
-
value = do_convert row, value if wants_conversion?
|
76
|
-
value = do_sprintf value if wants_sprintf?
|
77
|
-
value
|
78
|
-
end
|
79
|
-
|
80
|
-
def match_row(row)
|
81
|
-
matcher.match row
|
82
|
-
end
|
83
|
-
|
84
|
-
def value_from_row(row)
|
85
|
-
return match_row row if wants_matcher?
|
86
|
-
value = value_in_source row
|
87
|
-
return value if value.is_a? ::ActiveRecord::Base # carry through trapdoor
|
88
|
-
value = value_in_dictionary value if wants_dictionary?
|
89
|
-
value = synthesize.call(row) if wants_synthesize?
|
90
|
-
value
|
91
|
-
end
|
92
|
-
|
93
|
-
def set_record_from_row(record, row)
|
94
|
-
return false if !wants_overwriting? and !record.send(name).nil?
|
95
|
-
record.send "#{name}=", value_from_row(row)
|
96
|
-
if wants_units?
|
97
|
-
unit = (to_units || unit_from_source(row)).to_s
|
98
|
-
unit = nil if unit.blank? and wants_nullification?
|
99
|
-
record.send "#{name}_units=", unit
|
167
|
+
if chars
|
168
|
+
value = value[chars]
|
100
169
|
end
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
end
|
106
|
-
|
107
|
-
def do_convert(row, value)
|
108
|
-
unless wants_units?
|
109
|
-
raise ::RuntimeError, "[data_miner] If you use 'from_units', you need to set 'to_units' (#{inspect})"
|
170
|
+
if split
|
171
|
+
pattern = split.fetch :pattern, DEFAULT_SPLIT
|
172
|
+
keep = split.fetch :keep, DEFAULT_KEEP
|
173
|
+
value = value.to_s.split(pattern)[keep].to_s
|
110
174
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
|
175
|
+
value = DataMiner.compress_whitespace value
|
176
|
+
if nullify? and value.blank?
|
177
|
+
return
|
115
178
|
end
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
def do_sprintf(value)
|
120
|
-
if /\%[0-9\.]*f/.match sprintf
|
121
|
-
value = value.to_f
|
122
|
-
elsif /\%[0-9\.]*d/.match sprintf
|
123
|
-
value = value.to_i
|
179
|
+
if upcase?
|
180
|
+
value = DataMiner.upcase value
|
124
181
|
end
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
def wants_upcase?
|
146
|
-
upcase.present?
|
147
|
-
end
|
148
|
-
def wants_static?
|
149
|
-
options.has_key? 'static'
|
150
|
-
end
|
151
|
-
def wants_nullification?
|
152
|
-
nullify == true
|
153
|
-
end
|
154
|
-
def wants_chars?
|
155
|
-
chars.present?
|
156
|
-
end
|
157
|
-
def wants_synthesize?
|
158
|
-
synthesize.is_a?(::Proc)
|
159
|
-
end
|
160
|
-
def wants_overwriting?
|
161
|
-
overwrite != false
|
162
|
-
end
|
163
|
-
def wants_conversion?
|
164
|
-
from_units.present? or units_field_name.present? or units_field_number.present?
|
165
|
-
end
|
166
|
-
def wants_units?
|
167
|
-
to_units.present? or units_field_name.present? or units_field_number.present?
|
168
|
-
end
|
169
|
-
def wants_dictionary?
|
170
|
-
options['dictionary'].present?
|
171
|
-
end
|
172
|
-
def wants_matcher?
|
173
|
-
options['matcher'].present?
|
182
|
+
if convert?
|
183
|
+
final_from_units = from_units || read_units(row)
|
184
|
+
final_to_units = to_units || read_units(row)
|
185
|
+
if final_from_units.blank? or final_to_units.blank?
|
186
|
+
raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
|
187
|
+
end
|
188
|
+
value = value.to_f.convert final_from_units, final_to_units
|
189
|
+
end
|
190
|
+
if sprintf
|
191
|
+
if sprintf.end_with?('f')
|
192
|
+
value = value.to_f
|
193
|
+
elsif sprintf.end_with?('d')
|
194
|
+
value = value.to_i
|
195
|
+
end
|
196
|
+
value = sprintf % value
|
197
|
+
end
|
198
|
+
if dictionary?
|
199
|
+
value = dictionary.lookup(value)
|
200
|
+
end
|
201
|
+
value
|
174
202
|
end
|
175
203
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
# Options that can't be referred to by their names
|
185
|
-
def split_options
|
186
|
-
options['split']
|
187
|
-
end
|
188
|
-
|
189
|
-
def from_units
|
190
|
-
options['from_units']
|
191
|
-
end
|
192
|
-
def to_units
|
193
|
-
options['to_units'] || options['units']
|
194
|
-
end
|
195
|
-
def sprintf
|
196
|
-
options['sprintf']
|
197
|
-
end
|
198
|
-
def nullify
|
199
|
-
options['nullify']
|
200
|
-
end
|
201
|
-
def overwrite
|
202
|
-
options['overwrite']
|
203
|
-
end
|
204
|
-
def upcase
|
205
|
-
options['upcase']
|
206
|
-
end
|
207
|
-
def units_field_name
|
208
|
-
options['units_field_name']
|
209
|
-
end
|
210
|
-
def units_field_number
|
211
|
-
options['units_field_number']
|
212
|
-
end
|
213
|
-
def field_number
|
214
|
-
options['field_number']
|
215
|
-
end
|
216
|
-
def chars
|
217
|
-
options['chars']
|
218
|
-
end
|
219
|
-
def synthesize
|
220
|
-
options['synthesize']
|
221
|
-
end
|
222
|
-
def static
|
223
|
-
options['static']
|
204
|
+
def set_from_row(target, row)
|
205
|
+
if overwrite? or target.send(name).nil?
|
206
|
+
target.send "#{name}=", read(row)
|
207
|
+
end
|
208
|
+
if units? and ((final_to_units = (to_units || read_units(row))) or nullify?)
|
209
|
+
target.send "#{name}_units=", final_to_units
|
210
|
+
end
|
224
211
|
end
|
225
|
-
|
212
|
+
|
226
213
|
def dictionary
|
227
|
-
@dictionary
|
214
|
+
@dictionary || @dictionary_mutex.synchronize do
|
215
|
+
@dictionary ||= Dictionary.new(@dictionary_options)
|
216
|
+
end
|
228
217
|
end
|
229
|
-
|
230
|
-
|
218
|
+
|
219
|
+
def refresh
|
220
|
+
@dictionary = nil
|
231
221
|
end
|
232
|
-
|
222
|
+
|
223
|
+
private
|
224
|
+
|
225
|
+
def read_units(row)
|
226
|
+
if units = row[units_field_name || units_field_number]
|
227
|
+
DataMiner.compress_whitespace(units).underscore.to_sym
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
233
231
|
def free
|
234
|
-
@dictionary.free if @dictionary.is_a?(Dictionary)
|
235
232
|
@dictionary = nil
|
236
233
|
end
|
237
234
|
end
|