data_miner 1.3.8 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +42 -0
- data/Gemfile +19 -3
- data/README.rdoc +3 -3
- data/Rakefile +13 -15
- data/data_miner.gemspec +4 -15
- data/lib/data_miner.rb +69 -70
- data/lib/data_miner/active_record_extensions.rb +17 -22
- data/lib/data_miner/attribute.rb +176 -179
- data/lib/data_miner/dictionary.rb +38 -31
- data/lib/data_miner/run.rb +49 -18
- data/lib/data_miner/script.rb +116 -0
- data/lib/data_miner/step.rb +5 -0
- data/lib/data_miner/step/import.rb +74 -0
- data/lib/data_miner/step/process.rb +34 -0
- data/lib/data_miner/step/tap.rb +134 -0
- data/lib/data_miner/version.rb +1 -1
- data/test/helper.rb +26 -24
- data/test/support/breeds.xls +0 -0
- data/test/support/pet_color_dictionary.en.csv +5 -0
- data/test/support/pet_color_dictionary.es.csv +5 -0
- data/test/support/pets.csv +5 -0
- data/test/support/pets_funny.csv +4 -0
- data/test/test_data_miner.rb +103 -0
- data/test/test_earth_import.rb +25 -0
- data/test/test_earth_tap.rb +25 -0
- data/test/test_safety.rb +43 -0
- metadata +72 -78
- data/.document +0 -5
- data/lib/data_miner/config.rb +0 -124
- data/lib/data_miner/import.rb +0 -93
- data/lib/data_miner/process.rb +0 -38
- data/lib/data_miner/tap.rb +0 -143
- data/test/support/aircraft.rb +0 -102
- data/test/support/airport.rb +0 -16
- data/test/support/automobile_fuel_type.rb +0 -40
- data/test/support/automobile_variant.rb +0 -362
- data/test/support/country.rb +0 -15
- data/test/support/test_database.rb +0 -311
- data/test/test_data_miner_attribute.rb +0 -111
- data/test/test_data_miner_process.rb +0 -18
- data/test/test_old_syntax.rb +0 -825
- data/test/test_tap.rb +0 -21
data/CHANGELOG
CHANGED
@@ -1,3 +1,45 @@
|
|
1
|
+
2.0.1 / 2012-04-18
|
2
|
+
|
3
|
+
* Enhancements
|
4
|
+
|
5
|
+
* DataMiner.run -> DataMiner.perform
|
6
|
+
* Some basic tests that don't rely on Earth
|
7
|
+
|
8
|
+
* Bug fixes
|
9
|
+
|
10
|
+
* Fix the "call stack" - the thing that keeps infinite loops from occurring
|
11
|
+
* Make sure sources get refreshed every time you re-run data miner
|
12
|
+
* Make sure dictionaries get refreshed every time you re-run data miner
|
13
|
+
|
14
|
+
2.0.0 / 2012-04-17
|
15
|
+
|
16
|
+
* Breaking changes
|
17
|
+
|
18
|
+
* Renamed data_miner_config to data_miner_script (etc. for class/method naming)
|
19
|
+
* Simplify DataMiner.run arguments
|
20
|
+
was: DataMiner.run(:resource_names => ['Country'])
|
21
|
+
now: DataMiner.run(['Country'])
|
22
|
+
* Rename "resources" to "models"
|
23
|
+
was: DataMiner.resource_names
|
24
|
+
now: DataMiner.model_names
|
25
|
+
* Expect procs instead of lambdas (because they are just instance-eval'ed now; Blockenspiel is no longer guessing where to find methods)
|
26
|
+
was: :synthesize => lambda { class_method }
|
27
|
+
now: :synthesize => proc { Klass.class_method }
|
28
|
+
* Use UnicodeUtils to correctly upcase and downcase
|
29
|
+
* Use throw/catch instead of exceptions to force a step to stop successfully
|
30
|
+
was: DataMiner::Succeed
|
31
|
+
now: throw :data_miner_succeed
|
32
|
+
* Import steps no longer accept deprecated :table => RemoteTable or :errata => Errata options
|
33
|
+
* DataMiner::Run structure has changed (in addition to other internals)
|
34
|
+
|
35
|
+
* Enhancements
|
36
|
+
|
37
|
+
* Easy to modify data miner scripts using DataMiner::Script#{append|prepend|append_once|prepend_once}
|
38
|
+
* DRYer codebase
|
39
|
+
* No longer depends on Blockenspiel
|
40
|
+
* Uses UnixUtils instead of its own spawning code
|
41
|
+
* Should be threadsafe (no more autoload, has mutexes, more careful/fewer singletons, etc.)
|
42
|
+
|
1
43
|
1.1.0
|
2
44
|
* fixed dependency issues
|
3
45
|
1.0.0
|
data/Gemfile
CHANGED
@@ -1,4 +1,20 @@
|
|
1
|
-
source
|
1
|
+
source :rubygems
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
gemspec
|
4
|
+
|
5
|
+
# development dependencies
|
6
|
+
gem 'fuzzy_match'
|
7
|
+
gem 'minitest'
|
8
|
+
gem 'minitest-reporters'
|
9
|
+
gem 'mysql2'
|
10
|
+
gem 'rake'
|
11
|
+
gem 'yard'
|
12
|
+
gem 'earth'
|
13
|
+
if RUBY_VERSION >= '1.9'
|
14
|
+
gem 'unicode_utils'
|
15
|
+
end
|
16
|
+
# if RUBY_VERSION >= '1.9'
|
17
|
+
# gem 'ruby-debug19' # replace with debugger?
|
18
|
+
# else
|
19
|
+
# gem 'ruby-debug'
|
20
|
+
# end
|
data/README.rdoc
CHANGED
@@ -9,7 +9,7 @@ Programmatically import useful data into your ActiveRecord models.
|
|
9
9
|
You define <tt>data_miner</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
|
10
10
|
|
11
11
|
class Country < ActiveRecord::Base
|
12
|
-
|
12
|
+
self.primary_key = :iso_3166_code
|
13
13
|
|
14
14
|
data_miner do
|
15
15
|
import 'the official ISO country list',
|
@@ -62,7 +62,7 @@ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA
|
|
62
62
|
# Tell ActiveRecord that we want to use a string primary key.
|
63
63
|
# This makes it easier to repeatedly truncate and re-import this
|
64
64
|
# table without breaking associations.
|
65
|
-
|
65
|
+
self.primary_key = :icao_code
|
66
66
|
|
67
67
|
# Use the mini_record-compat gem to define the database schema in-line.
|
68
68
|
# It will destructively and automatically add/remove columns.
|
@@ -268,7 +268,7 @@ This is how we linked together (http://data.brighterplanet.com/aircraft) the FAA
|
|
268
268
|
update_all "weighting = (#{segments.project(segments[:passengers].sum).where(aircraft[:bts_aircraft_type_code].eq(segments[:bts_aircraft_type_code])).to_sql})"
|
269
269
|
end
|
270
270
|
|
271
|
-
# And finally re-run the import of resources that depend on this
|
271
|
+
# And finally re-run the import of resources that depend on this model.
|
272
272
|
# Don't worry about calling Aircraft.run_data_miner! at the top of AircraftManufacturer's data_miner block;
|
273
273
|
# that's the right way to do dependencies. It won't get called twice in the same run.
|
274
274
|
[ AircraftManufacturer ].each do |synthetic_resource|
|
data/Rakefile
CHANGED
@@ -1,27 +1,25 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
3
|
|
4
4
|
require 'rake'
|
5
5
|
require 'rake/testtask'
|
6
6
|
Rake::TestTask.new(:test) do |test|
|
7
|
-
test.libs << '
|
7
|
+
test.libs << 'test'
|
8
8
|
test.pattern = 'test/**/test_*.rb'
|
9
9
|
test.verbose = true
|
10
10
|
end
|
11
11
|
|
12
|
-
task :
|
13
|
-
|
14
|
-
|
15
|
-
require 'rake/rdoctask'
|
16
|
-
Rake::RDocTask.new do |rdoc|
|
17
|
-
rdoc.rdoc_dir = 'rdoc'
|
18
|
-
rdoc.title = 'data_miner'
|
19
|
-
rdoc.options << '--line-numbers' << '--inline-source'
|
20
|
-
rdoc.rdoc_files.include('README*')
|
21
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
12
|
+
task :test_separately do
|
13
|
+
Dir[File.expand_path('../test/**/test_*.rb', __FILE__)].each do |path|
|
14
|
+
system "rake test TEST=#{path}"
|
22
15
|
end
|
23
|
-
|
24
|
-
|
16
|
+
end
|
17
|
+
|
18
|
+
task :default => :test_separately
|
19
|
+
|
20
|
+
require 'yard'
|
21
|
+
YARD::Rake::YardocTask.new do |y|
|
22
|
+
y.options << '--no-private'
|
25
23
|
end
|
26
24
|
|
27
25
|
gemspec = eval(File.read(Dir["*.gemspec"].first))
|
data/data_miner.gemspec
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
require "data_miner/version"
|
2
|
+
require File.expand_path("../lib/data_miner/version", __FILE__)
|
4
3
|
|
5
4
|
Gem::Specification.new do |s|
|
6
5
|
s.name = "data_miner"
|
7
6
|
s.version = DataMiner::VERSION
|
8
|
-
s.platform = Gem::Platform::RUBY
|
9
7
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl", "Derek Kastner"]
|
10
8
|
s.email = ["seamus@abshere.net"]
|
11
9
|
s.homepage = "https://github.com/seamusabshere/data_miner"
|
@@ -23,17 +21,8 @@ Gem::Specification.new do |s|
|
|
23
21
|
s.add_runtime_dependency 'activerecord', '>=2.3.4'
|
24
22
|
s.add_runtime_dependency 'activesupport', '>=2.3.4'
|
25
23
|
s.add_runtime_dependency 'conversions', '>=1.4.4'
|
26
|
-
s.add_runtime_dependency 'blockenspiel', '>=0.3.2'
|
27
24
|
s.add_runtime_dependency 'errata', '>=1.0.1'
|
28
|
-
s.
|
29
|
-
s.
|
30
|
-
s.
|
31
|
-
s.add_development_dependency 'shoulda'
|
32
|
-
s.add_development_dependency 'mysql'
|
33
|
-
s.add_development_dependency 'rake'
|
34
|
-
# if RUBY_VERSION >= '1.9'
|
35
|
-
# s.add_development_dependency 'ruby-debug19'
|
36
|
-
# else
|
37
|
-
# s.add_development_dependency 'ruby-debug'
|
38
|
-
# end
|
25
|
+
s.add_runtime_dependency 'active_record_inline_schema'
|
26
|
+
s.add_runtime_dependency 'aasm'
|
27
|
+
s.add_runtime_dependency 'lock_method', '>=0.5.1'
|
39
28
|
end
|
data/lib/data_miner.rb
CHANGED
@@ -1,91 +1,90 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
require 'set'
|
1
3
|
require 'active_support'
|
2
4
|
require 'active_support/version'
|
3
|
-
|
4
|
-
active_support/core_ext
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
if ::ActiveSupport::VERSION::MAJOR >= 3
|
6
|
+
require 'active_support/core_ext'
|
7
|
+
end
|
8
|
+
require 'active_record'
|
9
|
+
if RUBY_VERSION >= '1.9'
|
10
|
+
begin
|
11
|
+
require 'unicode_utils/downcase'
|
12
|
+
rescue LoadError
|
13
|
+
Kernel.warn '[data_miner] You may wish to include unicode_utils in your Gemfile to improve accuracy of downcasing'
|
14
|
+
end
|
15
|
+
end
|
10
16
|
|
11
|
-
require '
|
17
|
+
require 'data_miner/active_record_extensions'
|
18
|
+
require 'data_miner/attribute'
|
19
|
+
require 'data_miner/script'
|
20
|
+
require 'data_miner/dictionary'
|
21
|
+
require 'data_miner/step'
|
22
|
+
require 'data_miner/step/import'
|
23
|
+
require 'data_miner/step/tap'
|
24
|
+
require 'data_miner/step/process'
|
25
|
+
require 'data_miner/run'
|
12
26
|
|
13
27
|
class DataMiner
|
14
|
-
include ::Singleton
|
15
|
-
|
16
|
-
class MissingHashColumn < StandardError; end
|
17
|
-
class Finish < StandardError; end
|
18
|
-
class Skip < StandardError; end
|
19
|
-
|
20
|
-
autoload :ActiveRecordExtensions, 'data_miner/active_record_extensions'
|
21
|
-
autoload :Attribute, 'data_miner/attribute'
|
22
|
-
autoload :Config, 'data_miner/config'
|
23
|
-
autoload :Dictionary, 'data_miner/dictionary'
|
24
|
-
autoload :Import, 'data_miner/import'
|
25
|
-
autoload :Tap, 'data_miner/tap'
|
26
|
-
autoload :Process, 'data_miner/process'
|
27
|
-
autoload :Run, 'data_miner/run'
|
28
|
-
|
29
28
|
class << self
|
29
|
+
delegate :perform, :to => :instance
|
30
|
+
delegate :run, :to => :instance
|
30
31
|
delegate :logger, :to => :instance
|
31
32
|
delegate :logger=, :to => :instance
|
32
|
-
delegate :
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
result[new_key] = new_value
|
48
|
-
result
|
33
|
+
delegate :model_names, :to => :instance
|
34
|
+
|
35
|
+
# @private
|
36
|
+
def downcase(str)
|
37
|
+
defined?(::UnicodeUtils) ? ::UnicodeUtils.downcase(str) : str.downcase
|
38
|
+
end
|
39
|
+
|
40
|
+
# @private
|
41
|
+
def upcase(str)
|
42
|
+
defined?(::UnicodeUtils) ? ::UnicodeUtils.upcase(str) : str.upcase
|
43
|
+
end
|
44
|
+
|
45
|
+
# @private
|
46
|
+
def compress_whitespace(str)
|
47
|
+
str.gsub(INNER_SPACE, ' ').strip
|
49
48
|
end
|
50
49
|
end
|
51
|
-
|
50
|
+
|
51
|
+
MUTEX = ::Mutex.new
|
52
|
+
INNER_SPACE = /[ ]+/
|
53
|
+
|
54
|
+
include ::Singleton
|
55
|
+
|
52
56
|
attr_writer :logger
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
else
|
60
|
-
require 'logger'
|
61
|
-
@logger = ::Logger.new $stderr
|
57
|
+
|
58
|
+
def perform(model_names = DataMiner.model_names)
|
59
|
+
Script.uniq do
|
60
|
+
model_names.each do |model_name|
|
61
|
+
model_name.constantize.run_data_miner!
|
62
|
+
end
|
62
63
|
end
|
63
64
|
end
|
64
65
|
|
65
|
-
|
66
|
-
|
67
|
-
end
|
66
|
+
# legacy
|
67
|
+
alias :run :perform
|
68
68
|
|
69
|
-
def
|
70
|
-
@
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
options = options.dup
|
79
|
-
options.stringify_keys!
|
80
|
-
options['preserve_call_stack_between_runs'] = true
|
81
|
-
resource_names.each do |resource_name|
|
82
|
-
if options['resource_names'].blank? or options['resource_names'].include?(resource_name)
|
83
|
-
resource_name.constantize.data_miner_config.run options
|
69
|
+
def logger
|
70
|
+
@logger || MUTEX.synchronize do
|
71
|
+
@logger ||= if defined?(::Rails)
|
72
|
+
::Rails.logger
|
73
|
+
elsif defined?(::ActiveRecord) and active_record_logger = ::ActiveRecord::Base.logger
|
74
|
+
active_record_logger
|
75
|
+
else
|
76
|
+
require 'logger'
|
77
|
+
::Logger.new $stderr
|
84
78
|
end
|
85
79
|
end
|
86
|
-
call_stack.clear
|
87
80
|
end
|
81
|
+
|
82
|
+
def model_names
|
83
|
+
@model_names || MUTEX.synchronize do
|
84
|
+
@model_names ||= ::Set.new
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
88
|
end
|
89
89
|
|
90
|
-
require 'active_record'
|
91
90
|
::ActiveRecord::Base.extend ::DataMiner::ActiveRecordExtensions
|
@@ -1,43 +1,38 @@
|
|
1
1
|
require 'active_record'
|
2
|
-
require '
|
2
|
+
require 'lock_method'
|
3
3
|
|
4
4
|
class DataMiner
|
5
5
|
module ActiveRecordExtensions
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
6
|
+
MUTEX = ::Mutex.new
|
7
|
+
|
8
|
+
def data_miner_script
|
9
|
+
@data_miner_script || MUTEX.synchronize do
|
10
|
+
@data_miner_script ||= DataMiner::Script.new(self)
|
11
|
+
end
|
12
12
|
end
|
13
13
|
|
14
14
|
def data_miner_runs
|
15
|
-
|
15
|
+
DataMiner::Run.scoped :conditions => { :model_name => name }
|
16
16
|
end
|
17
17
|
|
18
|
-
def run_data_miner!
|
19
|
-
|
18
|
+
def run_data_miner!
|
19
|
+
data_miner_script.perform
|
20
20
|
end
|
21
21
|
|
22
22
|
def run_data_miner_on_parent_associations!
|
23
|
-
reflect_on_all_associations(:belongs_to).
|
24
|
-
|
25
|
-
|
23
|
+
reflect_on_all_associations(:belongs_to).reject do |assoc|
|
24
|
+
assoc.options[:polymorphic]
|
25
|
+
end.each do |non_polymorphic_belongs_to_assoc|
|
26
|
+
non_polymorphic_belongs_to_assoc.klass.run_data_miner!
|
26
27
|
end
|
27
28
|
end
|
28
29
|
|
29
30
|
def data_miner(options = {}, &blk)
|
30
|
-
|
31
|
-
|
31
|
+
DataMiner.model_names.add name
|
32
32
|
unless options[:append]
|
33
|
-
|
33
|
+
@data_miner_script = nil
|
34
34
|
end
|
35
|
-
|
36
|
-
::Blockenspiel.invoke blk, data_miner_config
|
37
|
-
|
38
|
-
data_miner_config.after_invoke
|
35
|
+
data_miner_script.append_block blk
|
39
36
|
end
|
40
37
|
end
|
41
38
|
end
|
42
|
-
|
43
|
-
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -2,12 +2,20 @@ require 'conversions'
|
|
2
2
|
|
3
3
|
class DataMiner
|
4
4
|
class Attribute
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
5
|
+
class << self
|
6
|
+
def check_options(options)
|
7
|
+
errors = []
|
8
|
+
if options[:dictionary].is_a?(Dictionary)
|
9
|
+
errors << %{:dictionary must be a Hash of options}
|
10
|
+
end
|
11
|
+
if (invalid_option_keys = options.keys - VALID_OPTIONS).any?
|
12
|
+
errors << %{Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence}}
|
13
|
+
end
|
14
|
+
if (units_options = options.select { |k, _| k.to_s.include?('units') }).any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
|
15
|
+
errors << %{#{units_options.inspect} is not a valid set of units definitions. Please supply a set like #{VALID_UNIT_DEFINITION_SETS.map(&:inspect).to_sentence}".}
|
16
|
+
end
|
17
|
+
errors
|
18
|
+
end
|
11
19
|
end
|
12
20
|
|
13
21
|
VALID_OPTIONS = %w{
|
@@ -29,28 +37,114 @@ class DataMiner
|
|
29
37
|
field_number
|
30
38
|
chars
|
31
39
|
synthesize
|
32
|
-
}
|
40
|
+
}.map(&:to_sym)
|
33
41
|
|
34
|
-
|
35
|
-
|
42
|
+
VALID_UNIT_DEFINITION_SETS = [
|
43
|
+
[:units],
|
44
|
+
[:from_units, :to_units],
|
45
|
+
[:units_field_name],
|
46
|
+
[:units_field_name, :to_units],
|
47
|
+
[:units_field_number],
|
48
|
+
[:units_field_number, :to_units],
|
49
|
+
]
|
50
|
+
|
51
|
+
DEFAULT_SPLIT = /\s+/
|
52
|
+
DEFAULT_KEEP = 0
|
53
|
+
DEFAULT_DELIMITER = ', '
|
54
|
+
DEFAULT_NULLIFY = false
|
55
|
+
DEFAULT_UPCASE = false
|
56
|
+
DEFAULT_OVERWRITE = true
|
57
|
+
|
58
|
+
attr_reader :step
|
59
|
+
attr_reader :name
|
60
|
+
attr_reader :synthesize
|
61
|
+
attr_reader :matcher
|
62
|
+
attr_reader :field_number
|
63
|
+
attr_reader :field_name
|
64
|
+
# For use when joining a range of field numbers
|
65
|
+
attr_reader :delimiter
|
66
|
+
attr_reader :chars
|
67
|
+
attr_reader :split
|
68
|
+
attr_reader :to_units
|
69
|
+
attr_reader :from_units
|
70
|
+
attr_reader :units_field_number
|
71
|
+
attr_reader :units_field_name
|
72
|
+
attr_reader :sprintf
|
73
|
+
attr_reader :static
|
36
74
|
|
75
|
+
def initialize(step, name, options = {})
|
76
|
+
options = options.symbolize_keys
|
77
|
+
if (errors = Attribute.check_options(options)).any?
|
78
|
+
raise ::ArgumentError, %{[data_miner] Errors on #{inspect}: #{errors.join(';')}}
|
79
|
+
end
|
37
80
|
@step = step
|
38
81
|
@name = name
|
39
|
-
|
40
|
-
|
41
|
-
|
82
|
+
@synthesize = options[:synthesize]
|
83
|
+
if @dictionary_boolean = options.has_key?(:dictionary)
|
84
|
+
@dictionary_options = options[:dictionary]
|
85
|
+
end
|
86
|
+
@matcher = options[:matcher].is_a?(::String) ? options[:matcher].constantize.new : options[:matcher]
|
87
|
+
if @static_boolean = options.has_key?(:static)
|
88
|
+
@static = options[:static]
|
89
|
+
end
|
90
|
+
@field_number = options[:field_number]
|
91
|
+
@field_name = options.fetch(:field_name, name).to_sym
|
92
|
+
@delimiter = options.fetch :delimiter, DEFAULT_DELIMITER
|
93
|
+
@chars = options[:chars]
|
94
|
+
if split = options[:split]
|
95
|
+
@split = split.symbolize_keys
|
96
|
+
end
|
97
|
+
@nullify_boolean = options.fetch :nullify, DEFAULT_NULLIFY
|
98
|
+
@upcase_boolean = options.fetch :upcase, DEFAULT_UPCASE
|
99
|
+
@from_units = options[:from_units]
|
100
|
+
@to_units = options[:to_units] || options[:units]
|
101
|
+
@sprintf = options[:sprintf]
|
102
|
+
@overwrite_boolean = options.fetch :overwrite, DEFAULT_OVERWRITE
|
103
|
+
@units_field_name = options[:units_field_name]
|
104
|
+
@units_field_number = options[:units_field_number]
|
105
|
+
@dictionary_mutex = ::Mutex.new
|
42
106
|
end
|
43
|
-
|
44
|
-
def
|
45
|
-
|
107
|
+
|
108
|
+
def model
|
109
|
+
step.model
|
110
|
+
end
|
111
|
+
|
112
|
+
def static?
|
113
|
+
@static_boolean
|
114
|
+
end
|
115
|
+
|
116
|
+
def nullify?
|
117
|
+
@nullify_boolean
|
46
118
|
end
|
47
119
|
|
48
|
-
def
|
49
|
-
|
120
|
+
def upcase?
|
121
|
+
@upcase_boolean
|
50
122
|
end
|
51
|
-
|
52
|
-
def
|
53
|
-
|
123
|
+
|
124
|
+
def dictionary?
|
125
|
+
@dictionary_boolean
|
126
|
+
end
|
127
|
+
|
128
|
+
def convert?
|
129
|
+
from_units.present? or units_field_name.present? or units_field_number.present?
|
130
|
+
end
|
131
|
+
|
132
|
+
def units?
|
133
|
+
to_units.present? or units_field_name.present? or units_field_number.present?
|
134
|
+
end
|
135
|
+
|
136
|
+
def overwrite?
|
137
|
+
@overwrite_boolean
|
138
|
+
end
|
139
|
+
|
140
|
+
def read(row)
|
141
|
+
if matcher and matched_row = matcher.match(row)
|
142
|
+
return matched_row
|
143
|
+
end
|
144
|
+
if synthesize
|
145
|
+
return synthesize.call(row)
|
146
|
+
end
|
147
|
+
value = if static?
|
54
148
|
static
|
55
149
|
elsif field_number
|
56
150
|
if field_number.is_a?(::Range)
|
@@ -58,180 +152,83 @@ class DataMiner
|
|
58
152
|
else
|
59
153
|
row[field_number]
|
60
154
|
end
|
61
|
-
elsif field_name ==
|
155
|
+
elsif field_name == :row_hash
|
62
156
|
row.row_hash
|
63
157
|
elsif row.is_a?(::Hash) or row.is_a?(::ActiveSupport::OrderedHash)
|
64
|
-
row[field_name]
|
158
|
+
row[field_name.to_s] # remote_table hash keys are always strings
|
159
|
+
end
|
160
|
+
if value.nil?
|
161
|
+
return
|
162
|
+
end
|
163
|
+
if value.is_a? ::ActiveRecord::Base
|
164
|
+
return value
|
65
165
|
end
|
66
|
-
return nil if value.nil?
|
67
|
-
return value if value.is_a?(::ActiveRecord::Base) # escape valve for parsers that look up associations directly
|
68
166
|
value = value.to_s
|
69
|
-
|
70
|
-
|
71
|
-
value.gsub! /[ ]+/, ' '
|
72
|
-
value.strip!
|
73
|
-
return nil if value.blank? and wants_nullification?
|
74
|
-
value.upcase! if wants_upcase?
|
75
|
-
value = do_convert row, value if wants_conversion?
|
76
|
-
value = do_sprintf value if wants_sprintf?
|
77
|
-
value
|
78
|
-
end
|
79
|
-
|
80
|
-
def match_row(row)
|
81
|
-
matcher.match row
|
82
|
-
end
|
83
|
-
|
84
|
-
def value_from_row(row)
|
85
|
-
return match_row row if wants_matcher?
|
86
|
-
value = value_in_source row
|
87
|
-
return value if value.is_a? ::ActiveRecord::Base # carry through trapdoor
|
88
|
-
value = value_in_dictionary value if wants_dictionary?
|
89
|
-
value = synthesize.call(row) if wants_synthesize?
|
90
|
-
value
|
91
|
-
end
|
92
|
-
|
93
|
-
def set_record_from_row(record, row)
|
94
|
-
return false if !wants_overwriting? and !record.send(name).nil?
|
95
|
-
record.send "#{name}=", value_from_row(row)
|
96
|
-
if wants_units?
|
97
|
-
unit = (to_units || unit_from_source(row)).to_s
|
98
|
-
unit = nil if unit.blank? and wants_nullification?
|
99
|
-
record.send "#{name}_units=", unit
|
167
|
+
if chars
|
168
|
+
value = value[chars]
|
100
169
|
end
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
end
|
106
|
-
|
107
|
-
def do_convert(row, value)
|
108
|
-
unless wants_units?
|
109
|
-
raise ::RuntimeError, "[data_miner] If you use 'from_units', you need to set 'to_units' (#{inspect})"
|
170
|
+
if split
|
171
|
+
pattern = split.fetch :pattern, DEFAULT_SPLIT
|
172
|
+
keep = split.fetch :keep, DEFAULT_KEEP
|
173
|
+
value = value.to_s.split(pattern)[keep].to_s
|
110
174
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
|
175
|
+
value = DataMiner.compress_whitespace value
|
176
|
+
if nullify? and value.blank?
|
177
|
+
return
|
115
178
|
end
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
def do_sprintf(value)
|
120
|
-
if /\%[0-9\.]*f/.match sprintf
|
121
|
-
value = value.to_f
|
122
|
-
elsif /\%[0-9\.]*d/.match sprintf
|
123
|
-
value = value.to_i
|
179
|
+
if upcase?
|
180
|
+
value = DataMiner.upcase value
|
124
181
|
end
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
def wants_upcase?
|
146
|
-
upcase.present?
|
147
|
-
end
|
148
|
-
def wants_static?
|
149
|
-
options.has_key? 'static'
|
150
|
-
end
|
151
|
-
def wants_nullification?
|
152
|
-
nullify == true
|
153
|
-
end
|
154
|
-
def wants_chars?
|
155
|
-
chars.present?
|
156
|
-
end
|
157
|
-
def wants_synthesize?
|
158
|
-
synthesize.is_a?(::Proc)
|
159
|
-
end
|
160
|
-
def wants_overwriting?
|
161
|
-
overwrite != false
|
162
|
-
end
|
163
|
-
def wants_conversion?
|
164
|
-
from_units.present? or units_field_name.present? or units_field_number.present?
|
165
|
-
end
|
166
|
-
def wants_units?
|
167
|
-
to_units.present? or units_field_name.present? or units_field_number.present?
|
168
|
-
end
|
169
|
-
def wants_dictionary?
|
170
|
-
options['dictionary'].present?
|
171
|
-
end
|
172
|
-
def wants_matcher?
|
173
|
-
options['matcher'].present?
|
182
|
+
if convert?
|
183
|
+
final_from_units = from_units || read_units(row)
|
184
|
+
final_to_units = to_units || read_units(row)
|
185
|
+
if final_from_units.blank? or final_to_units.blank?
|
186
|
+
raise ::RuntimeError, "[data_miner] Missing units (from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
|
187
|
+
end
|
188
|
+
value = value.to_f.convert final_from_units, final_to_units
|
189
|
+
end
|
190
|
+
if sprintf
|
191
|
+
if sprintf.end_with?('f')
|
192
|
+
value = value.to_f
|
193
|
+
elsif sprintf.end_with?('d')
|
194
|
+
value = value.to_i
|
195
|
+
end
|
196
|
+
value = sprintf % value
|
197
|
+
end
|
198
|
+
if dictionary?
|
199
|
+
value = dictionary.lookup(value)
|
200
|
+
end
|
201
|
+
value
|
174
202
|
end
|
175
203
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
# Options that can't be referred to by their names
|
185
|
-
def split_options
|
186
|
-
options['split']
|
187
|
-
end
|
188
|
-
|
189
|
-
def from_units
|
190
|
-
options['from_units']
|
191
|
-
end
|
192
|
-
def to_units
|
193
|
-
options['to_units'] || options['units']
|
194
|
-
end
|
195
|
-
def sprintf
|
196
|
-
options['sprintf']
|
197
|
-
end
|
198
|
-
def nullify
|
199
|
-
options['nullify']
|
200
|
-
end
|
201
|
-
def overwrite
|
202
|
-
options['overwrite']
|
203
|
-
end
|
204
|
-
def upcase
|
205
|
-
options['upcase']
|
206
|
-
end
|
207
|
-
def units_field_name
|
208
|
-
options['units_field_name']
|
209
|
-
end
|
210
|
-
def units_field_number
|
211
|
-
options['units_field_number']
|
212
|
-
end
|
213
|
-
def field_number
|
214
|
-
options['field_number']
|
215
|
-
end
|
216
|
-
def chars
|
217
|
-
options['chars']
|
218
|
-
end
|
219
|
-
def synthesize
|
220
|
-
options['synthesize']
|
221
|
-
end
|
222
|
-
def static
|
223
|
-
options['static']
|
204
|
+
def set_from_row(target, row)
|
205
|
+
if overwrite? or target.send(name).nil?
|
206
|
+
target.send "#{name}=", read(row)
|
207
|
+
end
|
208
|
+
if units? and ((final_to_units = (to_units || read_units(row))) or nullify?)
|
209
|
+
target.send "#{name}_units=", final_to_units
|
210
|
+
end
|
224
211
|
end
|
225
|
-
|
212
|
+
|
226
213
|
def dictionary
|
227
|
-
@dictionary
|
214
|
+
@dictionary || @dictionary_mutex.synchronize do
|
215
|
+
@dictionary ||= Dictionary.new(@dictionary_options)
|
216
|
+
end
|
228
217
|
end
|
229
|
-
|
230
|
-
|
218
|
+
|
219
|
+
def refresh
|
220
|
+
@dictionary = nil
|
231
221
|
end
|
232
|
-
|
222
|
+
|
223
|
+
private
|
224
|
+
|
225
|
+
def read_units(row)
|
226
|
+
if units = row[units_field_name || units_field_number]
|
227
|
+
DataMiner.compress_whitespace(units).underscore.to_sym
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
233
231
|
def free
|
234
|
-
@dictionary.free if @dictionary.is_a?(Dictionary)
|
235
232
|
@dictionary = nil
|
236
233
|
end
|
237
234
|
end
|