data_miner 2.5.2 → 3.0.0.alpha
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +18 -0
- data/Gemfile +0 -2
- data/data_miner.gemspec +3 -7
- data/lib/data_miner.rb +2 -31
- data/lib/data_miner/active_record_class_methods.rb +5 -11
- data/lib/data_miner/attribute.rb +100 -198
- data/lib/data_miner/script.rb +5 -11
- data/lib/data_miner/step/import.rb +41 -27
- data/lib/data_miner/step/sql.rb +10 -10
- data/lib/data_miner/version.rb +1 -1
- data/test/data_miner/step/test_sql.rb +14 -18
- data/test/data_miner/test_attribute.rb +0 -32
- data/test/helper.rb +4 -9
- data/test/support/data_miner_with_alchemist.rb +1 -5
- data/test/support/pet.rb +10 -9
- data/test/support/pet2.rb +1 -1
- data/test/support/pets.csv +2 -2
- data/test/test_data_miner.rb +6 -40
- metadata +9 -97
- data/lib/data_miner/dictionary.rb +0 -84
- data/lib/data_miner/run.rb +0 -144
- data/lib/data_miner/run/column_statistic.rb +0 -78
- data/lib/data_miner/unit_converter.rb +0 -12
- data/lib/data_miner/unit_converter/alchemist.rb +0 -11
- data/lib/data_miner/unit_converter/conversions.rb +0 -11
- data/test/data_miner/step/test_import.rb +0 -35
- data/test/data_miner/unit_converter/test_alchemist.rb +0 -20
- data/test/data_miner/unit_converter/test_conversions.rb +0 -20
- data/test/support/data_miner_with_conversions.rb +0 -16
- data/test/support/data_miner_without_unit_converter.rb +0 -51
- data/test/test_data_miner_run_column_statistic.rb +0 -52
- data/test/test_earth_import.rb +0 -26
- data/test/test_safety.rb +0 -84
- data/test/test_unit_conversion.rb +0 -16
data/CHANGELOG
CHANGED
@@ -1,3 +1,21 @@
|
|
1
|
+
3.0.0.alpha / 2013-07-24
|
2
|
+
|
3
|
+
* breaking changes
|
4
|
+
|
5
|
+
* :dictionary is now just a plain Hash-like object (responds to []) - and no longer attempts to refresh between runs
|
6
|
+
* no more unit conversions
|
7
|
+
* always nullifies blank strings
|
8
|
+
* always overwrites columns whether there was a non-null value there before or not (no more :overwrite option)
|
9
|
+
* don't use synthesize option, just pass a block
|
10
|
+
* synthesized values get the same whitespace compression and stripping as other values
|
11
|
+
* not tested against the Earth library, which has particular requirements and makes testing too complicated
|
12
|
+
* doesn't keep DataMiner::Run or DataMiner::Run::ColumnStatistic records
|
13
|
+
* no more :matcher option
|
14
|
+
|
15
|
+
* enhancements
|
16
|
+
|
17
|
+
* if you have a postgres hstore column called "foo", you can do store 'foo.bar'
|
18
|
+
|
1
19
|
2.5.2 / 2013-07-05
|
2
20
|
|
3
21
|
* Bug fixes
|
data/Gemfile
CHANGED
data/data_miner.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.email = ["seamus@abshere.net", "rossmeissl@gmail.com", "dkastner@gmail.com", "ijhough@gmail.com", "towerhe@gmail.com"]
|
9
9
|
s.homepage = "https://github.com/seamusabshere/data_miner"
|
10
10
|
s.summary = %{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.}
|
11
|
-
s.description = %q{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.
|
11
|
+
s.description = %q{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models. Uses Upsert internally for speed.}
|
12
12
|
|
13
13
|
s.rubyforge_project = "data_miner"
|
14
14
|
|
@@ -17,8 +17,6 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
18
|
s.require_paths = ["lib"]
|
19
19
|
|
20
|
-
s.add_runtime_dependency 'aasm'
|
21
|
-
s.add_runtime_dependency 'active_record_inline_schema', '>=0.6.1'
|
22
20
|
s.add_runtime_dependency 'activerecord', '> 3'
|
23
21
|
s.add_runtime_dependency 'activesupport', '> 3'
|
24
22
|
s.add_runtime_dependency 'errata', '>=1.0.1'
|
@@ -28,11 +26,9 @@ Gem::Specification.new do |s|
|
|
28
26
|
s.add_runtime_dependency 'unix_utils'
|
29
27
|
s.add_runtime_dependency 'roo', '>=1.10.3'
|
30
28
|
|
31
|
-
s.add_development_dependency '
|
32
|
-
s.add_development_dependency '
|
33
|
-
s.add_development_dependency 'earth'
|
29
|
+
s.add_development_dependency 'pry'
|
30
|
+
s.add_development_dependency 'active_record_inline_schema'
|
34
31
|
s.add_development_dependency 'fuzzy_match'
|
35
|
-
s.add_development_dependency 'lock_method'
|
36
32
|
s.add_development_dependency 'minitest'
|
37
33
|
s.add_development_dependency 'minitest-reporters'
|
38
34
|
s.add_development_dependency 'rake'
|
data/lib/data_miner.rb
CHANGED
@@ -17,13 +17,10 @@ end
|
|
17
17
|
require 'data_miner/active_record_class_methods'
|
18
18
|
require 'data_miner/attribute'
|
19
19
|
require 'data_miner/script'
|
20
|
-
require 'data_miner/dictionary'
|
21
20
|
require 'data_miner/step'
|
22
21
|
require 'data_miner/step/import'
|
23
22
|
require 'data_miner/step/process'
|
24
23
|
require 'data_miner/step/sql'
|
25
|
-
require 'data_miner/run'
|
26
|
-
require 'data_miner/unit_converter'
|
27
24
|
|
28
25
|
# A singleton class that holds global configuration for data mining.
|
29
26
|
#
|
@@ -46,23 +43,6 @@ class DataMiner
|
|
46
43
|
def compress_whitespace(str)
|
47
44
|
str.gsub(INNER_SPACE, ONE_SPACE).strip
|
48
45
|
end
|
49
|
-
|
50
|
-
# Set the unit converter.
|
51
|
-
#
|
52
|
-
# @note As of 2012-05-30, there are problems with the alchemist gem and the use of the conversions gem instead is recommended.
|
53
|
-
#
|
54
|
-
# @param [Symbol,nil] conversion_library Either +:alchemist+ or +:conversions+
|
55
|
-
#
|
56
|
-
# @return [nil]
|
57
|
-
def unit_converter=(conversion_library)
|
58
|
-
@unit_converter = UnitConverter.load conversion_library
|
59
|
-
nil
|
60
|
-
end
|
61
|
-
|
62
|
-
# @return [#convert,nil] The user-selected unit converter or nil.
|
63
|
-
def unit_converter
|
64
|
-
@unit_converter
|
65
|
-
end
|
66
46
|
end
|
67
47
|
|
68
48
|
INNER_SPACE = /[ ]+/
|
@@ -76,13 +56,14 @@ class DataMiner
|
|
76
56
|
#
|
77
57
|
# @param [optional, Array<String>] model_names Names of models to be run.
|
78
58
|
#
|
79
|
-
# @return
|
59
|
+
# @return nil
|
80
60
|
def start(model_names = DataMiner.model_names)
|
81
61
|
Script.uniq do
|
82
62
|
model_names.map do |model_name|
|
83
63
|
model_name.constantize.run_data_miner!
|
84
64
|
end
|
85
65
|
end
|
66
|
+
nil
|
86
67
|
end
|
87
68
|
|
88
69
|
# legacy
|
@@ -115,16 +96,6 @@ class DataMiner
|
|
115
96
|
end
|
116
97
|
end
|
117
98
|
|
118
|
-
# Whether per-column stats like max, min, average, standard deviation, etc are enabled.
|
119
|
-
def per_column_statistics?
|
120
|
-
@per_column_statistics == true
|
121
|
-
end
|
122
|
-
|
123
|
-
# Turn on or off per-column stats.
|
124
|
-
def per_column_statistics=(boolean)
|
125
|
-
@per_column_statistics = boolean
|
126
|
-
end
|
127
|
-
|
128
99
|
class << self
|
129
100
|
delegate(*DataMiner.instance_methods(false), :to => :instance)
|
130
101
|
end
|
@@ -12,16 +12,9 @@ class DataMiner
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
-
# Access to recordkeeping.
|
16
|
-
#
|
17
|
-
# @return [ActiveRecord::Relation] Records of running the data miner script.
|
18
|
-
def data_miner_runs
|
19
|
-
DataMiner::Run.scoped :conditions => { :model_name => name }
|
20
|
-
end
|
21
|
-
|
22
15
|
# Run this model's script.
|
23
16
|
#
|
24
|
-
# @return
|
17
|
+
# @return nil
|
25
18
|
def run_data_miner!
|
26
19
|
data_miner_script.start
|
27
20
|
end
|
@@ -45,13 +38,14 @@ class DataMiner
|
|
45
38
|
# end
|
46
39
|
# end
|
47
40
|
#
|
48
|
-
# @return
|
41
|
+
# @return nil
|
49
42
|
def run_data_miner_on_parent_associations!
|
50
43
|
reflect_on_all_associations(:belongs_to).reject do |assoc|
|
51
|
-
assoc.options[
|
44
|
+
assoc.options['polymorphic']
|
52
45
|
end.map do |non_polymorphic_belongs_to_assoc|
|
53
46
|
non_polymorphic_belongs_to_assoc.klass.run_data_miner!
|
54
47
|
end
|
48
|
+
nil
|
55
49
|
end
|
56
50
|
|
57
51
|
# Define a data miner script.
|
@@ -97,7 +91,7 @@ class DataMiner
|
|
97
91
|
#
|
98
92
|
# @return [nil]
|
99
93
|
def data_miner(options = {}, &blk)
|
100
|
-
unless options[
|
94
|
+
unless options['append']
|
101
95
|
@data_miner_script = nil
|
102
96
|
end
|
103
97
|
data_miner_script.append_block blk
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -9,60 +9,32 @@ class DataMiner
|
|
9
9
|
# @private
|
10
10
|
def check_options(options)
|
11
11
|
errors = []
|
12
|
-
if options[
|
13
|
-
errors << %{:dictionary must
|
12
|
+
if options.has_key?('dictionary') and not options['dictionary'].respond_to?(:[])
|
13
|
+
errors << %{:dictionary must respond to [], like a Hash does}
|
14
14
|
end
|
15
15
|
if (invalid_option_keys = options.keys - VALID_OPTIONS).any?
|
16
16
|
errors << %{Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence}}
|
17
17
|
end
|
18
|
-
units_options = options.select { |k, _| k.to_s.include?('units') }
|
19
|
-
if units_options.any? and DataMiner.unit_converter.nil?
|
20
|
-
errors << %{You must set DataMiner.unit_converter to :alchemist or :conversions if you wish to convert units}
|
21
|
-
end
|
22
|
-
if units_options.any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
|
23
|
-
errors << %{#{units_options.inspect} is not a valid set of units definitions. Please supply a set like #{VALID_UNIT_DEFINITION_SETS.map(&:inspect).to_sentence}".}
|
24
|
-
end
|
25
18
|
errors
|
26
19
|
end
|
27
20
|
end
|
28
21
|
|
29
22
|
VALID_OPTIONS = [
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
:sprintf,
|
40
|
-
:nullify, # deprecated
|
41
|
-
:nullify_blank_strings,
|
42
|
-
:overwrite,
|
43
|
-
:upcase,
|
44
|
-
:units_field_name,
|
45
|
-
:units_field_number,
|
46
|
-
:field_number,
|
47
|
-
:chars,
|
48
|
-
:synthesize,
|
49
|
-
]
|
50
|
-
|
51
|
-
VALID_UNIT_DEFINITION_SETS = [
|
52
|
-
[:units], # no conversion
|
53
|
-
[:from_units, :to_units], # yes
|
54
|
-
[:units_field_name], # no
|
55
|
-
[:units_field_name, :to_units], # yes
|
56
|
-
[:units_field_number], # no
|
57
|
-
[:units_field_number, :to_units], # yes
|
23
|
+
'static',
|
24
|
+
'dictionary',
|
25
|
+
'field_name',
|
26
|
+
'delimiter',
|
27
|
+
'split',
|
28
|
+
'sprintf',
|
29
|
+
'upcase',
|
30
|
+
'field_number',
|
31
|
+
'chars',
|
58
32
|
]
|
59
33
|
|
60
34
|
DEFAULT_SPLIT_PATTERN = /\s+/
|
61
35
|
DEFAULT_SPLIT_KEEP = 0
|
62
36
|
DEFAULT_DELIMITER = ', '
|
63
|
-
DEFAULT_NULLIFY_BLANK_STRINGS = false
|
64
37
|
DEFAULT_UPCASE = false
|
65
|
-
DEFAULT_OVERWRITE = true
|
66
38
|
|
67
39
|
# activerecord-3.2.6/lib/active_record/connection_adapters/column.rb
|
68
40
|
TRUE_VALUES = [true, 1, '1', 't', 'T', 'true', 'TRUE', 'on', 'ON', 'yes', 'YES', 'y', 'Y']
|
@@ -72,24 +44,18 @@ class DataMiner
|
|
72
44
|
attr_reader :step
|
73
45
|
|
74
46
|
# Local column name.
|
75
|
-
# @return [
|
47
|
+
# @return [String]
|
76
48
|
attr_reader :name
|
77
49
|
|
78
|
-
# Synthesize a value by passing a proc that will receive +row+ and should return a final value.
|
50
|
+
# The block passed to a store argument. Synthesize a value by passing a proc that will receive +row+ and should return a final value.
|
51
|
+
#
|
52
|
+
# Unlike past versions of DataMiner, you pass this as a block, not with the :synthesize option.
|
79
53
|
#
|
80
54
|
# +row+ will be a +Hash+ with string keys or (less often) an +Array+
|
81
55
|
#
|
82
56
|
# @return [Proc]
|
83
57
|
attr_reader :synthesize
|
84
58
|
|
85
|
-
# An object that will be sent +#match(row)+ and should return a final value.
|
86
|
-
#
|
87
|
-
# Can be specified as a String which will be constantized into a class and an object of that class instantized with no arguments.
|
88
|
-
#
|
89
|
-
# +row+ will be a +Hash+ with string keys or (less often) an +Array+
|
90
|
-
# @return [Object]
|
91
|
-
attr_reader :matcher
|
92
|
-
|
93
59
|
# Index of where to find the data in the row, starting from zero.
|
94
60
|
#
|
95
61
|
# If you pass a +Range+, then multiple fields will be joined together.
|
@@ -97,10 +63,6 @@ class DataMiner
|
|
97
63
|
# @return [Integer, Range]
|
98
64
|
attr_reader :field_number
|
99
65
|
|
100
|
-
# Where to find the data in the row.
|
101
|
-
# @return [Symbol]
|
102
|
-
attr_reader :field_name
|
103
|
-
|
104
66
|
# A delimiter to be used when joining fields together into a single final value. Used when +:field_number+ is a +Range+. Defaults to DEFAULT_DELIMITER.
|
105
67
|
# @return [String]
|
106
68
|
attr_reader :delimiter
|
@@ -117,26 +79,6 @@ class DataMiner
|
|
117
79
|
# @return [Hash]
|
118
80
|
attr_reader :split
|
119
81
|
|
120
|
-
# Final units. May invoke a conversion using https://rubygems.org/gems/alchemist
|
121
|
-
#
|
122
|
-
# If a local column named +[name]_units+ exists, it will be populated with this value.
|
123
|
-
#
|
124
|
-
# @return [Symbol]
|
125
|
-
attr_reader :to_units
|
126
|
-
|
127
|
-
# Initial units. May invoke a conversion using a conversion gem like https://rubygems.org/gems/alchemist
|
128
|
-
# Be sure to set DataMiner.unit_converter
|
129
|
-
# @return [Symbol]
|
130
|
-
attr_reader :from_units
|
131
|
-
|
132
|
-
# If every row specifies its own units, index of where to find the units. Zero-based.
|
133
|
-
# @return [Integer]
|
134
|
-
attr_reader :units_field_number
|
135
|
-
|
136
|
-
# If every row specifies its own units, where to find the units.
|
137
|
-
# @return [Symbol]
|
138
|
-
attr_reader :units_field_name
|
139
|
-
|
140
82
|
# A +sprintf+-style format to apply.
|
141
83
|
# @return [String]
|
142
84
|
attr_reader :sprintf
|
@@ -145,93 +87,81 @@ class DataMiner
|
|
145
87
|
# @return [String,Numeric,TrueClass,FalseClass,Object]
|
146
88
|
attr_reader :static
|
147
89
|
|
148
|
-
# Only meaningful for string columns. Whether to store blank input (" ") as NULL. Defaults to DEFAULT_NULLIFY_BLANK_STRINGS.
|
149
|
-
# @return [TrueClass,FalseClass]
|
150
|
-
attr_reader :nullify_blank_strings
|
151
|
-
|
152
90
|
# Whether to upcase value. Defaults to DEFAULT_UPCASE.
|
153
91
|
# @return [TrueClass,FalseClass]
|
154
92
|
attr_reader :upcase
|
155
93
|
|
156
|
-
#
|
157
|
-
#
|
158
|
-
|
94
|
+
# Dictionary for translating.
|
95
|
+
#
|
96
|
+
# You pass a Hash or something that responds to []
|
97
|
+
#
|
98
|
+
# @return [#[]]
|
99
|
+
attr_reader :dictionary
|
159
100
|
|
160
101
|
# @private
|
161
|
-
def initialize(step, name, options = {})
|
162
|
-
options = options.
|
102
|
+
def initialize(step, name, options = {}, &blk)
|
103
|
+
options = options.stringify_keys
|
163
104
|
if (errors = Attribute.check_options(options)).any?
|
164
105
|
raise ::ArgumentError, %{[data_miner] Errors on #{inspect}: #{errors.join(';')}}
|
165
106
|
end
|
166
107
|
@step = step
|
167
|
-
@name = name.
|
168
|
-
@synthesize =
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
@matcher = options[:matcher].is_a?(::String) ? options[:matcher].constantize.new : options[:matcher]
|
173
|
-
if @static_boolean = options.has_key?(:static)
|
174
|
-
@static = options[:static]
|
108
|
+
@name = name.to_s
|
109
|
+
@synthesize = blk if block_given?
|
110
|
+
@dictionary = options['dictionary']
|
111
|
+
if @static_boolean = options.has_key?('static')
|
112
|
+
@static = options['static']
|
175
113
|
end
|
176
|
-
@field_number = options[
|
177
|
-
@
|
178
|
-
@delimiter = options.fetch
|
179
|
-
@chars = options[
|
180
|
-
if split = options[
|
181
|
-
@split = split.
|
182
|
-
end
|
183
|
-
@nullify_blank_strings = if options.has_key?(:nullify)
|
184
|
-
# deprecated
|
185
|
-
options[:nullify]
|
186
|
-
else
|
187
|
-
options.fetch :nullify_blank_strings, DEFAULT_NULLIFY_BLANK_STRINGS
|
114
|
+
@field_number = options['field_number']
|
115
|
+
@field_name_settings = options['field_name']
|
116
|
+
@delimiter = options.fetch 'delimiter', DEFAULT_DELIMITER
|
117
|
+
@chars = options['chars']
|
118
|
+
if split = options['split']
|
119
|
+
@split = split.stringify_keys
|
188
120
|
end
|
189
|
-
@upcase = options.fetch
|
190
|
-
@
|
191
|
-
@to_units = options[:to_units] || options[:units]
|
192
|
-
@sprintf = options[:sprintf]
|
193
|
-
@overwrite = options.fetch :overwrite, DEFAULT_OVERWRITE
|
194
|
-
@units_field_name = options[:units_field_name]
|
195
|
-
@units_field_number = options[:units_field_number]
|
196
|
-
@convert_boolean = (@from_units.present? or (@to_units.present? and (@units_field_name.present? or @units_field_number.present?)))
|
197
|
-
@persist_units_boolean = (@to_units.present? or @units_field_name.present? or @units_field_number.present?)
|
198
|
-
@dictionary_mutex = ::Mutex.new
|
121
|
+
@upcase = options.fetch 'upcase', DEFAULT_UPCASE
|
122
|
+
@sprintf = options['sprintf']
|
199
123
|
end
|
200
124
|
|
201
|
-
#
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
125
|
+
# @private
|
126
|
+
def hstore_column
|
127
|
+
return @hstore_column if defined?(@hstore_column)
|
128
|
+
@hstore_column = name.split('.', 2)[0]
|
129
|
+
end
|
130
|
+
|
131
|
+
# @private
|
132
|
+
def hstore_key
|
133
|
+
return @hstore_key if defined?(@hstore_key)
|
134
|
+
@hstore_key = name.split('.', 2)[1]
|
135
|
+
end
|
136
|
+
|
137
|
+
# Where to find the data in the row.
|
138
|
+
# @return [String]
|
139
|
+
def field_name
|
140
|
+
return @field_name if defined?(@field_name)
|
141
|
+
@field_name = if @field_name_settings
|
142
|
+
@field_name_settings.to_s
|
143
|
+
elsif hstore?
|
144
|
+
hstore_key
|
145
|
+
else
|
146
|
+
name
|
209
147
|
end
|
210
148
|
end
|
211
149
|
|
212
150
|
# # @private
|
213
|
-
# TODO make sure that nil handling is replicated when using upsert
|
214
151
|
def set_from_row(local_record, remote_row)
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
local_record.send
|
220
|
-
currently_nil = new_value.nil?
|
221
|
-
end
|
222
|
-
if not currently_nil and persist_units? and (final_to_units = (to_units || read_units(remote_row)))
|
223
|
-
local_record.send "#{name}_units=", final_to_units
|
152
|
+
new_value = read remote_row
|
153
|
+
if hstore?
|
154
|
+
local_record.send(hstore_column)[hstore_key] = new_value
|
155
|
+
else
|
156
|
+
local_record.send("#{name}=", new_value)
|
224
157
|
end
|
225
158
|
end
|
226
159
|
|
227
160
|
# @private
|
228
161
|
def updates(remote_row)
|
229
162
|
v = read remote_row
|
230
|
-
if
|
231
|
-
|
232
|
-
to_units || read_units(remote_row)
|
233
|
-
end
|
234
|
-
{ name => v, "#{name}_units" => v_units }
|
163
|
+
if hstore?
|
164
|
+
{ hstore_column => { hstore_key => v } }
|
235
165
|
else
|
236
166
|
{ name => v }
|
237
167
|
end
|
@@ -239,27 +169,23 @@ class DataMiner
|
|
239
169
|
|
240
170
|
# @private
|
241
171
|
def read(row)
|
242
|
-
|
243
|
-
raise RuntimeError, "[data_miner] Table #{model.table_name} does not have column #{name.inspect}"
|
244
|
-
end
|
245
|
-
if matcher and matcher_output = matcher.match(row)
|
246
|
-
return matcher_output
|
247
|
-
end
|
248
|
-
if synthesize
|
249
|
-
return synthesize.call(row)
|
172
|
+
if not column_exists?
|
173
|
+
raise RuntimeError, "[data_miner] Table #{model.table_name} does not have column #{(hstore? ? hstore_column : name).inspect}"
|
250
174
|
end
|
251
175
|
value = if static?
|
252
176
|
static
|
177
|
+
elsif synthesize
|
178
|
+
synthesize.call(row)
|
253
179
|
elsif field_number
|
254
180
|
if field_number.is_a?(::Range)
|
255
181
|
field_number.map { |n| row[n] }.join(delimiter)
|
256
182
|
else
|
257
183
|
row[field_number]
|
258
184
|
end
|
259
|
-
elsif field_name ==
|
185
|
+
elsif field_name == 'row_hash'
|
260
186
|
row.row_hash
|
261
187
|
elsif row.is_a?(::Hash) or row.is_a?(::ActiveSupport::OrderedHash)
|
262
|
-
row[field_name
|
188
|
+
row[field_name] # remote_table hash keys are always strings
|
263
189
|
end
|
264
190
|
if value.nil?
|
265
191
|
return
|
@@ -296,47 +222,29 @@ class DataMiner
|
|
296
222
|
value = value[chars]
|
297
223
|
end
|
298
224
|
if split
|
299
|
-
pattern = split.fetch
|
300
|
-
keep = split.fetch
|
225
|
+
pattern = split.fetch 'pattern', DEFAULT_SPLIT_PATTERN
|
226
|
+
keep = split.fetch 'keep', DEFAULT_SPLIT_KEEP
|
301
227
|
value = value.to_s.split(pattern)[keep].to_s
|
302
228
|
end
|
303
|
-
if value.blank?
|
229
|
+
if value.blank? # TODO false is "blank"
|
304
230
|
return
|
305
231
|
end
|
306
232
|
value = DataMiner.compress_whitespace value
|
307
233
|
if upcase
|
308
234
|
value = DataMiner.upcase value
|
309
235
|
end
|
310
|
-
if convert?
|
311
|
-
value = convert_units value, row
|
312
|
-
end
|
313
236
|
if sprintf
|
314
|
-
|
315
|
-
value = value.to_f
|
316
|
-
elsif sprintf.end_with?('d')
|
317
|
-
value = value.to_i
|
318
|
-
end
|
319
|
-
value = sprintf % value
|
237
|
+
value = sprintf % value.to_f
|
320
238
|
end
|
321
|
-
if dictionary
|
322
|
-
value = dictionary
|
239
|
+
if dictionary
|
240
|
+
value = dictionary[value]
|
323
241
|
end
|
324
242
|
value
|
325
243
|
end
|
326
244
|
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
final_to_units = to_units || read_units(row)
|
331
|
-
unless final_from_units and final_to_units
|
332
|
-
raise RuntimeError, "[data_miner] Missing units: from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
|
333
|
-
end
|
334
|
-
DataMiner.unit_converter.convert value, final_from_units, final_to_units
|
335
|
-
end
|
336
|
-
|
337
|
-
# @private
|
338
|
-
def refresh
|
339
|
-
@dictionary = nil
|
245
|
+
def hstore?
|
246
|
+
return @hstore_boolean if defined?(@hstore_boolean)
|
247
|
+
@hstore_boolean = name.include?('.')
|
340
248
|
end
|
341
249
|
|
342
250
|
private
|
@@ -347,48 +255,42 @@ class DataMiner
|
|
347
255
|
|
348
256
|
def column_exists?
|
349
257
|
return @column_exists_boolean if defined?(@column_exists_boolean)
|
350
|
-
|
258
|
+
if hstore?
|
259
|
+
@column_exists_boolean = model.column_names.include? hstore_column
|
260
|
+
else
|
261
|
+
@column_exists_boolean = model.column_names.include? name
|
262
|
+
end
|
351
263
|
end
|
352
264
|
|
353
265
|
def text_column?
|
354
266
|
return @text_column_boolean if defined?(@text_column_boolean)
|
355
|
-
|
267
|
+
if hstore?
|
268
|
+
@text_column_boolean = true
|
269
|
+
else
|
270
|
+
@text_column_boolean = model.columns_hash[name].text?
|
271
|
+
end
|
356
272
|
end
|
357
273
|
|
358
274
|
def number_column?
|
359
275
|
return @number_column_boolean if defined?(@number_column_boolean)
|
360
|
-
|
276
|
+
if hstore?
|
277
|
+
@number_column_boolean = false
|
278
|
+
else
|
279
|
+
@number_column_boolean = model.columns_hash[name].number?
|
280
|
+
end
|
361
281
|
end
|
362
282
|
|
363
283
|
def boolean_column?
|
364
284
|
return @boolean_column_boolean if defined?(@boolean_column_boolean)
|
365
|
-
|
285
|
+
if hstore?
|
286
|
+
@boolean_column_boolean = false
|
287
|
+
else
|
288
|
+
@boolean_column_boolean = (model.columns_hash[name].type == :boolean)
|
289
|
+
end
|
366
290
|
end
|
367
291
|
|
368
292
|
def static?
|
369
293
|
@static_boolean
|
370
294
|
end
|
371
|
-
|
372
|
-
def dictionary?
|
373
|
-
@dictionary_boolean
|
374
|
-
end
|
375
|
-
|
376
|
-
def convert?
|
377
|
-
@convert_boolean
|
378
|
-
end
|
379
|
-
|
380
|
-
def persist_units?
|
381
|
-
@persist_units_boolean
|
382
|
-
end
|
383
|
-
|
384
|
-
def read_units(row)
|
385
|
-
if units = row[units_field_name || units_field_number]
|
386
|
-
DataMiner.compress_whitespace(units).underscore
|
387
|
-
end
|
388
|
-
end
|
389
|
-
|
390
|
-
def free
|
391
|
-
@dictionary = nil
|
392
|
-
end
|
393
295
|
end
|
394
296
|
end
|