data_miner 2.0.2 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +2 -0
- data/CHANGELOG +13 -0
- data/lib/data_miner/active_record_class_methods.rb +3 -3
- data/lib/data_miner/attribute.rb +31 -13
- data/lib/data_miner/run.rb +7 -0
- data/lib/data_miner/script.rb +2 -2
- data/lib/data_miner/step/import.rb +2 -1
- data/lib/data_miner/step/process.rb +1 -1
- data/lib/data_miner/step/tap.rb +1 -1
- data/lib/data_miner/version.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/support/pets.csv +5 -5
- data/test/test_data_miner.rb +57 -1
- metadata +3 -2
data/.yardopts
ADDED
data/CHANGELOG
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
2.0.3 / 2012-05-07
|
2
|
+
|
3
|
+
* Enhancements
|
4
|
+
|
5
|
+
* Rename :nullify to :nullify_blank_strings to clarify that it only affects textual columns and means "treat blank input as null".
|
6
|
+
* Don't try to set units for a column that is currently nil (thanks @ihough)
|
7
|
+
|
8
|
+
* Bug fixes
|
9
|
+
|
10
|
+
* Don't blow up if mass-assignment rules are strict.
|
11
|
+
* Don't accidentally set a numeric column to 0.0 when the input is blank or nil.
|
12
|
+
|
1
13
|
2.0.2 / 2012-05-04
|
2
14
|
|
3
15
|
* Breaking changes
|
@@ -8,6 +20,7 @@
|
|
8
20
|
* Enhancements
|
9
21
|
|
10
22
|
* Real documentation!
|
23
|
+
* Make it easier to clear locks with DataMiner::Run.clear_locks
|
11
24
|
* Replace class-level mutexes with simple Thread.exclusive calls
|
12
25
|
* Simplified DataMiner::Dictionary
|
13
26
|
|
@@ -62,9 +62,9 @@ class DataMiner
|
|
62
62
|
#
|
63
63
|
# @yield [] The block defining the steps.
|
64
64
|
#
|
65
|
-
# @see DataMiner::Script#import
|
66
|
-
# @see DataMiner::Script#process
|
67
|
-
# @see DataMiner::Script#tap
|
65
|
+
# @see DataMiner::Script#import Creating an import step by calling DataMiner::Script#import from inside a data miner script
|
66
|
+
# @see DataMiner::Script#process Creating a process step by calling DataMiner::Script#process from inside a data miner script
|
67
|
+
# @see DataMiner::Script#tap Creating a tap step by calling DataMiner::Script#tap from inside a data miner script
|
68
68
|
#
|
69
69
|
# @example Creating steps
|
70
70
|
# class MyModel < ActiveRecord::Base
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -4,8 +4,8 @@ class DataMiner
|
|
4
4
|
# A mapping between a local model column and a remote data source column.
|
5
5
|
#
|
6
6
|
# @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
|
7
|
-
# @see DataMiner::Step::Import#store
|
8
|
-
# @see DataMiner::Step::Import#key
|
7
|
+
# @see DataMiner::Step::Import#store Telling an import step to store a column with DataMiner::Step::Import#store
|
8
|
+
# @see DataMiner::Step::Import#key Telling an import step to key on a column with DataMiner::Step::Import#key
|
9
9
|
class Attribute
|
10
10
|
class << self
|
11
11
|
# @private
|
@@ -35,7 +35,8 @@ class DataMiner
|
|
35
35
|
:split,
|
36
36
|
:units,
|
37
37
|
:sprintf,
|
38
|
-
:nullify,
|
38
|
+
:nullify, # deprecated
|
39
|
+
:nullify_blank_strings,
|
39
40
|
:overwrite,
|
40
41
|
:upcase,
|
41
42
|
:units_field_name,
|
@@ -57,7 +58,7 @@ class DataMiner
|
|
57
58
|
DEFAULT_SPLIT_PATTERN = /\s+/
|
58
59
|
DEFAULT_SPLIT_KEEP = 0
|
59
60
|
DEFAULT_DELIMITER = ', '
|
60
|
-
|
61
|
+
DEFAULT_NULLIFY_BLANK_STRINGS = false
|
61
62
|
DEFAULT_UPCASE = false
|
62
63
|
DEFAULT_OVERWRITE = true
|
63
64
|
|
@@ -137,9 +138,9 @@ class DataMiner
|
|
137
138
|
# @return [String,Numeric,TrueClass,FalseClass,Object]
|
138
139
|
attr_reader :static
|
139
140
|
|
140
|
-
#
|
141
|
+
# Only meaningful for string columns. Whether to store blank input (" ") as NULL. Defaults to DEFAULT_NULLIFY_BLANK_STRINGS.
|
141
142
|
# @return [TrueClass,FalseClass]
|
142
|
-
attr_reader :nullify
|
143
|
+
attr_reader :nullify_blank_strings
|
143
144
|
|
144
145
|
# Whether to upcase value. Defaults to DEFAULT_UPCASE.
|
145
146
|
# @return [TrueClass,FalseClass]
|
@@ -156,7 +157,7 @@ class DataMiner
|
|
156
157
|
raise ::ArgumentError, %{[data_miner] Errors on #{inspect}: #{errors.join(';')}}
|
157
158
|
end
|
158
159
|
@step = step
|
159
|
-
@name = name
|
160
|
+
@name = name.to_sym
|
160
161
|
@synthesize = options[:synthesize]
|
161
162
|
if @dictionary_boolean = options.has_key?(:dictionary)
|
162
163
|
@dictionary_settings = options[:dictionary]
|
@@ -172,7 +173,12 @@ class DataMiner
|
|
172
173
|
if split = options[:split]
|
173
174
|
@split = split.symbolize_keys
|
174
175
|
end
|
175
|
-
@
|
176
|
+
@nullify_blank_strings = if options.has_key?(:nullify)
|
177
|
+
# deprecated
|
178
|
+
options[:nullify]
|
179
|
+
else
|
180
|
+
options.fetch :nullify_blank_strings, DEFAULT_NULLIFY_BLANK_STRINGS
|
181
|
+
end
|
176
182
|
@upcase = options.fetch :upcase, DEFAULT_UPCASE
|
177
183
|
@from_units = options[:from_units]
|
178
184
|
@to_units = options[:to_units] || options[:units]
|
@@ -196,10 +202,16 @@ class DataMiner
|
|
196
202
|
|
197
203
|
# @private
|
198
204
|
def set_from_row(local_record, remote_row)
|
199
|
-
|
200
|
-
|
205
|
+
previously_nil = local_record.send(name).nil?
|
206
|
+
currently_nil = false
|
207
|
+
|
208
|
+
if previously_nil or overwrite
|
209
|
+
new_value = read remote_row
|
210
|
+
local_record.send "#{name}=", new_value
|
211
|
+
currently_nil = new_value.nil?
|
201
212
|
end
|
202
|
-
|
213
|
+
|
214
|
+
if not currently_nil and units? and (final_to_units = (to_units || read_units(remote_row)))
|
203
215
|
local_record.send "#{name}_units=", final_to_units
|
204
216
|
end
|
205
217
|
end
|
@@ -240,10 +252,10 @@ class DataMiner
|
|
240
252
|
keep = split.fetch :keep, DEFAULT_SPLIT_KEEP
|
241
253
|
value = value.to_s.split(pattern)[keep].to_s
|
242
254
|
end
|
243
|
-
value
|
244
|
-
if nullify and value.blank?
|
255
|
+
if value.blank? and (not stringlike_column? or nullify_blank_strings)
|
245
256
|
return
|
246
257
|
end
|
258
|
+
value = DataMiner.compress_whitespace value
|
247
259
|
if upcase
|
248
260
|
value = DataMiner.upcase value
|
249
261
|
end
|
@@ -280,6 +292,12 @@ class DataMiner
|
|
280
292
|
step.model
|
281
293
|
end
|
282
294
|
|
295
|
+
def stringlike_column?
|
296
|
+
return @stringlike_column_query[0] if @stringlike_column_query.is_a?(::Array)
|
297
|
+
@stringlike_column_query = [model.columns_hash[name.to_s].type == :string]
|
298
|
+
@stringlike_column_query[0]
|
299
|
+
end
|
300
|
+
|
283
301
|
def static?
|
284
302
|
@static_boolean
|
285
303
|
end
|
data/lib/data_miner/run.rb
CHANGED
data/lib/data_miner/script.rb
CHANGED
@@ -130,7 +130,7 @@ class DataMiner
|
|
130
130
|
# @yield [] A block defining how to +key+ the import (to make it idempotent) and which columns to +store+.
|
131
131
|
#
|
132
132
|
# @note Be sure to check out https://github.com/seamusabshere/remote_table and https://github.com/seamusabshere/errata for available +table_and_errata_settings+.
|
133
|
-
# @note There are hundreds of +import+ examples in https://github.com/brighterplanet/earth
|
133
|
+
# @note There are hundreds of +import+ examples in https://github.com/brighterplanet/earth. The {file:README.markdown README} points to a few (at the bottom.)
|
134
134
|
# @note We often use string primary keys to make idempotency easier. https://github.com/seamusabshere/active_record_inline_schema supports defining these inline.
|
135
135
|
#
|
136
136
|
# @example From the README
|
@@ -213,7 +213,7 @@ class DataMiner
|
|
213
213
|
Script.current_stack.clear
|
214
214
|
end
|
215
215
|
Script.current_stack << model_name
|
216
|
-
Run.
|
216
|
+
Run.perform(model_name) do
|
217
217
|
steps.each do |step|
|
218
218
|
step.perform
|
219
219
|
model.reset_column_information
|
@@ -8,7 +8,8 @@ class DataMiner
|
|
8
8
|
# Create these by calling +import+ inside a +data_miner+ block.
|
9
9
|
#
|
10
10
|
# @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
|
11
|
-
# @see DataMiner::Script#import
|
11
|
+
# @see DataMiner::Script#import Creating an import step by calling DataMiner::Script#import from inside a data miner script
|
12
|
+
# @see DataMiner::Attribute The Attribute class, which maps local columns and remote data fields from within an import step
|
12
13
|
class Import < Step
|
13
14
|
# The mappings of local columns to remote data source fields.
|
14
15
|
# @return [Array<DataMiner::Attribute>]
|
@@ -5,7 +5,7 @@ class DataMiner
|
|
5
5
|
# Create these by calling +process+ inside a +data_miner+ block.
|
6
6
|
#
|
7
7
|
# @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
|
8
|
-
# @see DataMiner::Script#process
|
8
|
+
# @see DataMiner::Script#process Creating a process step by calling DataMiner::Script#process from inside a data miner script
|
9
9
|
class Process < Step
|
10
10
|
# @private
|
11
11
|
attr_reader :script
|
data/lib/data_miner/step/tap.rb
CHANGED
@@ -7,7 +7,7 @@ class DataMiner
|
|
7
7
|
# Create these by calling +tap+ inside a +data_miner+ block.
|
8
8
|
#
|
9
9
|
# @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
|
10
|
-
# @see DataMiner::Script#tap
|
10
|
+
# @see DataMiner::Script#tap Creating a tap step by calling DataMiner::Script#tap from inside a data miner script
|
11
11
|
class Tap < Step
|
12
12
|
DEFAULT_PORTS = {
|
13
13
|
:mysql => 3306,
|
data/lib/data_miner/version.rb
CHANGED
data/test/helper.rb
CHANGED
data/test/support/pets.csv
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
name,breed,color,age
|
2
|
-
Pierre,Tabby,GO,4
|
3
|
-
Jerry,Beagle,BR/BL,5
|
4
|
-
Amigo,Spanish Lizarto,GR/BU,17
|
5
|
-
Johnny,Beagle,BR/BL,2
|
1
|
+
name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
|
2
|
+
Pierre,Tabby,GO,4,years,4.4,30,tomato,"eh"
|
3
|
+
Jerry,Beagle,BR/BL,5,years,10,30,cheese,"che"
|
4
|
+
Amigo,Spanish Lizarto,GR/BU,17,years," ",3,crickets," "
|
5
|
+
Johnny,Beagle,BR/BL,2,years,20,45," ",
|
data/test/test_data_miner.rb
CHANGED
@@ -13,15 +13,26 @@ class Pet < ActiveRecord::Base
|
|
13
13
|
col :breed_id
|
14
14
|
col :color_id
|
15
15
|
col :age, :type => :integer
|
16
|
+
col :age_units
|
17
|
+
col :weight, :type => :float
|
18
|
+
col :weight_units
|
19
|
+
col :height, :type => :integer
|
20
|
+
col :height_units
|
21
|
+
col :favorite_food
|
22
|
+
col :command_phrase
|
16
23
|
belongs_to :breed
|
17
24
|
data_miner do
|
18
25
|
process :auto_upgrade!
|
19
26
|
process :run_data_miner_on_parent_associations!
|
20
27
|
import("A list of pets", :url => "file://#{PETS}") do
|
21
28
|
key :name
|
22
|
-
store :age
|
29
|
+
store :age, :units_field_name => 'age_units'
|
23
30
|
store :breed_id, :field_name => :breed
|
24
31
|
store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
|
32
|
+
store :weight, :from_units => :pounds, :to_units => :kilograms
|
33
|
+
store :favorite_food, :nullify_blank_strings => true
|
34
|
+
store :command_phrase
|
35
|
+
store :height, :units => :centimetres
|
25
36
|
end
|
26
37
|
end
|
27
38
|
end
|
@@ -46,6 +57,11 @@ class Breed < ActiveRecord::Base
|
|
46
57
|
end
|
47
58
|
end
|
48
59
|
|
60
|
+
ActiveRecord::Base.mass_assignment_sanitizer = :strict
|
61
|
+
ActiveRecord::Base.descendants.each do |model|
|
62
|
+
model.attr_accessible nil
|
63
|
+
end
|
64
|
+
|
49
65
|
Pet.auto_upgrade!
|
50
66
|
|
51
67
|
describe DataMiner do
|
@@ -53,6 +69,17 @@ describe DataMiner do
|
|
53
69
|
before do
|
54
70
|
Pet.delete_all
|
55
71
|
end
|
72
|
+
it "it does not depend on mass-assignment" do
|
73
|
+
lambda do
|
74
|
+
Pet.new(:name => 'hello').save!
|
75
|
+
end.must_raise(ActiveModel::MassAssignmentSecurity::Error)
|
76
|
+
lambda do
|
77
|
+
Pet.new(:color_id => 'hello').save!
|
78
|
+
end.must_raise(ActiveModel::MassAssignmentSecurity::Error)
|
79
|
+
lambda do
|
80
|
+
Pet.new(:age => 'hello').save!
|
81
|
+
end.must_raise(ActiveModel::MassAssignmentSecurity::Error)
|
82
|
+
end
|
56
83
|
it "is idempotent given a key" do
|
57
84
|
Pet.run_data_miner!
|
58
85
|
first_count = Pet.count
|
@@ -99,5 +126,34 @@ describe DataMiner do
|
|
99
126
|
Breed.run_data_miner!
|
100
127
|
Breed.find('Beagle').average_age.must_equal((5+2)/2.0)
|
101
128
|
end
|
129
|
+
it "performs unit conversions" do
|
130
|
+
Pet.run_data_miner!
|
131
|
+
Pet.find('Pierre').weight.must_be_close_to(4.4.pounds.to(:kilograms), 0.00001)
|
132
|
+
end
|
133
|
+
it "sets units" do
|
134
|
+
Pet.run_data_miner!
|
135
|
+
Pet.find('Pierre').age_units.must_equal 'years'
|
136
|
+
Pet.find('Pierre').weight_units.must_equal 'kilograms'
|
137
|
+
Pet.find('Pierre').height_units.must_equal 'centimetres'
|
138
|
+
end
|
139
|
+
it "always nullifies numeric columns when blank/nil is the input" do
|
140
|
+
Pet.run_data_miner!
|
141
|
+
Pet.find('Amigo').weight.must_be_nil
|
142
|
+
end
|
143
|
+
it "doesn't nullify string columns by default" do
|
144
|
+
Pet.run_data_miner!
|
145
|
+
Pet.find('Amigo').command_phrase.must_equal ''
|
146
|
+
Pet.find('Johnny').command_phrase.must_equal ''
|
147
|
+
end
|
148
|
+
it "nullifies string columns on demand" do
|
149
|
+
Pet.run_data_miner!
|
150
|
+
Pet.find('Jerry').favorite_food.must_equal 'cheese'
|
151
|
+
Pet.find('Johnny').favorite_food.must_be_nil
|
152
|
+
end
|
153
|
+
it "doesn't set units if the input was blank/null" do
|
154
|
+
Pet.run_data_miner!
|
155
|
+
Pet.find('Amigo').weight.must_be_nil
|
156
|
+
Pet.find('Amigo').weight_units.must_be_nil
|
157
|
+
end
|
102
158
|
end
|
103
159
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.2
|
4
|
+
version: 2.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-05-
|
14
|
+
date: 2012-05-07 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: remote_table
|
@@ -151,6 +151,7 @@ extensions: []
|
|
151
151
|
extra_rdoc_files: []
|
152
152
|
files:
|
153
153
|
- .gitignore
|
154
|
+
- .yardopts
|
154
155
|
- CHANGELOG
|
155
156
|
- Gemfile
|
156
157
|
- LICENSE
|