data_miner 2.0.2 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +2 -0
- data/CHANGELOG +13 -0
- data/lib/data_miner/active_record_class_methods.rb +3 -3
- data/lib/data_miner/attribute.rb +31 -13
- data/lib/data_miner/run.rb +7 -0
- data/lib/data_miner/script.rb +2 -2
- data/lib/data_miner/step/import.rb +2 -1
- data/lib/data_miner/step/process.rb +1 -1
- data/lib/data_miner/step/tap.rb +1 -1
- data/lib/data_miner/version.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/support/pets.csv +5 -5
- data/test/test_data_miner.rb +57 -1
- metadata +3 -2
data/.yardopts
ADDED
data/CHANGELOG
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
2.0.3 / 2012-05-07
|
2
|
+
|
3
|
+
* Enhancements
|
4
|
+
|
5
|
+
* Rename :nullify to :nullify_blank_strings to clarify that it only affects textual columns and means "treat blank input as null".
|
6
|
+
* Don't try to set units for a column that is currently nil (thanks @ihough)
|
7
|
+
|
8
|
+
* Bug fixes
|
9
|
+
|
10
|
+
* Don't blow up if mass-assignment rules are strict.
|
11
|
+
* Don't accidentally set a numeric column to 0.0 when the input is blank or nil.
|
12
|
+
|
1
13
|
2.0.2 / 2012-05-04
|
2
14
|
|
3
15
|
* Breaking changes
|
@@ -8,6 +20,7 @@
|
|
8
20
|
* Enhancements
|
9
21
|
|
10
22
|
* Real documentation!
|
23
|
+
* Make it easier to clear locks with DataMiner::Run.clear_locks
|
11
24
|
* Replace class-level mutexes with simple Thread.exclusive calls
|
12
25
|
* Simplified DataMiner::Dictionary
|
13
26
|
|
data/lib/data_miner/active_record_class_methods.rb
CHANGED
@@ -62,9 +62,9 @@ class DataMiner
|
|
62
62
|
#
|
63
63
|
# @yield [] The block defining the steps.
|
64
64
|
#
|
65
|
-
# @see DataMiner::Script#import
|
66
|
-
# @see DataMiner::Script#process
|
67
|
-
# @see DataMiner::Script#tap
|
65
|
+
# @see DataMiner::Script#import Creating an import step by calling DataMiner::Script#import from inside a data miner script
|
66
|
+
# @see DataMiner::Script#process Creating a process step by calling DataMiner::Script#process from inside a data miner script
|
67
|
+
# @see DataMiner::Script#tap Creating a tap step by calling DataMiner::Script#tap from inside a data miner script
|
68
68
|
#
|
69
69
|
# @example Creating steps
|
70
70
|
# class MyModel < ActiveRecord::Base
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -4,8 +4,8 @@ class DataMiner
|
|
4
4
|
# A mapping between a local model column and a remote data source column.
|
5
5
|
#
|
6
6
|
# @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
|
7
|
-
# @see DataMiner::Step::Import#store
|
8
|
-
# @see DataMiner::Step::Import#key
|
7
|
+
# @see DataMiner::Step::Import#store Telling an import step to store a column with DataMiner::Step::Import#store
|
8
|
+
# @see DataMiner::Step::Import#key Telling an import step to key on a column with DataMiner::Step::Import#key
|
9
9
|
class Attribute
|
10
10
|
class << self
|
11
11
|
# @private
|
@@ -35,7 +35,8 @@ class DataMiner
|
|
35
35
|
:split,
|
36
36
|
:units,
|
37
37
|
:sprintf,
|
38
|
-
:nullify,
|
38
|
+
:nullify, # deprecated
|
39
|
+
:nullify_blank_strings,
|
39
40
|
:overwrite,
|
40
41
|
:upcase,
|
41
42
|
:units_field_name,
|
@@ -57,7 +58,7 @@ class DataMiner
|
|
57
58
|
DEFAULT_SPLIT_PATTERN = /\s+/
|
58
59
|
DEFAULT_SPLIT_KEEP = 0
|
59
60
|
DEFAULT_DELIMITER = ', '
|
60
|
-
|
61
|
+
DEFAULT_NULLIFY_BLANK_STRINGS = false
|
61
62
|
DEFAULT_UPCASE = false
|
62
63
|
DEFAULT_OVERWRITE = true
|
63
64
|
|
@@ -137,9 +138,9 @@ class DataMiner
|
|
137
138
|
# @return [String,Numeric,TrueClass,FalseClass,Object]
|
138
139
|
attr_reader :static
|
139
140
|
|
140
|
-
#
|
141
|
+
# Only meaningful for string columns. Whether to store blank input (" ") as NULL. Defaults to DEFAULT_NULLIFY_BLANK_STRINGS.
|
141
142
|
# @return [TrueClass,FalseClass]
|
142
|
-
attr_reader :nullify
|
143
|
+
attr_reader :nullify_blank_strings
|
143
144
|
|
144
145
|
# Whether to upcase value. Defaults to DEFAULT_UPCASE.
|
145
146
|
# @return [TrueClass,FalseClass]
|
@@ -156,7 +157,7 @@ class DataMiner
|
|
156
157
|
raise ::ArgumentError, %{[data_miner] Errors on #{inspect}: #{errors.join(';')}}
|
157
158
|
end
|
158
159
|
@step = step
|
159
|
-
@name = name
|
160
|
+
@name = name.to_sym
|
160
161
|
@synthesize = options[:synthesize]
|
161
162
|
if @dictionary_boolean = options.has_key?(:dictionary)
|
162
163
|
@dictionary_settings = options[:dictionary]
|
@@ -172,7 +173,12 @@ class DataMiner
|
|
172
173
|
if split = options[:split]
|
173
174
|
@split = split.symbolize_keys
|
174
175
|
end
|
175
|
-
@
|
176
|
+
@nullify_blank_strings = if options.has_key?(:nullify)
|
177
|
+
# deprecated
|
178
|
+
options[:nullify]
|
179
|
+
else
|
180
|
+
options.fetch :nullify_blank_strings, DEFAULT_NULLIFY_BLANK_STRINGS
|
181
|
+
end
|
176
182
|
@upcase = options.fetch :upcase, DEFAULT_UPCASE
|
177
183
|
@from_units = options[:from_units]
|
178
184
|
@to_units = options[:to_units] || options[:units]
|
@@ -196,10 +202,16 @@ class DataMiner
|
|
196
202
|
|
197
203
|
# @private
|
198
204
|
def set_from_row(local_record, remote_row)
|
199
|
-
|
200
|
-
|
205
|
+
previously_nil = local_record.send(name).nil?
|
206
|
+
currently_nil = false
|
207
|
+
|
208
|
+
if previously_nil or overwrite
|
209
|
+
new_value = read remote_row
|
210
|
+
local_record.send "#{name}=", new_value
|
211
|
+
currently_nil = new_value.nil?
|
201
212
|
end
|
202
|
-
|
213
|
+
|
214
|
+
if not currently_nil and units? and (final_to_units = (to_units || read_units(remote_row)))
|
203
215
|
local_record.send "#{name}_units=", final_to_units
|
204
216
|
end
|
205
217
|
end
|
@@ -240,10 +252,10 @@ class DataMiner
|
|
240
252
|
keep = split.fetch :keep, DEFAULT_SPLIT_KEEP
|
241
253
|
value = value.to_s.split(pattern)[keep].to_s
|
242
254
|
end
|
243
|
-
value
|
244
|
-
if nullify and value.blank?
|
255
|
+
if value.blank? and (not stringlike_column? or nullify_blank_strings)
|
245
256
|
return
|
246
257
|
end
|
258
|
+
value = DataMiner.compress_whitespace value
|
247
259
|
if upcase
|
248
260
|
value = DataMiner.upcase value
|
249
261
|
end
|
@@ -280,6 +292,12 @@ class DataMiner
|
|
280
292
|
step.model
|
281
293
|
end
|
282
294
|
|
295
|
+
def stringlike_column?
|
296
|
+
return @stringlike_column_query[0] if @stringlike_column_query.is_a?(::Array)
|
297
|
+
@stringlike_column_query = [model.columns_hash[name.to_s].type == :string]
|
298
|
+
@stringlike_column_query[0]
|
299
|
+
end
|
300
|
+
|
283
301
|
def static?
|
284
302
|
@static_boolean
|
285
303
|
end
|
data/lib/data_miner/run.rb
CHANGED
data/lib/data_miner/script.rb
CHANGED
@@ -130,7 +130,7 @@ class DataMiner
|
|
130
130
|
# @yield [] A block defining how to +key+ the import (to make it idempotent) and which columns to +store+.
|
131
131
|
#
|
132
132
|
# @note Be sure to check out https://github.com/seamusabshere/remote_table and https://github.com/seamusabshere/errata for available +table_and_errata_settings+.
|
133
|
-
# @note There are hundreds of +import+ examples in https://github.com/brighterplanet/earth
|
133
|
+
# @note There are hundreds of +import+ examples in https://github.com/brighterplanet/earth. The {file:README.markdown README} points to a few (at the bottom.)
|
134
134
|
# @note We often use string primary keys to make idempotency easier. https://github.com/seamusabshere/active_record_inline_schema supports defining these inline.
|
135
135
|
#
|
136
136
|
# @example From the README
|
@@ -213,7 +213,7 @@ class DataMiner
|
|
213
213
|
Script.current_stack.clear
|
214
214
|
end
|
215
215
|
Script.current_stack << model_name
|
216
|
-
Run.
|
216
|
+
Run.perform(model_name) do
|
217
217
|
steps.each do |step|
|
218
218
|
step.perform
|
219
219
|
model.reset_column_information
|
data/lib/data_miner/step/import.rb
CHANGED
@@ -8,7 +8,8 @@ class DataMiner
|
|
8
8
|
# Create these by calling +import+ inside a +data_miner+ block.
|
9
9
|
#
|
10
10
|
# @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
|
11
|
-
# @see DataMiner::Script#import
|
11
|
+
# @see DataMiner::Script#import Creating an import step by calling DataMiner::Script#import from inside a data miner script
|
12
|
+
# @see DataMiner::Attribute The Attribute class, which maps local columns and remote data fields from within an import step
|
12
13
|
class Import < Step
|
13
14
|
# The mappings of local columns to remote data source fields.
|
14
15
|
# @return [Array<DataMiner::Attribute>]
|
data/lib/data_miner/step/process.rb
CHANGED
@@ -5,7 +5,7 @@ class DataMiner
|
|
5
5
|
# Create these by calling +process+ inside a +data_miner+ block.
|
6
6
|
#
|
7
7
|
# @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
|
8
|
-
# @see DataMiner::Script#process
|
8
|
+
# @see DataMiner::Script#process Creating a process step by calling DataMiner::Script#process from inside a data miner script
|
9
9
|
class Process < Step
|
10
10
|
# @private
|
11
11
|
attr_reader :script
|
data/lib/data_miner/step/tap.rb
CHANGED
@@ -7,7 +7,7 @@ class DataMiner
|
|
7
7
|
# Create these by calling +tap+ inside a +data_miner+ block.
|
8
8
|
#
|
9
9
|
# @see DataMiner::ActiveRecordClassMethods#data_miner Overview of how to define data miner scripts inside of ActiveRecord models.
|
10
|
-
# @see DataMiner::Script#tap
|
10
|
+
# @see DataMiner::Script#tap Creating a tap step by calling DataMiner::Script#tap from inside a data miner script
|
11
11
|
class Tap < Step
|
12
12
|
DEFAULT_PORTS = {
|
13
13
|
:mysql => 3306,
|
data/lib/data_miner/version.rb
CHANGED
data/test/helper.rb
CHANGED
data/test/support/pets.csv
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
name,breed,color,age
|
2
|
-
Pierre,Tabby,GO,4
|
3
|
-
Jerry,Beagle,BR/BL,5
|
4
|
-
Amigo,Spanish Lizarto,GR/BU,17
|
5
|
-
Johnny,Beagle,BR/BL,2
|
1
|
+
name,breed,color,age,age_units,weight,height,favorite_food,command_phrase
|
2
|
+
Pierre,Tabby,GO,4,years,4.4,30,tomato,"eh"
|
3
|
+
Jerry,Beagle,BR/BL,5,years,10,30,cheese,"che"
|
4
|
+
Amigo,Spanish Lizarto,GR/BU,17,years," ",3,crickets," "
|
5
|
+
Johnny,Beagle,BR/BL,2,years,20,45," ",
|
data/test/test_data_miner.rb
CHANGED
@@ -13,15 +13,26 @@ class Pet < ActiveRecord::Base
|
|
13
13
|
col :breed_id
|
14
14
|
col :color_id
|
15
15
|
col :age, :type => :integer
|
16
|
+
col :age_units
|
17
|
+
col :weight, :type => :float
|
18
|
+
col :weight_units
|
19
|
+
col :height, :type => :integer
|
20
|
+
col :height_units
|
21
|
+
col :favorite_food
|
22
|
+
col :command_phrase
|
16
23
|
belongs_to :breed
|
17
24
|
data_miner do
|
18
25
|
process :auto_upgrade!
|
19
26
|
process :run_data_miner_on_parent_associations!
|
20
27
|
import("A list of pets", :url => "file://#{PETS}") do
|
21
28
|
key :name
|
22
|
-
store :age
|
29
|
+
store :age, :units_field_name => 'age_units'
|
23
30
|
store :breed_id, :field_name => :breed
|
24
31
|
store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
|
32
|
+
store :weight, :from_units => :pounds, :to_units => :kilograms
|
33
|
+
store :favorite_food, :nullify_blank_strings => true
|
34
|
+
store :command_phrase
|
35
|
+
store :height, :units => :centimetres
|
25
36
|
end
|
26
37
|
end
|
27
38
|
end
|
@@ -46,6 +57,11 @@ class Breed < ActiveRecord::Base
|
|
46
57
|
end
|
47
58
|
end
|
48
59
|
|
60
|
+
ActiveRecord::Base.mass_assignment_sanitizer = :strict
|
61
|
+
ActiveRecord::Base.descendants.each do |model|
|
62
|
+
model.attr_accessible nil
|
63
|
+
end
|
64
|
+
|
49
65
|
Pet.auto_upgrade!
|
50
66
|
|
51
67
|
describe DataMiner do
|
@@ -53,6 +69,17 @@ describe DataMiner do
|
|
53
69
|
before do
|
54
70
|
Pet.delete_all
|
55
71
|
end
|
72
|
+
it "it does not depend on mass-assignment" do
|
73
|
+
lambda do
|
74
|
+
Pet.new(:name => 'hello').save!
|
75
|
+
end.must_raise(ActiveModel::MassAssignmentSecurity::Error)
|
76
|
+
lambda do
|
77
|
+
Pet.new(:color_id => 'hello').save!
|
78
|
+
end.must_raise(ActiveModel::MassAssignmentSecurity::Error)
|
79
|
+
lambda do
|
80
|
+
Pet.new(:age => 'hello').save!
|
81
|
+
end.must_raise(ActiveModel::MassAssignmentSecurity::Error)
|
82
|
+
end
|
56
83
|
it "is idempotent given a key" do
|
57
84
|
Pet.run_data_miner!
|
58
85
|
first_count = Pet.count
|
@@ -99,5 +126,34 @@ describe DataMiner do
|
|
99
126
|
Breed.run_data_miner!
|
100
127
|
Breed.find('Beagle').average_age.must_equal((5+2)/2.0)
|
101
128
|
end
|
129
|
+
it "performs unit conversions" do
|
130
|
+
Pet.run_data_miner!
|
131
|
+
Pet.find('Pierre').weight.must_be_close_to(4.4.pounds.to(:kilograms), 0.00001)
|
132
|
+
end
|
133
|
+
it "sets units" do
|
134
|
+
Pet.run_data_miner!
|
135
|
+
Pet.find('Pierre').age_units.must_equal 'years'
|
136
|
+
Pet.find('Pierre').weight_units.must_equal 'kilograms'
|
137
|
+
Pet.find('Pierre').height_units.must_equal 'centimetres'
|
138
|
+
end
|
139
|
+
it "always nullifies numeric columns when blank/nil is the input" do
|
140
|
+
Pet.run_data_miner!
|
141
|
+
Pet.find('Amigo').weight.must_be_nil
|
142
|
+
end
|
143
|
+
it "doesn't nullify string columns by default" do
|
144
|
+
Pet.run_data_miner!
|
145
|
+
Pet.find('Amigo').command_phrase.must_equal ''
|
146
|
+
Pet.find('Johnny').command_phrase.must_equal ''
|
147
|
+
end
|
148
|
+
it "nullifies string columns on demand" do
|
149
|
+
Pet.run_data_miner!
|
150
|
+
Pet.find('Jerry').favorite_food.must_equal 'cheese'
|
151
|
+
Pet.find('Johnny').favorite_food.must_be_nil
|
152
|
+
end
|
153
|
+
it "doesn't set units if the input was blank/null" do
|
154
|
+
Pet.run_data_miner!
|
155
|
+
Pet.find('Amigo').weight.must_be_nil
|
156
|
+
Pet.find('Amigo').weight_units.must_be_nil
|
157
|
+
end
|
102
158
|
end
|
103
159
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.2
|
4
|
+
version: 2.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-05-04 00:00:00.000000000 Z
|
14
|
+
date: 2012-05-07 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: remote_table
|
@@ -151,6 +151,7 @@ extensions: []
|
|
151
151
|
extra_rdoc_files: []
|
152
152
|
files:
|
153
153
|
- .gitignore
|
154
|
+
- .yardopts
|
154
155
|
- CHANGELOG
|
155
156
|
- Gemfile
|
156
157
|
- LICENSE
|