data_miner 2.5.2 → 3.0.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -202,7 +202,7 @@ class DataMiner
202
202
  # @note Normally you should use +Country.run_data_miner!+
203
203
  # @note A primitive "call stack" is kept that will prevent infinite loops. So, if Country's data miner script calls Province's AND vice-versa, each one will only be run once.
204
204
  #
205
- # @return [DataMiner::Run]
205
+ # @return nil
206
206
  def start
207
207
  model_name = model.name
208
208
  # $stderr.write "0 - #{model_name}\n"
@@ -217,17 +217,11 @@ class DataMiner
217
217
  Script.current_stack.clear
218
218
  end
219
219
  Script.current_stack << model_name
220
- unless Run.table_exists?
221
- Run.auto_upgrade!
222
- end
223
- run = Run.new
224
- run.model_name = model_name
225
- run.start do
226
- steps.each do |step|
227
- step.start
228
- model.reset_column_information
229
- end
220
+ steps.each do |step|
221
+ step.start
222
+ model.reset_column_information
230
223
  end
224
+ nil
231
225
  end
232
226
 
233
227
  private
@@ -22,24 +22,24 @@ class DataMiner
22
22
 
23
23
  # @private
24
24
  def initialize(script, description, settings, &blk)
25
- settings = settings.symbolize_keys
26
- if settings.has_key?(:table)
25
+ settings = settings.stringify_keys
26
+ if settings.has_key?('table')
27
27
  raise ::ArgumentError, %{[data_miner] :table is no longer an allowed setting.}
28
28
  end
29
- if (errata_settings = settings[:errata]) and not errata_settings.is_a?(::Hash)
29
+ if (errata_settings = settings['errata']) and not errata_settings.is_a?(::Hash)
30
30
  raise ::ArgumentError, %{[data_miner] :errata must be a hash of initialization settings to Errata}
31
31
  end
32
32
  @script = script
33
33
  @attributes = ::ActiveSupport::OrderedHash.new
34
- @validate_query = !!settings[:validate]
34
+ @validate_query = !!settings['validate']
35
35
  @description = description
36
- if settings.has_key? :errata
37
- errata_settings = settings[:errata].symbolize_keys
38
- errata_settings[:responder] ||= model
39
- settings[:errata] = errata_settings
36
+ if settings.has_key? 'errata'
37
+ errata_settings = settings['errata'].stringify_keys
38
+ errata_settings['responder'] ||= model
39
+ settings['errata'] = errata_settings
40
40
  end
41
41
  @table_settings = settings.dup
42
- @table_settings[:streaming] = true
42
+ @table_settings['streaming'] = true
43
43
  @table_mutex = ::Mutex.new
44
44
  instance_eval(&blk)
45
45
  end
@@ -48,17 +48,17 @@ class DataMiner
48
48
  #
49
49
  # @see DataMiner::Attribute The actual Attribute class.
50
50
  #
51
- # @param [Symbol] attr_name The name of the local model column.
51
+ # @param [String] attr_name The name of the local model column.
52
52
  # @param [optional, Hash] attr_options Options that will be passed to +DataMiner::Attribute.new+
53
53
  # @option attr_options [*] anything Any option for +DataMiner::Attribute+.
54
54
  #
55
55
  # @return [nil]
56
- def store(attr_name, attr_options = {})
57
- attr_name = attr_name.to_sym
56
+ def store(attr_name, attr_options = {}, &blk)
57
+ attr_name = attr_name.to_s
58
58
  if attributes.has_key? attr_name
59
59
  raise "You should only call store or key once for #{model.name}##{attr_name}"
60
60
  end
61
- attributes[attr_name] = DataMiner::Attribute.new self, attr_name, attr_options
61
+ attributes[attr_name] = DataMiner::Attribute.new self, attr_name, attr_options, &blk
62
62
  end
63
63
 
64
64
  # Store data into a model column AND use it as the key.
@@ -67,13 +67,13 @@ class DataMiner
67
67
  #
68
68
  # Enables idempotency. In other words, you can run the data miner script multiple times, get updated data, and not get duplicate rows.
69
69
  #
70
- # @param [Symbol] attr_name The name of the local model column.
70
+ # @param [String] attr_name The name of the local model column.
71
71
  # @param [optional, Hash] attr_options Options that will be passed to +DataMiner::Attribute.new+
72
72
  # @option attr_options [*] anything Any option for +DataMiner::Attribute+.
73
73
  #
74
74
  # @return [nil]
75
75
  def key(attr_name, attr_options = {})
76
- attr_name = attr_name.to_sym
76
+ attr_name = attr_name.to_s
77
77
  if attributes.has_key? attr_name
78
78
  raise "You should only call store or key once for #{model.name}##{attr_name}"
79
79
  end
@@ -83,9 +83,8 @@ class DataMiner
83
83
 
84
84
  # @private
85
85
  def start
86
- upsert_enabled? ? save_with_upsert : save_with_activerecord
86
+ upsert_enabled? ? save_with_upsert : save_with_find_or_initialize
87
87
  refresh
88
-
89
88
  nil
90
89
  end
91
90
 
@@ -101,13 +100,28 @@ class DataMiner
101
100
  (not validate?) and (storing_primary_key? or table_has_autoincrementing_primary_key?)
102
101
  end
103
102
 
103
+ def count_every
104
+ @count_every ||= ENV.fetch('DATA_MINER_COUNT_EVERY', -1).to_i
105
+ end
106
+
104
107
  def save_with_upsert
105
108
  c = model.connection_pool.checkout
109
+ attrs_except_key = attributes.except(@key).values
110
+ count = 0
106
111
  Upsert.stream(c, model.table_name) do |upsert|
107
112
  table.each do |row|
113
+ $stderr.puts "#{count}..." if count_every > 0 and count % count_every == 0
114
+ count += 1
108
115
  selector = @key ? { @key => attributes[@key].read(row) } : { model.primary_key => nil }
109
- document = attributes.except(@key).inject({}) do |memo, (_, attr)|
110
- memo.merge! attr.updates(row)
116
+ document = attrs_except_key.inject({}) do |memo, attr|
117
+ attr.updates(row).each do |k, v|
118
+ case memo[k]
119
+ when ::Hash
120
+ memo[k] = memo[k].merge v
121
+ else
122
+ memo[k] = v
123
+ end
124
+ end
111
125
  memo
112
126
  end
113
127
  upsert.row selector, document
@@ -116,8 +130,11 @@ class DataMiner
116
130
  model.connection_pool.checkin c
117
131
  end
118
132
 
119
- def save_with_activerecord
133
+ def save_with_find_or_initialize
134
+ count = 0
120
135
  table.each do |row|
136
+ $stderr.puts "#{count}..." if count_every > 0 and count % count_every == 0
137
+ count += 1
121
138
  record = @key ? model.send("find_or_initialize_by_#{@key}", attributes[@key].read(row)) : model.new
122
139
  attributes.each { |_, attr| attr.set_from_row record, row }
123
140
  record.save!
@@ -125,7 +142,7 @@ class DataMiner
125
142
  end
126
143
 
127
144
  def table_has_autoincrementing_primary_key?
128
- return @table_has_autoincrementing_primary_key_query.first if @table_has_autoincrementing_primary_key_query.is_a?(Array)
145
+ return @table_has_autoincrementing_primary_key_query if defined?(@table_has_autoincrementing_primary_key_query)
129
146
  c = model.connection_pool.checkout
130
147
  answer = if (pk = model.primary_key) and model.columns_hash[pk].type == :integer
131
148
  case c.adapter_name
@@ -143,14 +160,12 @@ class DataMiner
143
160
  end
144
161
  end
145
162
  model.connection_pool.checkin c
146
- @table_has_autoincrementing_primary_key_query = [answer]
147
- answer
163
+ @table_has_autoincrementing_primary_key_query = answer
148
164
  end
149
165
 
150
166
  def storing_primary_key?
151
- return @storing_primary_key_query.first if @storing_primary_key_query.is_a?(Array)
152
- @storing_primary_key_query = [model.primary_key && attributes.has_key?(model.primary_key.to_sym)]
153
- @storing_primary_key_query.first
167
+ return @storing_primary_key_query if defined?(@storing_primary_key_query)
168
+ @storing_primary_key_query = model.primary_key && attributes.has_key?(model.primary_key)
154
169
  end
155
170
 
156
171
  def table
@@ -161,7 +176,6 @@ class DataMiner
161
176
 
162
177
  def refresh
163
178
  @table = nil
164
- attributes.each { |_, attr| attr.refresh }
165
179
  nil
166
180
  end
167
181
  end
@@ -43,7 +43,7 @@ class DataMiner
43
43
  ActiveRecord::Base.connection.execute statement
44
44
  else
45
45
  tmp_path = UnixUtils.curl url
46
- send config[:adapter], tmp_path
46
+ send config['adapter'], tmp_path
47
47
  File.unlink tmp_path
48
48
  end
49
49
  end
@@ -55,24 +55,24 @@ class DataMiner
55
55
  ActiveRecord::Base.connection_config
56
56
  else
57
57
  ActiveRecord::Base.connection_pool.spec.config
58
- end
58
+ end.stringify_keys
59
59
  end
60
60
 
61
61
  def mysql(path)
62
- connect = if config[:socket]
63
- [ '--socket', config[:socket] ]
62
+ connect = if config['socket']
63
+ [ '--socket', config['socket'] ]
64
64
  else
65
- [ '--host', config.fetch(:host, '127.0.0.1'), '--port', config.fetch(:port, 3306).to_s ]
65
+ [ '--host', config.fetch('host', '127.0.0.1'), '--port', config.fetch('port', 3306).to_s ]
66
66
  end
67
67
 
68
68
  argv = [
69
69
  'mysql',
70
70
  '--compress',
71
- '--user', config[:username],
72
- "-p#{config[:password]}",
71
+ '--user', config['username'],
72
+ "-p#{config['password']}",
73
73
  connect,
74
74
  '--default-character-set', 'utf8',
75
- config[:database]
75
+ config['database']
76
76
  ].flatten
77
77
 
78
78
  File.open(path) do |f|
@@ -97,7 +97,7 @@ class DataMiner
97
97
  argv = [
98
98
  'psql',
99
99
  '--quiet',
100
- '--dbname', config[:database],
100
+ '--dbname', config['database'],
101
101
  '--file', path
102
102
  ].flatten
103
103
 
@@ -113,7 +113,7 @@ class DataMiner
113
113
  def sqlite3(path)
114
114
  argv = [
115
115
  'sqlite3',
116
- config[:database]
116
+ config['database']
117
117
  ]
118
118
  File.open(path) do |f|
119
119
  pid = POSIX::Spawn.spawn(*(argv+[{:in => f}]))
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '2.5.2'
2
+ VERSION = '3.0.0.alpha'
3
3
  end
@@ -2,37 +2,33 @@
2
2
  require 'helper'
3
3
  init_database
4
4
 
5
- class BreedBlue < ActiveRecord::Base
6
- self.table_name = 'breeds'
7
- self.primary_key = 'name'
5
+ class StateBlue < ActiveRecord::Base
6
+ self.table_name = 'states'
7
+ self.primary_key = 'postal_abbreviation'
8
8
  data_miner do
9
- sql "Brighter Planet's list of breeds (as a URL)", 'http://data.brighterplanet.com/breeds.sql'
9
+ sql "Brighter Planet's list of states (as a URL)", 'http://data.brighterplanet.com/states.sql'
10
10
  end
11
11
  end
12
12
 
13
- class BreedRed < ActiveRecord::Base
14
- self.table_name = 'breeds'
15
- self.primary_key = 'name'
13
+ class StateRed < ActiveRecord::Base
14
+ self.table_name = 'states'
15
+ self.primary_key = 'postal_abbreviation'
16
16
  data_miner do
17
- sql "Brighter Planet's list of breeds (as a URL)", 'http://data.brighterplanet.com/breeds.sql'
18
- sql "Mess up weights", %{UPDATE breeds SET weight = 999}
17
+ sql "Brighter Planet's list of states (as a URL)", 'http://data.brighterplanet.com/states.sql'
18
+ sql "Mess up weights", %{UPDATE states SET name = 'Foobar'}
19
19
  end
20
20
  end
21
21
 
22
22
  describe DataMiner::Step::Sql do
23
23
  before do
24
- BreedBlue.delete_all rescue nil
24
+ StateBlue.delete_all rescue nil
25
25
  end
26
26
  it "can be provided as a URL" do
27
- BreedBlue.run_data_miner!
28
- BreedBlue.where(:name => 'Affenpinscher').count.must_equal 1
29
- BreedBlue.where(:name => 'Württemberger').count.must_equal 1
30
- BreedBlue.find('Afghan Hound').weight.must_be_close_to 24.9476
27
+ StateBlue.run_data_miner!
28
+ StateBlue.where(:name => 'Wisconsin').count.must_equal 1
31
29
  end
32
30
  it "can be provided as a string" do
33
- BreedRed.run_data_miner!
34
- BreedRed.where(:name => 'Affenpinscher').count.must_equal 1
35
- BreedRed.where(:name => 'Württemberger').count.must_equal 1
36
- BreedRed.find('Afghan Hound').weight.must_be_close_to 999
31
+ StateRed.run_data_miner!
32
+ StateRed.find('NJ').name.must_equal 'Foobar'
37
33
  end
38
34
  end
@@ -1,36 +1,4 @@
1
1
  require 'helper'
2
2
 
3
3
  describe DataMiner::Attribute do
4
- before do
5
- DataMiner.unit_converter = :alchemist
6
- end
7
-
8
- describe '#convert?' do
9
- it 'returns true if from_units is set' do
10
- attribute = DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
11
- assert attribute.send(:convert?)
12
- end
13
- it 'returns true if to_units and units_field_name are set' do
14
- attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar', :to_units => :kilograms
15
- assert attribute.send(:convert?)
16
- end
17
- it 'returns true if to_units and units_field_number are set' do
18
- attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 3, :to_units => :kilograms
19
- assert attribute.send(:convert?)
20
- end
21
- it 'returns false if units_field_name only is set' do
22
- attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar'
23
- refute attribute.send(:convert?)
24
- end
25
- it 'returns false if units_field_number only is set' do
26
- attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 'bar'
27
- refute attribute.send(:convert?)
28
- end
29
- it 'raises if no converter and units are used' do
30
- DataMiner.unit_converter = nil
31
- lambda {
32
- DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
33
- }.must_raise ArgumentError, /unit_converter/
34
- end
35
- end
36
4
  end
data/test/helper.rb CHANGED
@@ -15,14 +15,15 @@ MiniTest::Reporters.use!
15
15
  require 'active_record'
16
16
  require 'logger'
17
17
  ActiveRecord::Base.logger = Logger.new $stderr
18
- ActiveRecord::Base.logger.level = Logger::INFO
19
- # ActiveRecord::Base.logger.level = Logger::DEBUG
18
+ ActiveRecord::Base.logger.level = (ENV['VERBOSE'] == 'true') ? Logger::DEBUG : Logger::INFO
20
19
 
21
20
  ActiveRecord::Base.mass_assignment_sanitizer = :strict
22
21
 
22
+ require 'active_record_inline_schema'
23
+
23
24
  require 'data_miner'
24
25
 
25
- def init_database(unit_converter = :conversions)
26
+ def init_database
26
27
  case ENV['DATABASE']
27
28
  when /postgr/i
28
29
  system %{dropdb test_data_miner}
@@ -46,12 +47,6 @@ def init_database(unit_converter = :conversions)
46
47
  'password' => 'password'
47
48
  )
48
49
  end
49
-
50
- DataMiner::Run.auto_upgrade!
51
- DataMiner::Run::ColumnStatistic.auto_upgrade!
52
- DataMiner::Run.clear_locks
53
-
54
- DataMiner.unit_converter = unit_converter
55
50
  end
56
51
 
57
52
  def init_models
@@ -2,12 +2,8 @@ require 'helper'
2
2
 
3
3
  describe 'DataMiner with Alchemist' do
4
4
  before do
5
- init_database(:alchemist)
5
+ init_database
6
6
  init_models
7
7
  Pet.run_data_miner!
8
8
  end
9
-
10
- it 'converts convertible units' do
11
- Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to.kilograms.to_f
12
- end
13
9
  end
data/test/support/pet.rb CHANGED
@@ -9,26 +9,27 @@ class Pet < ActiveRecord::Base
9
9
  col :breed_id
10
10
  col :color_id
11
11
  col :age, :type => :integer
12
- col :age_units
13
12
  col :weight, :type => :float
14
- col :weight_units
15
13
  col :height, :type => :float
16
- col :height_units
17
14
  col :favorite_food
18
15
  col :command_phrase
16
+ col :emphatic_command_phrase
19
17
  belongs_to :breed
20
18
  data_miner do
21
19
  process :auto_upgrade!
22
20
  process :run_data_miner_on_parent_associations!
23
21
  import("A list of pets", :url => "file://#{PETS}") do
24
22
  key :name
25
- store :age, :units_field_name => 'age_units'
26
- store :breed_id, :field_name => :breed, :nullify_blank_strings => true
27
- store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
28
- store :weight, :from_units => :pounds, :to_units => :kilograms
29
- store :favorite_food, :nullify_blank_strings => true
23
+ store :age
24
+ store :breed_id, :field_name => :breed
25
+ store :color_id, :field_name => :color, :dictionary => RemoteTable.new("file://#{COLOR_DICTIONARY_ENGLISH}").inject({}) { |memo, row| memo[row['input']] = row['output']; memo }
26
+ store :weight
27
+ store :favorite_food
30
28
  store :command_phrase
31
- store :height, :units => :millimetres
29
+ store :height
30
+ store :emphatic_command_phrase do |row|
31
+ (row['command_phrase'] + "!!!!!") if row['command_phrase']
32
+ end
32
33
  end
33
34
  end
34
35
  end
data/test/support/pet2.rb CHANGED
@@ -15,7 +15,7 @@ class Pet2 < ActiveRecord::Base
15
15
  end
16
16
  import("Breed numbers based on license number", :url => "file://#{BREED_BY_LICENSE_NUMBER}") do
17
17
  key :license_number
18
- store :breed_id, :field_name => :breed, :nullify_blank_strings => true
18
+ store :breed_id, :field_name => :breed
19
19
  end
20
20
  end
21
21
  end
@@ -2,5 +2,5 @@ license_number,name,breed,color,age,age_units,weight,height,favorite_food,comman
2
2
  111,Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
3
3
  222,Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
4
4
  333,Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
5
- 444,Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
6
- 555,Nemo,,,,,,,,
5
+ 444,Johnny,Beagle,BR/BL,2,years,20,"4,000"," "," oh ok "
6
+ 555,Nemo,,,,,,,,