data_miner 2.5.2 → 3.0.0.alpha

Sign up to get free protection for your applications and to get access to all the features.
@@ -202,7 +202,7 @@ class DataMiner
202
202
  # @note Normally you should use +Country.run_data_miner!+
203
203
  # @note A primitive "call stack" is kept that will prevent infinite loops. So, if Country's data miner script calls Province's AND vice-versa, each one will only be run once.
204
204
  #
205
- # @return [DataMiner::Run]
205
+ # @return [nil]
206
206
  def start
207
207
  model_name = model.name
208
208
  # $stderr.write "0 - #{model_name}\n"
@@ -217,17 +217,11 @@ class DataMiner
217
217
  Script.current_stack.clear
218
218
  end
219
219
  Script.current_stack << model_name
220
- unless Run.table_exists?
221
- Run.auto_upgrade!
222
- end
223
- run = Run.new
224
- run.model_name = model_name
225
- run.start do
226
- steps.each do |step|
227
- step.start
228
- model.reset_column_information
229
- end
220
+ steps.each do |step|
221
+ step.start
222
+ model.reset_column_information
230
223
  end
224
+ nil
231
225
  end
232
226
 
233
227
  private
@@ -22,24 +22,24 @@ class DataMiner
22
22
 
23
23
  # @private
24
24
  def initialize(script, description, settings, &blk)
25
- settings = settings.symbolize_keys
26
- if settings.has_key?(:table)
25
+ settings = settings.stringify_keys
26
+ if settings.has_key?('table')
27
27
  raise ::ArgumentError, %{[data_miner] :table is no longer an allowed setting.}
28
28
  end
29
- if (errata_settings = settings[:errata]) and not errata_settings.is_a?(::Hash)
29
+ if (errata_settings = settings['errata']) and not errata_settings.is_a?(::Hash)
30
30
  raise ::ArgumentError, %{[data_miner] :errata must be a hash of initialization settings to Errata}
31
31
  end
32
32
  @script = script
33
33
  @attributes = ::ActiveSupport::OrderedHash.new
34
- @validate_query = !!settings[:validate]
34
+ @validate_query = !!settings['validate']
35
35
  @description = description
36
- if settings.has_key? :errata
37
- errata_settings = settings[:errata].symbolize_keys
38
- errata_settings[:responder] ||= model
39
- settings[:errata] = errata_settings
36
+ if settings.has_key? 'errata'
37
+ errata_settings = settings['errata'].stringify_keys
38
+ errata_settings['responder'] ||= model
39
+ settings['errata'] = errata_settings
40
40
  end
41
41
  @table_settings = settings.dup
42
- @table_settings[:streaming] = true
42
+ @table_settings['streaming'] = true
43
43
  @table_mutex = ::Mutex.new
44
44
  instance_eval(&blk)
45
45
  end
@@ -48,17 +48,17 @@ class DataMiner
48
48
  #
49
49
  # @see DataMiner::Attribute The actual Attribute class.
50
50
  #
51
- # @param [Symbol] attr_name The name of the local model column.
51
+ # @param [String] attr_name The name of the local model column.
52
52
  # @param [optional, Hash] attr_options Options that will be passed to +DataMiner::Attribute.new+
53
53
  # @option attr_options [*] anything Any option for +DataMiner::Attribute+.
54
54
  #
55
55
  # @return [nil]
56
- def store(attr_name, attr_options = {})
57
- attr_name = attr_name.to_sym
56
+ def store(attr_name, attr_options = {}, &blk)
57
+ attr_name = attr_name.to_s
58
58
  if attributes.has_key? attr_name
59
59
  raise "You should only call store or key once for #{model.name}##{attr_name}"
60
60
  end
61
- attributes[attr_name] = DataMiner::Attribute.new self, attr_name, attr_options
61
+ attributes[attr_name] = DataMiner::Attribute.new self, attr_name, attr_options, &blk
62
62
  end
63
63
 
64
64
  # Store data into a model column AND use it as the key.
@@ -67,13 +67,13 @@ class DataMiner
67
67
  #
68
68
  # Enables idempotency. In other words, you can run the data miner script multiple times, get updated data, and not get duplicate rows.
69
69
  #
70
- # @param [Symbol] attr_name The name of the local model column.
70
+ # @param [String] attr_name The name of the local model column.
71
71
  # @param [optional, Hash] attr_options Options that will be passed to +DataMiner::Attribute.new+
72
72
  # @option attr_options [*] anything Any option for +DataMiner::Attribute+.
73
73
  #
74
74
  # @return [nil]
75
75
  def key(attr_name, attr_options = {})
76
- attr_name = attr_name.to_sym
76
+ attr_name = attr_name.to_s
77
77
  if attributes.has_key? attr_name
78
78
  raise "You should only call store or key once for #{model.name}##{attr_name}"
79
79
  end
@@ -83,9 +83,8 @@ class DataMiner
83
83
 
84
84
  # @private
85
85
  def start
86
- upsert_enabled? ? save_with_upsert : save_with_activerecord
86
+ upsert_enabled? ? save_with_upsert : save_with_find_or_initialize
87
87
  refresh
88
-
89
88
  nil
90
89
  end
91
90
 
@@ -101,13 +100,28 @@ class DataMiner
101
100
  (not validate?) and (storing_primary_key? or table_has_autoincrementing_primary_key?)
102
101
  end
103
102
 
103
+ def count_every
104
+ @count_every ||= ENV.fetch('DATA_MINER_COUNT_EVERY', -1).to_i
105
+ end
106
+
104
107
  def save_with_upsert
105
108
  c = model.connection_pool.checkout
109
+ attrs_except_key = attributes.except(@key).values
110
+ count = 0
106
111
  Upsert.stream(c, model.table_name) do |upsert|
107
112
  table.each do |row|
113
+ $stderr.puts "#{count}..." if count_every > 0 and count % count_every == 0
114
+ count += 1
108
115
  selector = @key ? { @key => attributes[@key].read(row) } : { model.primary_key => nil }
109
- document = attributes.except(@key).inject({}) do |memo, (_, attr)|
110
- memo.merge! attr.updates(row)
116
+ document = attrs_except_key.inject({}) do |memo, attr|
117
+ attr.updates(row).each do |k, v|
118
+ case memo[k]
119
+ when ::Hash
120
+ memo[k] = memo[k].merge v
121
+ else
122
+ memo[k] = v
123
+ end
124
+ end
111
125
  memo
112
126
  end
113
127
  upsert.row selector, document
@@ -116,8 +130,11 @@ class DataMiner
116
130
  model.connection_pool.checkin c
117
131
  end
118
132
 
119
- def save_with_activerecord
133
+ def save_with_find_or_initialize
134
+ count = 0
120
135
  table.each do |row|
136
+ $stderr.puts "#{count}..." if count_every > 0 and count % count_every == 0
137
+ count += 1
121
138
  record = @key ? model.send("find_or_initialize_by_#{@key}", attributes[@key].read(row)) : model.new
122
139
  attributes.each { |_, attr| attr.set_from_row record, row }
123
140
  record.save!
@@ -125,7 +142,7 @@ class DataMiner
125
142
  end
126
143
 
127
144
  def table_has_autoincrementing_primary_key?
128
- return @table_has_autoincrementing_primary_key_query.first if @table_has_autoincrementing_primary_key_query.is_a?(Array)
145
+ return @table_has_autoincrementing_primary_key_query if defined?(@table_has_autoincrementing_primary_key_query)
129
146
  c = model.connection_pool.checkout
130
147
  answer = if (pk = model.primary_key) and model.columns_hash[pk].type == :integer
131
148
  case c.adapter_name
@@ -143,14 +160,12 @@ class DataMiner
143
160
  end
144
161
  end
145
162
  model.connection_pool.checkin c
146
- @table_has_autoincrementing_primary_key_query = [answer]
147
- answer
163
+ @table_has_autoincrementing_primary_key_query = answer
148
164
  end
149
165
 
150
166
  def storing_primary_key?
151
- return @storing_primary_key_query.first if @storing_primary_key_query.is_a?(Array)
152
- @storing_primary_key_query = [model.primary_key && attributes.has_key?(model.primary_key.to_sym)]
153
- @storing_primary_key_query.first
167
+ return @storing_primary_key_query if defined?(@storing_primary_key_query)
168
+ @storing_primary_key_query = model.primary_key && attributes.has_key?(model.primary_key)
154
169
  end
155
170
 
156
171
  def table
@@ -161,7 +176,6 @@ class DataMiner
161
176
 
162
177
  def refresh
163
178
  @table = nil
164
- attributes.each { |_, attr| attr.refresh }
165
179
  nil
166
180
  end
167
181
  end
@@ -43,7 +43,7 @@ class DataMiner
43
43
  ActiveRecord::Base.connection.execute statement
44
44
  else
45
45
  tmp_path = UnixUtils.curl url
46
- send config[:adapter], tmp_path
46
+ send config['adapter'], tmp_path
47
47
  File.unlink tmp_path
48
48
  end
49
49
  end
@@ -55,24 +55,24 @@ class DataMiner
55
55
  ActiveRecord::Base.connection_config
56
56
  else
57
57
  ActiveRecord::Base.connection_pool.spec.config
58
- end
58
+ end.stringify_keys
59
59
  end
60
60
 
61
61
  def mysql(path)
62
- connect = if config[:socket]
63
- [ '--socket', config[:socket] ]
62
+ connect = if config['socket']
63
+ [ '--socket', config['socket'] ]
64
64
  else
65
- [ '--host', config.fetch(:host, '127.0.0.1'), '--port', config.fetch(:port, 3306).to_s ]
65
+ [ '--host', config.fetch('host', '127.0.0.1'), '--port', config.fetch('port', 3306).to_s ]
66
66
  end
67
67
 
68
68
  argv = [
69
69
  'mysql',
70
70
  '--compress',
71
- '--user', config[:username],
72
- "-p#{config[:password]}",
71
+ '--user', config['username'],
72
+ "-p#{config['password']}",
73
73
  connect,
74
74
  '--default-character-set', 'utf8',
75
- config[:database]
75
+ config['database']
76
76
  ].flatten
77
77
 
78
78
  File.open(path) do |f|
@@ -97,7 +97,7 @@ class DataMiner
97
97
  argv = [
98
98
  'psql',
99
99
  '--quiet',
100
- '--dbname', config[:database],
100
+ '--dbname', config['database'],
101
101
  '--file', path
102
102
  ].flatten
103
103
 
@@ -113,7 +113,7 @@ class DataMiner
113
113
  def sqlite3(path)
114
114
  argv = [
115
115
  'sqlite3',
116
- config[:database]
116
+ config['database']
117
117
  ]
118
118
  File.open(path) do |f|
119
119
  pid = POSIX::Spawn.spawn(*(argv+[{:in => f}]))
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '2.5.2'
2
+ VERSION = '3.0.0.alpha'
3
3
  end
@@ -2,37 +2,33 @@
2
2
  require 'helper'
3
3
  init_database
4
4
 
5
- class BreedBlue < ActiveRecord::Base
6
- self.table_name = 'breeds'
7
- self.primary_key = 'name'
5
+ class StateBlue < ActiveRecord::Base
6
+ self.table_name = 'states'
7
+ self.primary_key = 'postal_abbreviation'
8
8
  data_miner do
9
- sql "Brighter Planet's list of breeds (as a URL)", 'http://data.brighterplanet.com/breeds.sql'
9
+ sql "Brighter Planet's list of states (as a URL)", 'http://data.brighterplanet.com/states.sql'
10
10
  end
11
11
  end
12
12
 
13
- class BreedRed < ActiveRecord::Base
14
- self.table_name = 'breeds'
15
- self.primary_key = 'name'
13
+ class StateRed < ActiveRecord::Base
14
+ self.table_name = 'states'
15
+ self.primary_key = 'postal_abbreviation'
16
16
  data_miner do
17
- sql "Brighter Planet's list of breeds (as a URL)", 'http://data.brighterplanet.com/breeds.sql'
18
- sql "Mess up weights", %{UPDATE breeds SET weight = 999}
17
+ sql "Brighter Planet's list of states (as a URL)", 'http://data.brighterplanet.com/states.sql'
18
+ sql "Mess up weights", %{UPDATE states SET name = 'Foobar'}
19
19
  end
20
20
  end
21
21
 
22
22
  describe DataMiner::Step::Sql do
23
23
  before do
24
- BreedBlue.delete_all rescue nil
24
+ StateBlue.delete_all rescue nil
25
25
  end
26
26
  it "can be provided as a URL" do
27
- BreedBlue.run_data_miner!
28
- BreedBlue.where(:name => 'Affenpinscher').count.must_equal 1
29
- BreedBlue.where(:name => 'Württemberger').count.must_equal 1
30
- BreedBlue.find('Afghan Hound').weight.must_be_close_to 24.9476
27
+ StateBlue.run_data_miner!
28
+ StateBlue.where(:name => 'Wisconsin').count.must_equal 1
31
29
  end
32
30
  it "can be provided as a string" do
33
- BreedRed.run_data_miner!
34
- BreedRed.where(:name => 'Affenpinscher').count.must_equal 1
35
- BreedRed.where(:name => 'Württemberger').count.must_equal 1
36
- BreedRed.find('Afghan Hound').weight.must_be_close_to 999
31
+ StateRed.run_data_miner!
32
+ StateRed.find('NJ').name.must_equal 'Foobar'
37
33
  end
38
34
  end
@@ -1,36 +1,4 @@
1
1
  require 'helper'
2
2
 
3
3
  describe DataMiner::Attribute do
4
- before do
5
- DataMiner.unit_converter = :alchemist
6
- end
7
-
8
- describe '#convert?' do
9
- it 'returns true if from_units is set' do
10
- attribute = DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
11
- assert attribute.send(:convert?)
12
- end
13
- it 'returns true if to_units and units_field_name are set' do
14
- attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar', :to_units => :kilograms
15
- assert attribute.send(:convert?)
16
- end
17
- it 'returns true if to_units and units_field_number are set' do
18
- attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 3, :to_units => :kilograms
19
- assert attribute.send(:convert?)
20
- end
21
- it 'returns false if units_field_name only is set' do
22
- attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_name => 'bar'
23
- refute attribute.send(:convert?)
24
- end
25
- it 'returns false if units_field_number only is set' do
26
- attribute = DataMiner::Attribute.new :foo, 'bar', :units_field_number => 'bar'
27
- refute attribute.send(:convert?)
28
- end
29
- it 'raises if no converter and units are used' do
30
- DataMiner.unit_converter = nil
31
- lambda {
32
- DataMiner::Attribute.new :foo, 'bar', :from_units => :pounds, :to_units => :kilograms
33
- }.must_raise ArgumentError, /unit_converter/
34
- end
35
- end
36
4
  end
data/test/helper.rb CHANGED
@@ -15,14 +15,15 @@ MiniTest::Reporters.use!
15
15
  require 'active_record'
16
16
  require 'logger'
17
17
  ActiveRecord::Base.logger = Logger.new $stderr
18
- ActiveRecord::Base.logger.level = Logger::INFO
19
- # ActiveRecord::Base.logger.level = Logger::DEBUG
18
+ ActiveRecord::Base.logger.level = (ENV['VERBOSE'] == 'true') ? Logger::DEBUG : Logger::INFO
20
19
 
21
20
  ActiveRecord::Base.mass_assignment_sanitizer = :strict
22
21
 
22
+ require 'active_record_inline_schema'
23
+
23
24
  require 'data_miner'
24
25
 
25
- def init_database(unit_converter = :conversions)
26
+ def init_database
26
27
  case ENV['DATABASE']
27
28
  when /postgr/i
28
29
  system %{dropdb test_data_miner}
@@ -46,12 +47,6 @@ def init_database(unit_converter = :conversions)
46
47
  'password' => 'password'
47
48
  )
48
49
  end
49
-
50
- DataMiner::Run.auto_upgrade!
51
- DataMiner::Run::ColumnStatistic.auto_upgrade!
52
- DataMiner::Run.clear_locks
53
-
54
- DataMiner.unit_converter = unit_converter
55
50
  end
56
51
 
57
52
  def init_models
@@ -2,12 +2,8 @@ require 'helper'
2
2
 
3
3
  describe 'DataMiner with Alchemist' do
4
4
  before do
5
- init_database(:alchemist)
5
+ init_database
6
6
  init_models
7
7
  Pet.run_data_miner!
8
8
  end
9
-
10
- it 'converts convertible units' do
11
- Pet.find('Pierre').weight.must_be_close_to 4.4.pounds.to.kilograms.to_f
12
- end
13
9
  end
data/test/support/pet.rb CHANGED
@@ -9,26 +9,27 @@ class Pet < ActiveRecord::Base
9
9
  col :breed_id
10
10
  col :color_id
11
11
  col :age, :type => :integer
12
- col :age_units
13
12
  col :weight, :type => :float
14
- col :weight_units
15
13
  col :height, :type => :float
16
- col :height_units
17
14
  col :favorite_food
18
15
  col :command_phrase
16
+ col :emphatic_command_phrase
19
17
  belongs_to :breed
20
18
  data_miner do
21
19
  process :auto_upgrade!
22
20
  process :run_data_miner_on_parent_associations!
23
21
  import("A list of pets", :url => "file://#{PETS}") do
24
22
  key :name
25
- store :age, :units_field_name => 'age_units'
26
- store :breed_id, :field_name => :breed, :nullify_blank_strings => true
27
- store :color_id, :field_name => :color, :dictionary => { :url => "file://#{COLOR_DICTIONARY_ENGLISH}", :input => :input, :output => :output }
28
- store :weight, :from_units => :pounds, :to_units => :kilograms
29
- store :favorite_food, :nullify_blank_strings => true
23
+ store :age
24
+ store :breed_id, :field_name => :breed
25
+ store :color_id, :field_name => :color, :dictionary => RemoteTable.new("file://#{COLOR_DICTIONARY_ENGLISH}").inject({}) { |memo, row| memo[row['input']] = row['output']; memo }
26
+ store :weight
27
+ store :favorite_food
30
28
  store :command_phrase
31
- store :height, :units => :millimetres
29
+ store :height
30
+ store :emphatic_command_phrase do |row|
31
+ (row['command_phrase'] + "!!!!!") if row['command_phrase']
32
+ end
32
33
  end
33
34
  end
34
35
  end
data/test/support/pet2.rb CHANGED
@@ -15,7 +15,7 @@ class Pet2 < ActiveRecord::Base
15
15
  end
16
16
  import("Breed numbers based on license number", :url => "file://#{BREED_BY_LICENSE_NUMBER}") do
17
17
  key :license_number
18
- store :breed_id, :field_name => :breed, :nullify_blank_strings => true
18
+ store :breed_id, :field_name => :breed
19
19
  end
20
20
  end
21
21
  end
@@ -2,5 +2,5 @@ license_number,name,breed,color,age,age_units,weight,height,favorite_food,comman
2
2
  111,Pierre,Tabby,GO,4,years,4.4,"3.000,5",tomato,"eh"
3
3
  222,Jerry,Beagle,BR/BL,5,years,10,"3,000.0",cheese,"che"
4
4
  333,Amigo,Spanish Lizarto,GR/BU,17,years," ","300,5",crickets," "
5
- 444,Johnny,Beagle,BR/BL,2,years,20,"4,000"," ",
6
- 555,Nemo,,,,,,,,
5
+ 444,Johnny,Beagle,BR/BL,2,years,20,"4,000"," "," oh ok "
6
+ 555,Nemo,,,,,,,,