data_miner 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -50,7 +50,8 @@ Put this in <tt>lib/tasks/data_miner_tasks.rake</tt>: (unfortunately I don't kno
50
50
 
51
51
  namespace :data_miner do
52
52
  task :run => :environment do
53
- DataMiner.run :resource_names => ENV['RESOURCES'].to_s.split(/\s*,\s*/).flatten.compact
53
+ resource_names = %w{R RESOURCES RESOURCE RESOURCE_NAMES}.map { |possible_key| ENV[possible_key].to_s }.join.split(/\s*,\s*/).flatten.compact
54
+ DataMiner.run :resource_names => resource_names
54
55
  end
55
56
  end
56
57
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.4.1
data/data_miner.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.0"
8
+ s.version = "0.4.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
@@ -63,7 +63,10 @@ module DataMiner
63
63
  return false if !wants_overwriting? and !record.send(name).nil?
64
64
  what_it_was = record.send name
65
65
  what_it_should_be = value_from_row row
66
+
66
67
  record.send "#{name}=", what_it_should_be
68
+ record.send "#{name}_units=", (to_units || unit_from_source(row)).to_s if wants_units?
69
+
67
70
  what_it_is = record.send name
68
71
  if what_it_is.nil? and !what_it_should_be.nil?
69
72
  DataMiner.logger.info "ActiveRecord didn't like trying to set #{resource}.#{name} = #{what_it_should_be} (it came out as nil)"
@@ -76,10 +79,11 @@ module DataMiner
76
79
  end
77
80
 
78
81
  def unit_from_source(row)
79
- row[units_field_name].to_s.strip.underscore.to_sym
82
+ row[units_field_name || units_field_number].to_s.strip.underscore.to_sym
80
83
  end
81
84
 
82
85
  def do_convert(row, value)
86
+ logger.error "[data_miner gem] If you use :from_units, you need to set :to_units (#{resource.name}##{name})" unless wants_units?
83
87
  value.to_f.convert((from_units || unit_from_source(row)), to_units)
84
88
  end
85
89
 
@@ -125,7 +129,10 @@ module DataMiner
125
129
  overwrite != false
126
130
  end
127
131
  def wants_conversion?
128
- from_units.present? or units_field_name.present?
132
+ from_units.present? or units_field_name.present? or units_field_number.present?
133
+ end
134
+ def wants_units?
135
+ to_units.present? or units_field_name.present? or units_field_number.present?
129
136
  end
130
137
  def wants_dictionary?
131
138
  options[:dictionary].present?
@@ -156,7 +163,7 @@ module DataMiner
156
163
  options[:from_units]
157
164
  end
158
165
  def to_units
159
- options[:to_units]
166
+ options[:to_units] || options[:units]
160
167
  end
161
168
  def conditions
162
169
  options[:conditions]
@@ -176,6 +183,9 @@ module DataMiner
176
183
  def units_field_name
177
184
  options[:units_field_name]
178
185
  end
186
+ def units_field_number
187
+ options[:units_field_number]
188
+ end
179
189
  def field_number
180
190
  options[:field_number]
181
191
  end
@@ -2,18 +2,17 @@ module DataMiner
2
2
  class Configuration
3
3
  include Blockenspiel::DSL
4
4
 
5
- attr_accessor :resource, :runnables, :runnable_counter, :attributes, :unique_indices
5
+ attr_accessor :resource, :runnables, :runnable_counter, :attributes
6
6
 
7
7
  def initialize(resource)
8
8
  @runnables = Array.new
9
- @unique_indices = Set.new
10
9
  @resource = resource
11
10
  @runnable_counter = 0
12
11
  @attributes = HashWithIndifferentAccess.new
13
12
  end
14
-
15
- def unique_index(*args)
16
- args.each { |arg| unique_indices.add arg }
13
+
14
+ def logger
15
+ DataMiner.logger
17
16
  end
18
17
 
19
18
  def process(method_name_or_block_description, &block)
@@ -51,8 +50,97 @@ module DataMiner
51
50
  nil
52
51
  end
53
52
 
53
+ def import_runnables
54
+ runnables.select { |runnable| runnable.is_a? Import }
55
+ end
56
+
57
+ def before_invoke
58
+
59
+ end
60
+
61
+ def after_invoke
62
+ make_sure_unit_definitions_make_sense
63
+ suggest_missing_column_migrations
64
+ end
65
+
66
+ COMPLETE_UNIT_DEFINITIONS = [
67
+ [:units],
68
+ [:from_units, :to_units],
69
+ [:units_field_name],
70
+ [:units_field_name, :to_units],
71
+ [:units_field_number],
72
+ [:units_field_number, :to_units]
73
+ ]
74
+
75
+ def make_sure_unit_definitions_make_sense
76
+ import_runnables.each do |runnable|
77
+ runnable.attributes.each do |_, attribute|
78
+ if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
79
+ logger.error %{
80
+
81
+ ================================
82
+
83
+ [data_miner gem] You don't have a valid unit definition for #{resource.name}##{attribute.name}.
84
+
85
+ You supplied #{attribute.options.keys.select { |k, _| k.to_s =~ /unit/ }.map(&:to_sym).inspect }.
86
+
87
+ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence}".
88
+
89
+ ================================
90
+ }
91
+ end
92
+ end
93
+ end
94
+ end
95
+
96
+ def suggest_missing_column_migrations
97
+ missing_columns = Array.new
98
+ import_runnables.each do |runnable|
99
+ runnable.attributes.each do |_, attribute|
100
+ logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
101
+ unless resource.column_names.include? attribute.name
102
+ missing_columns << attribute.name
103
+ end
104
+ if attribute.wants_units? and !resource.column_names.include?(units_column = "#{attribute.name}_units")
105
+ missing_columns << units_column
106
+ end
107
+ end
108
+ end
109
+ missing_columns.uniq!
110
+ if missing_columns.any?
111
+ logger.error %{
112
+
113
+ ================================
114
+
115
+ [data_miner gem] On #{resource}, it looks like you're missing some columns...
116
+
117
+ Please run this...
118
+
119
+ ./script/generate migration AddMissingColumnsTo#{resource.name}
120
+
121
+ and **replace** the resulting file with this:
122
+
123
+ class AddMissingColumnsTo#{resource.name} < ActiveRecord::Migration
124
+ def self.up
125
+ #{missing_columns.map { |column_name| " add_column :#{resource.table_name}, :#{column_name}, :#{column_name.ends_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' }" }.join("\n") }
126
+ end
127
+
128
+ def self.down
129
+ #{missing_columns.map { |column_name| " remove_column :#{resource.table_name}, :#{column_name}" }.join("\n") }
130
+ end
131
+ end
132
+
133
+ On the other hand, if you're working directly with create_table, this might be helpful:
134
+
135
+ #{missing_columns.map { |column_name| "t.#{column_name.ends_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' } '#{column_name}'" }.join("\n") }
136
+
137
+ ================================
138
+ }
139
+ end
140
+ end
141
+
54
142
  cattr_accessor :resource_names
55
- self.resource_names = Set.new
143
+ self.resource_names = Array.new
56
144
  class << self
57
145
  # Mine data. Defaults to all resource_names touched by DataMiner.
58
146
  #
@@ -28,7 +28,6 @@ module DataMiner
28
28
  end
29
29
 
30
30
  def store(attr_name, attr_options = {})
31
- raise "[data_miner gem] Column #{attr_name} doesn't exist on table #{resource.table_name}" unless resource.column_names.include?(attr_name)
32
31
  attributes[attr_name] = Attribute.new self, attr_name, attr_options
33
32
  end
34
33
 
data/lib/data_miner.rb CHANGED
@@ -17,16 +17,21 @@ require 'data_miner/run'
17
17
  module DataMiner
18
18
  class MissingHashColumn < RuntimeError; end
19
19
 
20
- include Log4r
20
+ include Log4r unless defined? Rails
21
21
 
22
22
  mattr_accessor :logger
23
23
 
24
24
  def self.start_logging
25
- if defined?(Rails)
25
+ if defined? Rails
26
26
  self.logger = Rails.logger
27
27
  else
28
+ info_outputter = FileOutputter.new 'f1', :filename => 'data_miner.log'
29
+ error_outputter = Outputter.stderr
30
+ info_outputter.only_at DEBUG, INFO
31
+ error_outputter.only_at WARN, ERROR, FATAL
32
+
28
33
  self.logger = Logger.new 'data_miner'
29
- logger.outputters = FileOutputter.new 'f1', :filename => 'data_miner.log'
34
+ logger.add info_outputter, error_outputter
30
35
  end
31
36
  end
32
37
 
@@ -50,7 +55,7 @@ ActiveRecord::Base.class_eval do
50
55
  return
51
56
  end
52
57
 
53
- DataMiner.resource_names.add self.name
58
+ DataMiner.resource_names.push self.name unless DataMiner.resource_names.include? self.name
54
59
  DataMiner.create_tables
55
60
 
56
61
  belongs_to :data_miner_last_run, :class_name => 'DataMiner::Run'
@@ -68,6 +73,8 @@ ActiveRecord::Base.class_eval do
68
73
  self.data_miner_config = DataMiner::Configuration.new self
69
74
 
70
75
  Blockenspiel.invoke block, data_miner_config
76
+
77
+ data_miner_config.after_invoke
71
78
  end
72
79
  end
73
80
 
@@ -233,6 +233,8 @@ class AutomobileVariant < ActiveRecord::Base
233
233
  store 'model_name', :field_name => 'model'
234
234
  store 'year'
235
235
  store 'fuel_type_code', :field_name => 'fuel_type'
236
+ store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
237
+ store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
236
238
  store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
237
239
  store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
238
240
  store 'cylinders', :field_name => 'no_cyc'
@@ -267,6 +269,8 @@ class AutomobileVariant < ActiveRecord::Base
267
269
  store 'make_name', :field_name => 'make'
268
270
  store 'model_name', :field_name => 'model'
269
271
  store 'fuel_type_code', :field_name => 'fl'
272
+ store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
273
+ store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
270
274
  store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
271
275
  store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
272
276
  store 'cylinders', :field_name => 'cyl'
@@ -297,6 +301,8 @@ class AutomobileVariant < ActiveRecord::Base
297
301
  store 'make_name', :field_name => 'make'
298
302
  store 'model_name', :field_name => 'model'
299
303
  store 'fuel_type_code', :field_name => 'FUEL TYPE'
304
+ store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
305
+ store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
300
306
  store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
301
307
  store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
302
308
  store 'cylinders', :field_name => 'NUMB CYL'
@@ -668,7 +674,7 @@ class DataMinerTest < Test::Unit::TestCase
668
674
  assert_equal a, b
669
675
  end
670
676
 
671
- should "hash things if no unique index is listed" do
677
+ should "hash things" do
672
678
  AutomobileVariant.data_miner_config.runnables[0].run(nil)
673
679
  assert AutomobileVariant.first.row_hash.present?
674
680
  end
@@ -734,9 +740,9 @@ class DataMinerTest < Test::Unit::TestCase
734
740
  assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
735
741
  end
736
742
 
737
- should "mine residence survey day" do
738
- ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
739
- assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
740
- end
743
+ # should "mine residence survey day" do
744
+ # ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
745
+ # assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
746
+ # end
741
747
  end
742
748
  end
data/test/test_helper.rb CHANGED
@@ -66,11 +66,6 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
66
66
  execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
67
67
 
68
68
  create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
69
- t.string "automobile_make_id"
70
- t.string "automobile_model_id"
71
- t.string "automobile_model_year_id"
72
- t.string "automobile_fuel_type_id"
73
-
74
69
  t.float "fuel_efficiency_city"
75
70
  t.float "fuel_efficiency_highway"
76
71
  t.string "make_name"
@@ -94,6 +89,12 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
94
89
  t.boolean "injection"
95
90
  t.string "carline_class_name"
96
91
  t.string "speeds"
92
+
93
+ t.string 'raw_fuel_efficiency_highway_units'
94
+ t.string 'raw_fuel_efficiency_city_units'
95
+ t.string 'fuel_efficiency_highway_units'
96
+ t.string 'fuel_efficiency_city_units'
97
+
97
98
  t.string "row_hash"
98
99
  t.integer 'data_miner_touch_count'
99
100
  t.integer 'data_miner_last_run_id'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere