data_miner 0.4.0 → 0.4.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
data/README.rdoc CHANGED
@@ -50,7 +50,8 @@ Put this in <tt>lib/tasks/data_miner_tasks.rake</tt>: (unfortunately I don't kno
50
50
 
51
51
  namespace :data_miner do
52
52
  task :run => :environment do
53
- DataMiner.run :resource_names => ENV['RESOURCES'].to_s.split(/\s*,\s*/).flatten.compact
53
+ resource_names = %w{R RESOURCES RESOURCE RESOURCE_NAMES}.map { |possible_key| ENV[possible_key].to_s }.join.split(/\s*,\s*/).flatten.compact
54
+ DataMiner.run :resource_names => resource_names
54
55
  end
55
56
  end
56
57
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.4.1
data/data_miner.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.0"
8
+ s.version = "0.4.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
@@ -63,7 +63,10 @@ module DataMiner
63
63
  return false if !wants_overwriting? and !record.send(name).nil?
64
64
  what_it_was = record.send name
65
65
  what_it_should_be = value_from_row row
66
+
66
67
  record.send "#{name}=", what_it_should_be
68
+ record.send "#{name}_units=", (to_units || unit_from_source(row)).to_s if wants_units?
69
+
67
70
  what_it_is = record.send name
68
71
  if what_it_is.nil? and !what_it_should_be.nil?
69
72
  DataMiner.logger.info "ActiveRecord didn't like trying to set #{resource}.#{name} = #{what_it_should_be} (it came out as nil)"
@@ -76,10 +79,11 @@ module DataMiner
76
79
  end
77
80
 
78
81
  def unit_from_source(row)
79
- row[units_field_name].to_s.strip.underscore.to_sym
82
+ row[units_field_name || units_field_number].to_s.strip.underscore.to_sym
80
83
  end
81
84
 
82
85
  def do_convert(row, value)
86
+ logger.error "[data_miner gem] If you use :from_units, you need to set :to_units (#{resource.name}##{name})" unless wants_units?
83
87
  value.to_f.convert((from_units || unit_from_source(row)), to_units)
84
88
  end
85
89
 
@@ -125,7 +129,10 @@ module DataMiner
125
129
  overwrite != false
126
130
  end
127
131
  def wants_conversion?
128
- from_units.present? or units_field_name.present?
132
+ from_units.present? or units_field_name.present? or units_field_number.present?
133
+ end
134
+ def wants_units?
135
+ to_units.present? or units_field_name.present? or units_field_number.present?
129
136
  end
130
137
  def wants_dictionary?
131
138
  options[:dictionary].present?
@@ -156,7 +163,7 @@ module DataMiner
156
163
  options[:from_units]
157
164
  end
158
165
  def to_units
159
- options[:to_units]
166
+ options[:to_units] || options[:units]
160
167
  end
161
168
  def conditions
162
169
  options[:conditions]
@@ -176,6 +183,9 @@ module DataMiner
176
183
  def units_field_name
177
184
  options[:units_field_name]
178
185
  end
186
+ def units_field_number
187
+ options[:units_field_number]
188
+ end
179
189
  def field_number
180
190
  options[:field_number]
181
191
  end
@@ -2,18 +2,17 @@ module DataMiner
2
2
  class Configuration
3
3
  include Blockenspiel::DSL
4
4
 
5
- attr_accessor :resource, :runnables, :runnable_counter, :attributes, :unique_indices
5
+ attr_accessor :resource, :runnables, :runnable_counter, :attributes
6
6
 
7
7
  def initialize(resource)
8
8
  @runnables = Array.new
9
- @unique_indices = Set.new
10
9
  @resource = resource
11
10
  @runnable_counter = 0
12
11
  @attributes = HashWithIndifferentAccess.new
13
12
  end
14
-
15
- def unique_index(*args)
16
- args.each { |arg| unique_indices.add arg }
13
+
14
+ def logger
15
+ DataMiner.logger
17
16
  end
18
17
 
19
18
  def process(method_name_or_block_description, &block)
@@ -51,8 +50,97 @@ module DataMiner
51
50
  nil
52
51
  end
53
52
 
53
+ def import_runnables
54
+ runnables.select { |runnable| runnable.is_a? Import }
55
+ end
56
+
57
+ def before_invoke
58
+
59
+ end
60
+
61
+ def after_invoke
62
+ make_sure_unit_definitions_make_sense
63
+ suggest_missing_column_migrations
64
+ end
65
+
66
+ COMPLETE_UNIT_DEFINITIONS = [
67
+ [:units],
68
+ [:from_units, :to_units],
69
+ [:units_field_name],
70
+ [:units_field_name, :to_units],
71
+ [:units_field_number],
72
+ [:units_field_number, :to_units]
73
+ ]
74
+
75
+ def make_sure_unit_definitions_make_sense
76
+ import_runnables.each do |runnable|
77
+ runnable.attributes.each do |_, attribute|
78
+ if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
79
+ logger.error %{
80
+
81
+ ================================
82
+
83
+ [data_miner gem] You don't have a valid unit definition for #{resource.name}##{attribute.name}.
84
+
85
+ You supplied #{attribute.options.keys.select { |k, _| k.to_s =~ /unit/ }.map(&:to_sym).inspect }.
86
+
87
+ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence}".
88
+
89
+ ================================
90
+ }
91
+ end
92
+ end
93
+ end
94
+ end
95
+
96
+ def suggest_missing_column_migrations
97
+ missing_columns = Array.new
98
+ import_runnables.each do |runnable|
99
+ runnable.attributes.each do |_, attribute|
100
+ logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
101
+ unless resource.column_names.include? attribute.name
102
+ missing_columns << attribute.name
103
+ end
104
+ if attribute.wants_units? and !resource.column_names.include?(units_column = "#{attribute.name}_units")
105
+ missing_columns << units_column
106
+ end
107
+ end
108
+ end
109
+ missing_columns.uniq!
110
+ if missing_columns.any?
111
+ logger.error %{
112
+
113
+ ================================
114
+
115
+ [data_miner gem] On #{resource}, it looks like you're missing some columns...
116
+
117
+ Please run this...
118
+
119
+ ./script/generate migration AddMissingColumnsTo#{resource.name}
120
+
121
+ and **replace** the resulting file with this:
122
+
123
+ class AddMissingColumnsTo#{resource.name} < ActiveRecord::Migration
124
+ def self.up
125
+ #{missing_columns.map { |column_name| " add_column :#{resource.table_name}, :#{column_name}, :#{column_name.ends_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' }" }.join("\n") }
126
+ end
127
+
128
+ def self.down
129
+ #{missing_columns.map { |column_name| " remove_column :#{resource.table_name}, :#{column_name}" }.join("\n") }
130
+ end
131
+ end
132
+
133
+ On the other hand, if you're working directly with create_table, this might be helpful:
134
+
135
+ #{missing_columns.map { |column_name| "t.#{column_name.ends_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' } '#{column_name}'" }.join("\n") }
136
+
137
+ ================================
138
+ }
139
+ end
140
+ end
141
+
54
142
  cattr_accessor :resource_names
55
- self.resource_names = Set.new
143
+ self.resource_names = Array.new
56
144
  class << self
57
145
  # Mine data. Defaults to all resource_names touched by DataMiner.
58
146
  #
@@ -28,7 +28,6 @@ module DataMiner
28
28
  end
29
29
 
30
30
  def store(attr_name, attr_options = {})
31
- raise "[data_miner gem] Column #{attr_name} doesn't exist on table #{resource.table_name}" unless resource.column_names.include?(attr_name)
32
31
  attributes[attr_name] = Attribute.new self, attr_name, attr_options
33
32
  end
34
33
 
data/lib/data_miner.rb CHANGED
@@ -17,16 +17,21 @@ require 'data_miner/run'
17
17
  module DataMiner
18
18
  class MissingHashColumn < RuntimeError; end
19
19
 
20
- include Log4r
20
+ include Log4r unless defined? Rails
21
21
 
22
22
  mattr_accessor :logger
23
23
 
24
24
  def self.start_logging
25
- if defined?(Rails)
25
+ if defined? Rails
26
26
  self.logger = Rails.logger
27
27
  else
28
+ info_outputter = FileOutputter.new 'f1', :filename => 'data_miner.log'
29
+ error_outputter = Outputter.stderr
30
+ info_outputter.only_at DEBUG, INFO
31
+ error_outputter.only_at WARN, ERROR, FATAL
32
+
28
33
  self.logger = Logger.new 'data_miner'
29
- logger.outputters = FileOutputter.new 'f1', :filename => 'data_miner.log'
34
+ logger.add info_outputter, error_outputter
30
35
  end
31
36
  end
32
37
 
@@ -50,7 +55,7 @@ ActiveRecord::Base.class_eval do
50
55
  return
51
56
  end
52
57
 
53
- DataMiner.resource_names.add self.name
58
+ DataMiner.resource_names.push self.name unless DataMiner.resource_names.include? self.name
54
59
  DataMiner.create_tables
55
60
 
56
61
  belongs_to :data_miner_last_run, :class_name => 'DataMiner::Run'
@@ -68,6 +73,8 @@ ActiveRecord::Base.class_eval do
68
73
  self.data_miner_config = DataMiner::Configuration.new self
69
74
 
70
75
  Blockenspiel.invoke block, data_miner_config
76
+
77
+ data_miner_config.after_invoke
71
78
  end
72
79
  end
73
80
 
@@ -233,6 +233,8 @@ class AutomobileVariant < ActiveRecord::Base
233
233
  store 'model_name', :field_name => 'model'
234
234
  store 'year'
235
235
  store 'fuel_type_code', :field_name => 'fuel_type'
236
+ store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
237
+ store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
236
238
  store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
237
239
  store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
238
240
  store 'cylinders', :field_name => 'no_cyc'
@@ -267,6 +269,8 @@ class AutomobileVariant < ActiveRecord::Base
267
269
  store 'make_name', :field_name => 'make'
268
270
  store 'model_name', :field_name => 'model'
269
271
  store 'fuel_type_code', :field_name => 'fl'
272
+ store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
273
+ store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
270
274
  store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
271
275
  store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
272
276
  store 'cylinders', :field_name => 'cyl'
@@ -297,6 +301,8 @@ class AutomobileVariant < ActiveRecord::Base
297
301
  store 'make_name', :field_name => 'make'
298
302
  store 'model_name', :field_name => 'model'
299
303
  store 'fuel_type_code', :field_name => 'FUEL TYPE'
304
+ store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
305
+ store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
300
306
  store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
301
307
  store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
302
308
  store 'cylinders', :field_name => 'NUMB CYL'
@@ -668,7 +674,7 @@ class DataMinerTest < Test::Unit::TestCase
668
674
  assert_equal a, b
669
675
  end
670
676
 
671
- should "hash things if no unique index is listed" do
677
+ should "hash things" do
672
678
  AutomobileVariant.data_miner_config.runnables[0].run(nil)
673
679
  assert AutomobileVariant.first.row_hash.present?
674
680
  end
@@ -734,9 +740,9 @@ class DataMinerTest < Test::Unit::TestCase
734
740
  assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
735
741
  end
736
742
 
737
- should "mine residence survey day" do
738
- ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
739
- assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
740
- end
743
+ # should "mine residence survey day" do
744
+ # ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
745
+ # assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
746
+ # end
741
747
  end
742
748
  end
data/test/test_helper.rb CHANGED
@@ -66,11 +66,6 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
66
66
  execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
67
67
 
68
68
  create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
69
- t.string "automobile_make_id"
70
- t.string "automobile_model_id"
71
- t.string "automobile_model_year_id"
72
- t.string "automobile_fuel_type_id"
73
-
74
69
  t.float "fuel_efficiency_city"
75
70
  t.float "fuel_efficiency_highway"
76
71
  t.string "make_name"
@@ -94,6 +89,12 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
94
89
  t.boolean "injection"
95
90
  t.string "carline_class_name"
96
91
  t.string "speeds"
92
+
93
+ t.string 'raw_fuel_efficiency_highway_units'
94
+ t.string 'raw_fuel_efficiency_city_units'
95
+ t.string 'fuel_efficiency_highway_units'
96
+ t.string 'fuel_efficiency_city_units'
97
+
97
98
  t.string "row_hash"
98
99
  t.integer 'data_miner_touch_count'
99
100
  t.integer 'data_miner_last_run_id'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere