data_miner 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +2 -1
- data/VERSION +1 -1
- data/data_miner.gemspec +1 -1
- data/lib/data_miner/attribute.rb +13 -3
- data/lib/data_miner/configuration.rb +94 -6
- data/lib/data_miner/import.rb +0 -1
- data/lib/data_miner.rb +11 -4
- data/test/data_miner_test.rb +11 -5
- data/test/test_helper.rb +6 -5
- metadata +1 -1
data/README.rdoc
CHANGED
@@ -50,7 +50,8 @@ Put this in <tt>lib/tasks/data_miner_tasks.rake</tt>: (unfortunately I don't kno
|
|
50
50
|
|
51
51
|
namespace :data_miner do
|
52
52
|
task :run => :environment do
|
53
|
-
|
53
|
+
resource_names = %w{R RESOURCES RESOURCE RESOURCE_NAMES}.map { |possible_key| ENV[possible_key].to_s }.join.split(/\s*,\s*/).flatten.compact
|
54
|
+
DataMiner.run :resource_names => resource_names
|
54
55
|
end
|
55
56
|
end
|
56
57
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.
|
1
|
+
0.4.1
|
data/data_miner.gemspec
CHANGED
data/lib/data_miner/attribute.rb
CHANGED
@@ -63,7 +63,10 @@ module DataMiner
|
|
63
63
|
return false if !wants_overwriting? and !record.send(name).nil?
|
64
64
|
what_it_was = record.send name
|
65
65
|
what_it_should_be = value_from_row row
|
66
|
+
|
66
67
|
record.send "#{name}=", what_it_should_be
|
68
|
+
record.send "#{name}_units=", (to_units || unit_from_source(row)).to_s if wants_units?
|
69
|
+
|
67
70
|
what_it_is = record.send name
|
68
71
|
if what_it_is.nil? and !what_it_should_be.nil?
|
69
72
|
DataMiner.logger.info "ActiveRecord didn't like trying to set #{resource}.#{name} = #{what_it_should_be} (it came out as nil)"
|
@@ -76,10 +79,11 @@ module DataMiner
|
|
76
79
|
end
|
77
80
|
|
78
81
|
def unit_from_source(row)
|
79
|
-
row[units_field_name].to_s.strip.underscore.to_sym
|
82
|
+
row[units_field_name || units_field_number].to_s.strip.underscore.to_sym
|
80
83
|
end
|
81
84
|
|
82
85
|
def do_convert(row, value)
|
86
|
+
logger.error "[data_miner gem] If you use :from_units, you need to set :to_units (#{resource.name}##{name})" unless wants_units?
|
83
87
|
value.to_f.convert((from_units || unit_from_source(row)), to_units)
|
84
88
|
end
|
85
89
|
|
@@ -125,7 +129,10 @@ module DataMiner
|
|
125
129
|
overwrite != false
|
126
130
|
end
|
127
131
|
def wants_conversion?
|
128
|
-
from_units.present? or units_field_name.present?
|
132
|
+
from_units.present? or units_field_name.present? or units_field_number.present?
|
133
|
+
end
|
134
|
+
def wants_units?
|
135
|
+
to_units.present? or units_field_name.present? or units_field_number.present?
|
129
136
|
end
|
130
137
|
def wants_dictionary?
|
131
138
|
options[:dictionary].present?
|
@@ -156,7 +163,7 @@ module DataMiner
|
|
156
163
|
options[:from_units]
|
157
164
|
end
|
158
165
|
def to_units
|
159
|
-
options[:to_units]
|
166
|
+
options[:to_units] || options[:units]
|
160
167
|
end
|
161
168
|
def conditions
|
162
169
|
options[:conditions]
|
@@ -176,6 +183,9 @@ module DataMiner
|
|
176
183
|
def units_field_name
|
177
184
|
options[:units_field_name]
|
178
185
|
end
|
186
|
+
def units_field_number
|
187
|
+
options[:units_field_number]
|
188
|
+
end
|
179
189
|
def field_number
|
180
190
|
options[:field_number]
|
181
191
|
end
|
@@ -2,18 +2,17 @@ module DataMiner
|
|
2
2
|
class Configuration
|
3
3
|
include Blockenspiel::DSL
|
4
4
|
|
5
|
-
attr_accessor :resource, :runnables, :runnable_counter, :attributes
|
5
|
+
attr_accessor :resource, :runnables, :runnable_counter, :attributes
|
6
6
|
|
7
7
|
def initialize(resource)
|
8
8
|
@runnables = Array.new
|
9
|
-
@unique_indices = Set.new
|
10
9
|
@resource = resource
|
11
10
|
@runnable_counter = 0
|
12
11
|
@attributes = HashWithIndifferentAccess.new
|
13
12
|
end
|
14
|
-
|
15
|
-
def
|
16
|
-
|
13
|
+
|
14
|
+
def logger
|
15
|
+
DataMiner.logger
|
17
16
|
end
|
18
17
|
|
19
18
|
def process(method_name_or_block_description, &block)
|
@@ -51,8 +50,97 @@ module DataMiner
|
|
51
50
|
nil
|
52
51
|
end
|
53
52
|
|
53
|
+
def import_runnables
|
54
|
+
runnables.select { |runnable| runnable.is_a? Import }
|
55
|
+
end
|
56
|
+
|
57
|
+
def before_invoke
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
def after_invoke
|
62
|
+
make_sure_unit_definitions_make_sense
|
63
|
+
suggest_missing_column_migrations
|
64
|
+
end
|
65
|
+
|
66
|
+
COMPLETE_UNIT_DEFINITIONS = [
|
67
|
+
[:units],
|
68
|
+
[:from_units, :to_units],
|
69
|
+
[:units_field_name],
|
70
|
+
[:units_field_name, :to_units],
|
71
|
+
[:units_field_number],
|
72
|
+
[:units_field_number, :to_units]
|
73
|
+
]
|
74
|
+
|
75
|
+
def make_sure_unit_definitions_make_sense
|
76
|
+
import_runnables.each do |runnable|
|
77
|
+
runnable.attributes.each do |_, attribute|
|
78
|
+
if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
|
79
|
+
logger.error %{
|
80
|
+
|
81
|
+
================================
|
82
|
+
|
83
|
+
[data_miner gem] You don't have a valid unit definition for #{resource.name}##{attribute.name}.
|
84
|
+
|
85
|
+
You supplied #{attribute.options.keys.select { |k, _| k.to_s =~ /unit/ }.map(&:to_sym).inspect }.
|
86
|
+
|
87
|
+
You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence}".
|
88
|
+
|
89
|
+
================================
|
90
|
+
}
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def suggest_missing_column_migrations
|
97
|
+
missing_columns = Array.new
|
98
|
+
import_runnables.each do |runnable|
|
99
|
+
runnable.attributes.each do |_, attribute|
|
100
|
+
logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
|
101
|
+
unless resource.column_names.include? attribute.name
|
102
|
+
missing_columns << attribute.name
|
103
|
+
end
|
104
|
+
if attribute.wants_units? and !resource.column_names.include?(units_column = "#{attribute.name}_units")
|
105
|
+
missing_columns << units_column
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
missing_columns.uniq!
|
110
|
+
if missing_columns.any?
|
111
|
+
logger.error %{
|
112
|
+
|
113
|
+
================================
|
114
|
+
|
115
|
+
[data_miner gem] On #{resource}, it looks like you're missing some columns...
|
116
|
+
|
117
|
+
Please run this...
|
118
|
+
|
119
|
+
./script/generate migration AddMissingColumnsTo#{resource.name}
|
120
|
+
|
121
|
+
and **replace** the resulting file with this:
|
122
|
+
|
123
|
+
class AddMissingColumnsTo#{resource.name} < ActiveRecord::Migration
|
124
|
+
def self.up
|
125
|
+
#{missing_columns.map { |column_name| " add_column :#{resource.table_name}, :#{column_name}, :#{column_name.ends_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' }" }.join("\n") }
|
126
|
+
end
|
127
|
+
|
128
|
+
def self.down
|
129
|
+
#{missing_columns.map { |column_name| " remove_column :#{resource.table_name}, :#{column_name}" }.join("\n") }
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
On the other hand, if you're working directly with create_table, this might be helpful:
|
134
|
+
|
135
|
+
#{missing_columns.map { |column_name| "t.#{column_name.ends_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' } '#{column_name}'" }.join("\n") }
|
136
|
+
|
137
|
+
================================
|
138
|
+
}
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
54
142
|
cattr_accessor :resource_names
|
55
|
-
self.resource_names =
|
143
|
+
self.resource_names = Array.new
|
56
144
|
class << self
|
57
145
|
# Mine data. Defaults to all resource_names touched by DataMiner.
|
58
146
|
#
|
data/lib/data_miner/import.rb
CHANGED
@@ -28,7 +28,6 @@ module DataMiner
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def store(attr_name, attr_options = {})
|
31
|
-
raise "[data_miner gem] Column #{attr_name} doesn't exist on table #{resource.table_name}" unless resource.column_names.include?(attr_name)
|
32
31
|
attributes[attr_name] = Attribute.new self, attr_name, attr_options
|
33
32
|
end
|
34
33
|
|
data/lib/data_miner.rb
CHANGED
@@ -17,16 +17,21 @@ require 'data_miner/run'
|
|
17
17
|
module DataMiner
|
18
18
|
class MissingHashColumn < RuntimeError; end
|
19
19
|
|
20
|
-
include Log4r
|
20
|
+
include Log4r unless defined? Rails
|
21
21
|
|
22
22
|
mattr_accessor :logger
|
23
23
|
|
24
24
|
def self.start_logging
|
25
|
-
if defined?
|
25
|
+
if defined? Rails
|
26
26
|
self.logger = Rails.logger
|
27
27
|
else
|
28
|
+
info_outputter = FileOutputter.new 'f1', :filename => 'data_miner.log'
|
29
|
+
error_outputter = Outputter.stderr
|
30
|
+
info_outputter.only_at DEBUG, INFO
|
31
|
+
error_outputter.only_at WARN, ERROR, FATAL
|
32
|
+
|
28
33
|
self.logger = Logger.new 'data_miner'
|
29
|
-
logger.
|
34
|
+
logger.add info_outputter, error_outputter
|
30
35
|
end
|
31
36
|
end
|
32
37
|
|
@@ -50,7 +55,7 @@ ActiveRecord::Base.class_eval do
|
|
50
55
|
return
|
51
56
|
end
|
52
57
|
|
53
|
-
DataMiner.resource_names.
|
58
|
+
DataMiner.resource_names.push self.name unless DataMiner.resource_names.include? self.name
|
54
59
|
DataMiner.create_tables
|
55
60
|
|
56
61
|
belongs_to :data_miner_last_run, :class_name => 'DataMiner::Run'
|
@@ -68,6 +73,8 @@ ActiveRecord::Base.class_eval do
|
|
68
73
|
self.data_miner_config = DataMiner::Configuration.new self
|
69
74
|
|
70
75
|
Blockenspiel.invoke block, data_miner_config
|
76
|
+
|
77
|
+
data_miner_config.after_invoke
|
71
78
|
end
|
72
79
|
end
|
73
80
|
|
data/test/data_miner_test.rb
CHANGED
@@ -233,6 +233,8 @@ class AutomobileVariant < ActiveRecord::Base
|
|
233
233
|
store 'model_name', :field_name => 'model'
|
234
234
|
store 'year'
|
235
235
|
store 'fuel_type_code', :field_name => 'fuel_type'
|
236
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
237
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
236
238
|
store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
237
239
|
store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
238
240
|
store 'cylinders', :field_name => 'no_cyc'
|
@@ -267,6 +269,8 @@ class AutomobileVariant < ActiveRecord::Base
|
|
267
269
|
store 'make_name', :field_name => 'make'
|
268
270
|
store 'model_name', :field_name => 'model'
|
269
271
|
store 'fuel_type_code', :field_name => 'fl'
|
272
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
273
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
270
274
|
store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
271
275
|
store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
272
276
|
store 'cylinders', :field_name => 'cyl'
|
@@ -297,6 +301,8 @@ class AutomobileVariant < ActiveRecord::Base
|
|
297
301
|
store 'make_name', :field_name => 'make'
|
298
302
|
store 'model_name', :field_name => 'model'
|
299
303
|
store 'fuel_type_code', :field_name => 'FUEL TYPE'
|
304
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
305
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
300
306
|
store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
301
307
|
store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
302
308
|
store 'cylinders', :field_name => 'NUMB CYL'
|
@@ -668,7 +674,7 @@ class DataMinerTest < Test::Unit::TestCase
|
|
668
674
|
assert_equal a, b
|
669
675
|
end
|
670
676
|
|
671
|
-
should "hash things
|
677
|
+
should "hash things" do
|
672
678
|
AutomobileVariant.data_miner_config.runnables[0].run(nil)
|
673
679
|
assert AutomobileVariant.first.row_hash.present?
|
674
680
|
end
|
@@ -734,9 +740,9 @@ class DataMinerTest < Test::Unit::TestCase
|
|
734
740
|
assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
|
735
741
|
end
|
736
742
|
|
737
|
-
should "mine residence survey day" do
|
738
|
-
|
739
|
-
|
740
|
-
end
|
743
|
+
# should "mine residence survey day" do
|
744
|
+
# ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
|
745
|
+
# assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
|
746
|
+
# end
|
741
747
|
end
|
742
748
|
end
|
data/test/test_helper.rb
CHANGED
@@ -66,11 +66,6 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
|
|
66
66
|
execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
|
67
67
|
|
68
68
|
create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
69
|
-
t.string "automobile_make_id"
|
70
|
-
t.string "automobile_model_id"
|
71
|
-
t.string "automobile_model_year_id"
|
72
|
-
t.string "automobile_fuel_type_id"
|
73
|
-
|
74
69
|
t.float "fuel_efficiency_city"
|
75
70
|
t.float "fuel_efficiency_highway"
|
76
71
|
t.string "make_name"
|
@@ -94,6 +89,12 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
|
|
94
89
|
t.boolean "injection"
|
95
90
|
t.string "carline_class_name"
|
96
91
|
t.string "speeds"
|
92
|
+
|
93
|
+
t.string 'raw_fuel_efficiency_highway_units'
|
94
|
+
t.string 'raw_fuel_efficiency_city_units'
|
95
|
+
t.string 'fuel_efficiency_highway_units'
|
96
|
+
t.string 'fuel_efficiency_city_units'
|
97
|
+
|
97
98
|
t.string "row_hash"
|
98
99
|
t.integer 'data_miner_touch_count'
|
99
100
|
t.integer 'data_miner_last_run_id'
|