data_miner 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +2 -1
- data/VERSION +1 -1
- data/data_miner.gemspec +1 -1
- data/lib/data_miner/attribute.rb +13 -3
- data/lib/data_miner/configuration.rb +94 -6
- data/lib/data_miner/import.rb +0 -1
- data/lib/data_miner.rb +11 -4
- data/test/data_miner_test.rb +11 -5
- data/test/test_helper.rb +6 -5
- metadata +1 -1
data/README.rdoc
CHANGED
@@ -50,7 +50,8 @@ Put this in <tt>lib/tasks/data_miner_tasks.rake</tt>: (unfortunately I don't kno
|
|
50
50
|
|
51
51
|
namespace :data_miner do
|
52
52
|
task :run => :environment do
|
53
|
-
|
53
|
+
resource_names = %w{R RESOURCES RESOURCE RESOURCE_NAMES}.map { |possible_key| ENV[possible_key].to_s }.join.split(/\s*,\s*/).flatten.compact
|
54
|
+
DataMiner.run :resource_names => resource_names
|
54
55
|
end
|
55
56
|
end
|
56
57
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.0
|
1
|
+
0.4.1
|
data/data_miner.gemspec
CHANGED
data/lib/data_miner/attribute.rb
CHANGED
@@ -63,7 +63,10 @@ module DataMiner
|
|
63
63
|
return false if !wants_overwriting? and !record.send(name).nil?
|
64
64
|
what_it_was = record.send name
|
65
65
|
what_it_should_be = value_from_row row
|
66
|
+
|
66
67
|
record.send "#{name}=", what_it_should_be
|
68
|
+
record.send "#{name}_units=", (to_units || unit_from_source(row)).to_s if wants_units?
|
69
|
+
|
67
70
|
what_it_is = record.send name
|
68
71
|
if what_it_is.nil? and !what_it_should_be.nil?
|
69
72
|
DataMiner.logger.info "ActiveRecord didn't like trying to set #{resource}.#{name} = #{what_it_should_be} (it came out as nil)"
|
@@ -76,10 +79,11 @@ module DataMiner
|
|
76
79
|
end
|
77
80
|
|
78
81
|
def unit_from_source(row)
|
79
|
-
row[units_field_name].to_s.strip.underscore.to_sym
|
82
|
+
row[units_field_name || units_field_number].to_s.strip.underscore.to_sym
|
80
83
|
end
|
81
84
|
|
82
85
|
def do_convert(row, value)
|
86
|
+
logger.error "[data_miner gem] If you use :from_units, you need to set :to_units (#{resource.name}##{name})" unless wants_units?
|
83
87
|
value.to_f.convert((from_units || unit_from_source(row)), to_units)
|
84
88
|
end
|
85
89
|
|
@@ -125,7 +129,10 @@ module DataMiner
|
|
125
129
|
overwrite != false
|
126
130
|
end
|
127
131
|
def wants_conversion?
|
128
|
-
from_units.present? or units_field_name.present?
|
132
|
+
from_units.present? or units_field_name.present? or units_field_number.present?
|
133
|
+
end
|
134
|
+
def wants_units?
|
135
|
+
to_units.present? or units_field_name.present? or units_field_number.present?
|
129
136
|
end
|
130
137
|
def wants_dictionary?
|
131
138
|
options[:dictionary].present?
|
@@ -156,7 +163,7 @@ module DataMiner
|
|
156
163
|
options[:from_units]
|
157
164
|
end
|
158
165
|
def to_units
|
159
|
-
options[:to_units]
|
166
|
+
options[:to_units] || options[:units]
|
160
167
|
end
|
161
168
|
def conditions
|
162
169
|
options[:conditions]
|
@@ -176,6 +183,9 @@ module DataMiner
|
|
176
183
|
def units_field_name
|
177
184
|
options[:units_field_name]
|
178
185
|
end
|
186
|
+
def units_field_number
|
187
|
+
options[:units_field_number]
|
188
|
+
end
|
179
189
|
def field_number
|
180
190
|
options[:field_number]
|
181
191
|
end
|
@@ -2,18 +2,17 @@ module DataMiner
|
|
2
2
|
class Configuration
|
3
3
|
include Blockenspiel::DSL
|
4
4
|
|
5
|
-
attr_accessor :resource, :runnables, :runnable_counter, :attributes
|
5
|
+
attr_accessor :resource, :runnables, :runnable_counter, :attributes
|
6
6
|
|
7
7
|
def initialize(resource)
|
8
8
|
@runnables = Array.new
|
9
|
-
@unique_indices = Set.new
|
10
9
|
@resource = resource
|
11
10
|
@runnable_counter = 0
|
12
11
|
@attributes = HashWithIndifferentAccess.new
|
13
12
|
end
|
14
|
-
|
15
|
-
def
|
16
|
-
|
13
|
+
|
14
|
+
def logger
|
15
|
+
DataMiner.logger
|
17
16
|
end
|
18
17
|
|
19
18
|
def process(method_name_or_block_description, &block)
|
@@ -51,8 +50,97 @@ module DataMiner
|
|
51
50
|
nil
|
52
51
|
end
|
53
52
|
|
53
|
+
def import_runnables
|
54
|
+
runnables.select { |runnable| runnable.is_a? Import }
|
55
|
+
end
|
56
|
+
|
57
|
+
def before_invoke
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
def after_invoke
|
62
|
+
make_sure_unit_definitions_make_sense
|
63
|
+
suggest_missing_column_migrations
|
64
|
+
end
|
65
|
+
|
66
|
+
COMPLETE_UNIT_DEFINITIONS = [
|
67
|
+
[:units],
|
68
|
+
[:from_units, :to_units],
|
69
|
+
[:units_field_name],
|
70
|
+
[:units_field_name, :to_units],
|
71
|
+
[:units_field_number],
|
72
|
+
[:units_field_number, :to_units]
|
73
|
+
]
|
74
|
+
|
75
|
+
def make_sure_unit_definitions_make_sense
|
76
|
+
import_runnables.each do |runnable|
|
77
|
+
runnable.attributes.each do |_, attribute|
|
78
|
+
if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
|
79
|
+
logger.error %{
|
80
|
+
|
81
|
+
================================
|
82
|
+
|
83
|
+
[data_miner gem] You don't have a valid unit definition for #{resource.name}##{attribute.name}.
|
84
|
+
|
85
|
+
You supplied #{attribute.options.keys.select { |k, _| k.to_s =~ /unit/ }.map(&:to_sym).inspect }.
|
86
|
+
|
87
|
+
You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence}".
|
88
|
+
|
89
|
+
================================
|
90
|
+
}
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def suggest_missing_column_migrations
|
97
|
+
missing_columns = Array.new
|
98
|
+
import_runnables.each do |runnable|
|
99
|
+
runnable.attributes.each do |_, attribute|
|
100
|
+
logger.error "[data_miner gem] You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
|
101
|
+
unless resource.column_names.include? attribute.name
|
102
|
+
missing_columns << attribute.name
|
103
|
+
end
|
104
|
+
if attribute.wants_units? and !resource.column_names.include?(units_column = "#{attribute.name}_units")
|
105
|
+
missing_columns << units_column
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
missing_columns.uniq!
|
110
|
+
if missing_columns.any?
|
111
|
+
logger.error %{
|
112
|
+
|
113
|
+
================================
|
114
|
+
|
115
|
+
[data_miner gem] On #{resource}, it looks like you're missing some columns...
|
116
|
+
|
117
|
+
Please run this...
|
118
|
+
|
119
|
+
./script/generate migration AddMissingColumnsTo#{resource.name}
|
120
|
+
|
121
|
+
and **replace** the resulting file with this:
|
122
|
+
|
123
|
+
class AddMissingColumnsTo#{resource.name} < ActiveRecord::Migration
|
124
|
+
def self.up
|
125
|
+
#{missing_columns.map { |column_name| " add_column :#{resource.table_name}, :#{column_name}, :#{column_name.ends_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' }" }.join("\n") }
|
126
|
+
end
|
127
|
+
|
128
|
+
def self.down
|
129
|
+
#{missing_columns.map { |column_name| " remove_column :#{resource.table_name}, :#{column_name}" }.join("\n") }
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
On the other hand, if you're working directly with create_table, this might be helpful:
|
134
|
+
|
135
|
+
#{missing_columns.map { |column_name| "t.#{column_name.ends_with?('_units') ? 'string' : 'FIXME_WHAT_COLUMN_TYPE_AM_I' } '#{column_name}'" }.join("\n") }
|
136
|
+
|
137
|
+
================================
|
138
|
+
}
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
54
142
|
cattr_accessor :resource_names
|
55
|
-
self.resource_names =
|
143
|
+
self.resource_names = Array.new
|
56
144
|
class << self
|
57
145
|
# Mine data. Defaults to all resource_names touched by DataMiner.
|
58
146
|
#
|
data/lib/data_miner/import.rb
CHANGED
@@ -28,7 +28,6 @@ module DataMiner
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def store(attr_name, attr_options = {})
|
31
|
-
raise "[data_miner gem] Column #{attr_name} doesn't exist on table #{resource.table_name}" unless resource.column_names.include?(attr_name)
|
32
31
|
attributes[attr_name] = Attribute.new self, attr_name, attr_options
|
33
32
|
end
|
34
33
|
|
data/lib/data_miner.rb
CHANGED
@@ -17,16 +17,21 @@ require 'data_miner/run'
|
|
17
17
|
module DataMiner
|
18
18
|
class MissingHashColumn < RuntimeError; end
|
19
19
|
|
20
|
-
include Log4r
|
20
|
+
include Log4r unless defined? Rails
|
21
21
|
|
22
22
|
mattr_accessor :logger
|
23
23
|
|
24
24
|
def self.start_logging
|
25
|
-
if defined?
|
25
|
+
if defined? Rails
|
26
26
|
self.logger = Rails.logger
|
27
27
|
else
|
28
|
+
info_outputter = FileOutputter.new 'f1', :filename => 'data_miner.log'
|
29
|
+
error_outputter = Outputter.stderr
|
30
|
+
info_outputter.only_at DEBUG, INFO
|
31
|
+
error_outputter.only_at WARN, ERROR, FATAL
|
32
|
+
|
28
33
|
self.logger = Logger.new 'data_miner'
|
29
|
-
logger.
|
34
|
+
logger.add info_outputter, error_outputter
|
30
35
|
end
|
31
36
|
end
|
32
37
|
|
@@ -50,7 +55,7 @@ ActiveRecord::Base.class_eval do
|
|
50
55
|
return
|
51
56
|
end
|
52
57
|
|
53
|
-
DataMiner.resource_names.
|
58
|
+
DataMiner.resource_names.push self.name unless DataMiner.resource_names.include? self.name
|
54
59
|
DataMiner.create_tables
|
55
60
|
|
56
61
|
belongs_to :data_miner_last_run, :class_name => 'DataMiner::Run'
|
@@ -68,6 +73,8 @@ ActiveRecord::Base.class_eval do
|
|
68
73
|
self.data_miner_config = DataMiner::Configuration.new self
|
69
74
|
|
70
75
|
Blockenspiel.invoke block, data_miner_config
|
76
|
+
|
77
|
+
data_miner_config.after_invoke
|
71
78
|
end
|
72
79
|
end
|
73
80
|
|
data/test/data_miner_test.rb
CHANGED
@@ -233,6 +233,8 @@ class AutomobileVariant < ActiveRecord::Base
|
|
233
233
|
store 'model_name', :field_name => 'model'
|
234
234
|
store 'year'
|
235
235
|
store 'fuel_type_code', :field_name => 'fuel_type'
|
236
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
237
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
236
238
|
store 'raw_fuel_efficiency_highway', :field_name => 'unadj_hwy_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
237
239
|
store 'raw_fuel_efficiency_city', :field_name => 'unadj_city_mpg', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
238
240
|
store 'cylinders', :field_name => 'no_cyc'
|
@@ -267,6 +269,8 @@ class AutomobileVariant < ActiveRecord::Base
|
|
267
269
|
store 'make_name', :field_name => 'make'
|
268
270
|
store 'model_name', :field_name => 'model'
|
269
271
|
store 'fuel_type_code', :field_name => 'fl'
|
272
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
273
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
270
274
|
store 'raw_fuel_efficiency_highway', :field_name => 'uhwy', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
271
275
|
store 'raw_fuel_efficiency_city', :field_name => 'ucty', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
272
276
|
store 'cylinders', :field_name => 'cyl'
|
@@ -297,6 +301,8 @@ class AutomobileVariant < ActiveRecord::Base
|
|
297
301
|
store 'make_name', :field_name => 'make'
|
298
302
|
store 'model_name', :field_name => 'model'
|
299
303
|
store 'fuel_type_code', :field_name => 'FUEL TYPE'
|
304
|
+
store 'fuel_efficiency_highway', :static => nil, :units => :kilometres_per_litre # we'll convert these in a later step, just setting the stage
|
305
|
+
store 'fuel_efficiency_city', :static => nil, :units => :kilometres_per_litre # ditto
|
300
306
|
store 'raw_fuel_efficiency_highway', :field_name => 'UNRND HWY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
301
307
|
store 'raw_fuel_efficiency_city', :field_name => 'UNRND CITY (EPA)', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
302
308
|
store 'cylinders', :field_name => 'NUMB CYL'
|
@@ -668,7 +674,7 @@ class DataMinerTest < Test::Unit::TestCase
|
|
668
674
|
assert_equal a, b
|
669
675
|
end
|
670
676
|
|
671
|
-
should "hash things
|
677
|
+
should "hash things" do
|
672
678
|
AutomobileVariant.data_miner_config.runnables[0].run(nil)
|
673
679
|
assert AutomobileVariant.first.row_hash.present?
|
674
680
|
end
|
@@ -734,9 +740,9 @@ class DataMinerTest < Test::Unit::TestCase
|
|
734
740
|
assert AutomobileVariant.count('make_name LIKE "%tesla"') > 0
|
735
741
|
end
|
736
742
|
|
737
|
-
should "mine residence survey day" do
|
738
|
-
|
739
|
-
|
740
|
-
end
|
743
|
+
# should "mine residence survey day" do
|
744
|
+
# ResidentialEnergyConsumptionSurveyResponse.run_data_miner!
|
745
|
+
# assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
|
746
|
+
# end
|
741
747
|
end
|
742
748
|
end
|
data/test/test_helper.rb
CHANGED
@@ -66,11 +66,6 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
|
|
66
66
|
execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
|
67
67
|
|
68
68
|
create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
69
|
-
t.string "automobile_make_id"
|
70
|
-
t.string "automobile_model_id"
|
71
|
-
t.string "automobile_model_year_id"
|
72
|
-
t.string "automobile_fuel_type_id"
|
73
|
-
|
74
69
|
t.float "fuel_efficiency_city"
|
75
70
|
t.float "fuel_efficiency_highway"
|
76
71
|
t.string "make_name"
|
@@ -94,6 +89,12 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
|
|
94
89
|
t.boolean "injection"
|
95
90
|
t.string "carline_class_name"
|
96
91
|
t.string "speeds"
|
92
|
+
|
93
|
+
t.string 'raw_fuel_efficiency_highway_units'
|
94
|
+
t.string 'raw_fuel_efficiency_city_units'
|
95
|
+
t.string 'fuel_efficiency_highway_units'
|
96
|
+
t.string 'fuel_efficiency_city_units'
|
97
|
+
|
97
98
|
t.string "row_hash"
|
98
99
|
t.integer 'data_miner_touch_count'
|
99
100
|
t.integer 'data_miner_last_run_id'
|