data_miner 0.4.27 → 0.4.28
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/data_miner.gemspec +2 -2
- data/lib/data_miner/attribute.rb +9 -1
- data/test/data_miner_test.rb +41 -0
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.27
|
1
|
+
0.4.28
|
data/data_miner.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.27"
|
8
|
+
s.version = "0.4.28"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-14}
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -23,7 +23,8 @@ module DataMiner
|
|
23
23
|
:units_field_name,
|
24
24
|
:units_field_number,
|
25
25
|
:field_number,
|
26
|
-
:chars
|
26
|
+
:chars,
|
27
|
+
:synthesize
|
27
28
|
]
|
28
29
|
|
29
30
|
def initialize(step, name, options = {})
|
@@ -82,6 +83,7 @@ module DataMiner
|
|
82
83
|
value = value_in_source row
|
83
84
|
return value if value.is_a? ActiveRecord::Base # carry through trapdoor
|
84
85
|
value = value_in_dictionary value if wants_dictionary?
|
86
|
+
value = synthesize.call(row) if wants_synthesize?
|
85
87
|
value
|
86
88
|
end
|
87
89
|
|
@@ -153,6 +155,9 @@ module DataMiner
|
|
153
155
|
def wants_chars?
|
154
156
|
chars.present?
|
155
157
|
end
|
158
|
+
def wants_synthesize?
|
159
|
+
synthesize.is_a?(Proc)
|
160
|
+
end
|
156
161
|
def wants_overwriting?
|
157
162
|
overwrite != false
|
158
163
|
end
|
@@ -212,6 +217,9 @@ module DataMiner
|
|
212
217
|
def chars
|
213
218
|
options[:chars]
|
214
219
|
end
|
220
|
+
def synthesize
|
221
|
+
options[:synthesize]
|
222
|
+
end
|
215
223
|
def static
|
216
224
|
options[:static]
|
217
225
|
end
|
data/test/data_miner_test.rb
CHANGED
@@ -1068,12 +1068,53 @@ class AircraftDeux < ActiveRecord::Base
|
|
1068
1068
|
end
|
1069
1069
|
end
|
1070
1070
|
|
1071
|
+
class AutomobileMakeFleetYear < ActiveRecord::Base
|
1072
|
+
set_primary_key :name
|
1073
|
+
|
1074
|
+
data_miner do
|
1075
|
+
process "create a table on the fly" do
|
1076
|
+
create_table "automobile_make_fleet_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
1077
|
+
t.string "name"
|
1078
|
+
t.string "make_name"
|
1079
|
+
t.string "fleet"
|
1080
|
+
t.integer "year"
|
1081
|
+
t.float "fuel_efficiency"
|
1082
|
+
t.string "fuel_efficiency_units"
|
1083
|
+
t.integer "volume"
|
1084
|
+
t.string "make_year_name"
|
1085
|
+
t.datetime "created_at"
|
1086
|
+
t.datetime "updated_at"
|
1087
|
+
t.integer 'data_miner_touch_count'
|
1088
|
+
t.integer 'data_miner_last_run_id'
|
1089
|
+
end
|
1090
|
+
execute 'ALTER TABLE automobile_make_fleet_years ADD PRIMARY KEY (name)'
|
1091
|
+
end
|
1092
|
+
|
1093
|
+
# CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
|
1094
|
+
import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv',
|
1095
|
+
:errata => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv',
|
1096
|
+
:select => lambda { |row| row['volume'].to_i > 0 } do
|
1097
|
+
key 'name', :synthesize => lambda { |row| [ row['manufacturer_name'], row['fleet'][2,2], row['year_content'] ].join ' ' }
|
1098
|
+
store 'make_name', :field_name => 'manufacturer_name'
|
1099
|
+
store 'year', :field_name => 'year_content'
|
1100
|
+
store 'fleet', :chars => 2..3 # zero-based
|
1101
|
+
store 'fuel_efficiency', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
1102
|
+
store 'volume'
|
1103
|
+
end
|
1104
|
+
end
|
1105
|
+
end
|
1106
|
+
|
1071
1107
|
# todo: have somebody properly organize these
|
1072
1108
|
class DataMinerTest < Test::Unit::TestCase
|
1073
1109
|
if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
|
1074
1110
|
end
|
1075
1111
|
|
1076
1112
|
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
1113
|
+
should "be able to synthesize rows without using a full parser class" do
|
1114
|
+
AutomobileMakeFleetYear.run_data_miner!
|
1115
|
+
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
1116
|
+
end
|
1117
|
+
|
1077
1118
|
should "keep a call stack so that you can call run_data_miner! on a child" do
|
1078
1119
|
CrosscallingCensusDivision.run_data_miner!
|
1079
1120
|
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 27
|
9
|
-
version: 0.4.27
|
8
|
+
- 28
|
9
|
+
version: 0.4.28
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-14 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|