data_miner 0.4.27 → 0.4.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/data_miner.gemspec +2 -2
- data/lib/data_miner/attribute.rb +9 -1
- data/test/data_miner_test.rb +41 -0
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.27
|
1
|
+
0.4.28
|
data/data_miner.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.27"
|
8
|
+
s.version = "0.4.28"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-14}
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -23,7 +23,8 @@ module DataMiner
|
|
23
23
|
:units_field_name,
|
24
24
|
:units_field_number,
|
25
25
|
:field_number,
|
26
|
-
:chars
|
26
|
+
:chars,
|
27
|
+
:synthesize
|
27
28
|
]
|
28
29
|
|
29
30
|
def initialize(step, name, options = {})
|
@@ -82,6 +83,7 @@ module DataMiner
|
|
82
83
|
value = value_in_source row
|
83
84
|
return value if value.is_a? ActiveRecord::Base # carry through trapdoor
|
84
85
|
value = value_in_dictionary value if wants_dictionary?
|
86
|
+
value = synthesize.call(row) if wants_synthesize?
|
85
87
|
value
|
86
88
|
end
|
87
89
|
|
@@ -153,6 +155,9 @@ module DataMiner
|
|
153
155
|
def wants_chars?
|
154
156
|
chars.present?
|
155
157
|
end
|
158
|
+
def wants_synthesize?
|
159
|
+
synthesize.is_a?(Proc)
|
160
|
+
end
|
156
161
|
def wants_overwriting?
|
157
162
|
overwrite != false
|
158
163
|
end
|
@@ -212,6 +217,9 @@ module DataMiner
|
|
212
217
|
def chars
|
213
218
|
options[:chars]
|
214
219
|
end
|
220
|
+
def synthesize
|
221
|
+
options[:synthesize]
|
222
|
+
end
|
215
223
|
def static
|
216
224
|
options[:static]
|
217
225
|
end
|
data/test/data_miner_test.rb
CHANGED
@@ -1068,12 +1068,53 @@ class AircraftDeux < ActiveRecord::Base
|
|
1068
1068
|
end
|
1069
1069
|
end
|
1070
1070
|
|
1071
|
+
class AutomobileMakeFleetYear < ActiveRecord::Base
|
1072
|
+
set_primary_key :name
|
1073
|
+
|
1074
|
+
data_miner do
|
1075
|
+
process "create a table on the fly" do
|
1076
|
+
create_table "automobile_make_fleet_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
1077
|
+
t.string "name"
|
1078
|
+
t.string "make_name"
|
1079
|
+
t.string "fleet"
|
1080
|
+
t.integer "year"
|
1081
|
+
t.float "fuel_efficiency"
|
1082
|
+
t.string "fuel_efficiency_units"
|
1083
|
+
t.integer "volume"
|
1084
|
+
t.string "make_year_name"
|
1085
|
+
t.datetime "created_at"
|
1086
|
+
t.datetime "updated_at"
|
1087
|
+
t.integer 'data_miner_touch_count'
|
1088
|
+
t.integer 'data_miner_last_run_id'
|
1089
|
+
end
|
1090
|
+
execute 'ALTER TABLE automobile_make_fleet_years ADD PRIMARY KEY (name)'
|
1091
|
+
end
|
1092
|
+
|
1093
|
+
# CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
|
1094
|
+
import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv',
|
1095
|
+
:errata => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv',
|
1096
|
+
:select => lambda { |row| row['volume'].to_i > 0 } do
|
1097
|
+
key 'name', :synthesize => lambda { |row| [ row['manufacturer_name'], row['fleet'][2,2], row['year_content'] ].join ' ' }
|
1098
|
+
store 'make_name', :field_name => 'manufacturer_name'
|
1099
|
+
store 'year', :field_name => 'year_content'
|
1100
|
+
store 'fleet', :chars => 2..3 # zero-based
|
1101
|
+
store 'fuel_efficiency', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
|
1102
|
+
store 'volume'
|
1103
|
+
end
|
1104
|
+
end
|
1105
|
+
end
|
1106
|
+
|
1071
1107
|
# todo: have somebody properly organize these
|
1072
1108
|
class DataMinerTest < Test::Unit::TestCase
|
1073
1109
|
if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
|
1074
1110
|
end
|
1075
1111
|
|
1076
1112
|
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
1113
|
+
should "be able to synthesize rows without using a full parser class" do
|
1114
|
+
AutomobileMakeFleetYear.run_data_miner!
|
1115
|
+
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
1116
|
+
end
|
1117
|
+
|
1077
1118
|
should "keep a call stack so that you can call run_data_miner! on a child" do
|
1078
1119
|
CrosscallingCensusDivision.run_data_miner!
|
1079
1120
|
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 27
|
9
|
-
version: 0.4.27
|
8
|
+
- 28
|
9
|
+
version: 0.4.28
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-14 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|