data_miner 0.4.27 → 0.4.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.27
1
+ 0.4.28
data/data_miner.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.27"
8
+ s.version = "0.4.28"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-05-07}
12
+ s.date = %q{2010-05-14}
13
13
  s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -23,7 +23,8 @@ module DataMiner
23
23
  :units_field_name,
24
24
  :units_field_number,
25
25
  :field_number,
26
- :chars
26
+ :chars,
27
+ :synthesize
27
28
  ]
28
29
 
29
30
  def initialize(step, name, options = {})
@@ -82,6 +83,7 @@ module DataMiner
82
83
  value = value_in_source row
83
84
  return value if value.is_a? ActiveRecord::Base # carry through trapdoor
84
85
  value = value_in_dictionary value if wants_dictionary?
86
+ value = synthesize.call(row) if wants_synthesize?
85
87
  value
86
88
  end
87
89
 
@@ -153,6 +155,9 @@ module DataMiner
153
155
  def wants_chars?
154
156
  chars.present?
155
157
  end
158
+ def wants_synthesize?
159
+ synthesize.is_a?(Proc)
160
+ end
156
161
  def wants_overwriting?
157
162
  overwrite != false
158
163
  end
@@ -212,6 +217,9 @@ module DataMiner
212
217
  def chars
213
218
  options[:chars]
214
219
  end
220
+ def synthesize
221
+ options[:synthesize]
222
+ end
215
223
  def static
216
224
  options[:static]
217
225
  end
@@ -1068,12 +1068,53 @@ class AircraftDeux < ActiveRecord::Base
1068
1068
  end
1069
1069
  end
1070
1070
 
1071
+ class AutomobileMakeFleetYear < ActiveRecord::Base
1072
+ set_primary_key :name
1073
+
1074
+ data_miner do
1075
+ process "create a table on the fly" do
1076
+ create_table "automobile_make_fleet_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
1077
+ t.string "name"
1078
+ t.string "make_name"
1079
+ t.string "fleet"
1080
+ t.integer "year"
1081
+ t.float "fuel_efficiency"
1082
+ t.string "fuel_efficiency_units"
1083
+ t.integer "volume"
1084
+ t.string "make_year_name"
1085
+ t.datetime "created_at"
1086
+ t.datetime "updated_at"
1087
+ t.integer 'data_miner_touch_count'
1088
+ t.integer 'data_miner_last_run_id'
1089
+ end
1090
+ execute 'ALTER TABLE automobile_make_fleet_years ADD PRIMARY KEY (name)'
1091
+ end
1092
+
1093
+ # CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
1094
+ import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv',
1095
+ :errata => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv',
1096
+ :select => lambda { |row| row['volume'].to_i > 0 } do
1097
+ key 'name', :synthesize => lambda { |row| [ row['manufacturer_name'], row['fleet'][2,2], row['year_content'] ].join ' ' }
1098
+ store 'make_name', :field_name => 'manufacturer_name'
1099
+ store 'year', :field_name => 'year_content'
1100
+ store 'fleet', :chars => 2..3 # zero-based
1101
+ store 'fuel_efficiency', :from_units => :miles_per_gallon, :to_units => :kilometres_per_litre
1102
+ store 'volume'
1103
+ end
1104
+ end
1105
+ end
1106
+
1071
1107
  # todo: have somebody properly organize these
1072
1108
  class DataMinerTest < Test::Unit::TestCase
1073
1109
  if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
1074
1110
  end
1075
1111
 
1076
1112
  if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
1113
+ should "be able to synthesize rows without using a full parser class" do
1114
+ AutomobileMakeFleetYear.run_data_miner!
1115
+ assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
1116
+ end
1117
+
1077
1118
  should "keep a call stack so that you can call run_data_miner! on a child" do
1078
1119
  CrosscallingCensusDivision.run_data_miner!
1079
1120
  assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 27
9
- version: 0.4.27
8
+ - 28
9
+ version: 0.4.28
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-07 00:00:00 -04:00
18
+ date: 2010-05-14 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency