data_miner 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 1.1.0
2
+ * fixed dependency issues (require remote_table >=1.1.0; use ruby-debug19 as the development debugger on Ruby 1.9)
3
+ 1.0.0
4
+ * bundler and gemspec instead of jeweler
5
+ * fixed memory leaks and stopped destructively modifying arguments passed in by the caller
6
+ * mostly backwards compatible (one exception, for example: add_hints! is no longer supported in remote_table transforms)
1
7
  0.2.6
2
8
  * Upgrade to remote_table 0.1.6 to handle UTF-8 CSVs and long urls.
3
9
  0.3.0
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
20
  s.require_paths = ["lib"]
21
21
 
22
- s.add_dependency 'remote_table', '>=1.0.3'
22
+ s.add_dependency 'remote_table', '>=1.1.0'
23
23
  s.add_dependency 'escape', '>=0.0.4'
24
24
  s.add_dependency 'activerecord', '>=2.3.4'
25
25
  s.add_dependency 'activesupport', '>=2.3.4'
@@ -31,5 +31,9 @@ Gem::Specification.new do |s|
31
31
  s.add_development_dependency 'test-unit'
32
32
  s.add_development_dependency 'shoulda'
33
33
  s.add_development_dependency 'mysql'
34
- s.add_development_dependency 'ruby-debug'
34
+ if RUBY_VERSION >= '1.9'
35
+ s.add_development_dependency 'ruby-debug19'
36
+ else
37
+ s.add_development_dependency 'ruby-debug'
38
+ end
35
39
  end
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '1.0.1'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -1,15 +1,9 @@
1
1
  require 'rubygems'
2
2
  require 'bundler'
3
- unless RUBY_VERSION >= '1.9'
4
- gem 'fastercsv'
5
- require 'fastercsv'
6
- end
7
3
  Bundler.setup
8
4
  require 'test/unit'
9
5
  require 'shoulda'
10
- unless RUBY_VERSION >= '1.9'
11
- require 'ruby-debug'
12
- end
6
+ require 'ruby-debug'
13
7
  $LOAD_PATH.unshift(File.dirname(__FILE__))
14
8
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
15
9
  require 'data_miner'
@@ -33,6 +33,10 @@ class Aircraft < ActiveRecord::Base
33
33
  row['Manufacturer'] =~ /BOEING/i
34
34
  end
35
35
 
36
+ def is_not_attributed_to_airbus?(row)
37
+ row['Manufacturer'] =~ /AIRBUS/i
38
+ end
39
+
36
40
  def is_attributed_to_cessna?(row)
37
41
  row['Manufacturer'] =~ /CESSNA/i
38
42
  end
@@ -77,8 +81,7 @@ class Aircraft < ActiveRecord::Base
77
81
  import("ICAO codes starting with letter #{letter} used by the FAA",
78
82
  :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
79
83
  :encoding => 'US-ASCII',
80
- :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
81
- :responder => Aircraft::Guru.new),
84
+ :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :responder => 'Aircraft::Guru' },
82
85
  :row_xpath => '//table/tr[2]/td/table/tr',
83
86
  :column_xpath => 'td') do
84
87
  key 'icao_code', :field_name => 'Designator'
@@ -42,9 +42,58 @@ class AutomobileVariant < ActiveRecord::Base
42
42
  }
43
43
 
44
44
  class ParserB
45
+ require 'slither'
46
+ ::Slither.define :fuel_economy_guide_b do |d|
47
+ d.rows do |row|
48
+ row.trap { true } # there's only one section
49
+ row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
50
+ row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
51
+ row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
52
+ row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
53
+ row.column 'carline_name' , 28, :type => :string # CARLINE NAME
54
+ row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
55
+ row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
56
+ row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
57
+ row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
58
+ row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
59
+ row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
60
+ row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
61
+ row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
62
+ row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
63
+ row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
64
+ row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
65
+ row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
66
+ row.spacer 2
67
+ row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
68
+ row.spacer 2
69
+ row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
70
+ row.spacer 2
71
+ row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
72
+ row.spacer 2
73
+ row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
74
+ row.spacer 2
75
+ row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
76
+ row.spacer 2
77
+ row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
78
+ row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
79
+ row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
80
+ row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
81
+ row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
82
+ row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
83
+ row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
84
+ row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
85
+ row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
86
+ row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
87
+ row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
88
+ row.column 'filler' , 1, :type => :string # NOT USED
89
+ row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
90
+ row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
91
+ end
92
+ end
45
93
  attr_accessor :year
46
94
  def initialize(options = {})
47
- @year = options[:year]
95
+ options = options.stringify_keys
96
+ @year = options['year']
48
97
  end
49
98
 
50
99
  def apply(row)
@@ -71,68 +120,12 @@ class AutomobileVariant < ActiveRecord::Base
71
120
  end
72
121
  end
73
122
 
74
- def add_hints!(bus)
75
- bus[:format] = :fixed_width
76
- bus[:cut] = '13-' if year == 1995
77
- bus[:schema_name] = :fuel_economy_guide_b
78
- bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' }
79
- Slither.define :fuel_economy_guide_b do |d|
80
- d.rows do |row|
81
- row.trap { true } # there's only one section
82
- row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
83
- row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
84
- row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
85
- row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
86
- row.column 'carline_name' , 28, :type => :string # CARLINE NAME
87
- row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
88
- row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
89
- row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
90
- row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
91
- row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
92
- row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
93
- row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
94
- row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
95
- row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
96
- row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
97
- row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
98
- row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
99
- row.spacer 2
100
- row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
101
- row.spacer 2
102
- row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
103
- row.spacer 2
104
- row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
105
- row.spacer 2
106
- row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
107
- row.spacer 2
108
- row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
109
- row.spacer 2
110
- row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
111
- row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
112
- row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
113
- row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
114
- row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
115
- row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
116
- row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
117
- row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
118
- row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
119
- row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
120
- row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
121
- row.column 'filler' , 1, :type => :string # NOT USED
122
- row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
123
- row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
124
- end
125
- end
126
- end
127
123
  end
128
124
  class ParserC
129
125
  attr_accessor :year
130
126
  def initialize(options = {})
131
- @year = options[:year]
132
- end
133
-
134
- def add_hints!(bus)
135
- # File will decide format based on filename
127
+ options = options.stringify_keys
128
+ @year = options['year']
136
129
  end
137
130
 
138
131
  def apply(row)
@@ -153,11 +146,8 @@ class AutomobileVariant < ActiveRecord::Base
153
146
  class ParserD
154
147
  attr_accessor :year
155
148
  def initialize(options = {})
156
- @year = options[:year]
157
- end
158
-
159
- def add_hints!(bus)
160
- bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007
149
+ options = options.stringify_keys
150
+ @year = options['year']
161
151
  end
162
152
 
163
153
  def apply(row)
@@ -238,8 +228,7 @@ class AutomobileVariant < ActiveRecord::Base
238
228
  end
239
229
  end
240
230
 
241
- errata = Errata.new :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
242
- :responder => AutomobileVariant::Guru.new
231
+ errata = { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv', :responder => 'AutomobileVariant::Guru' }
243
232
 
244
233
  data_miner do
245
234
  # 1985---1997
@@ -248,6 +237,10 @@ class AutomobileVariant < ActiveRecord::Base
248
237
  import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
249
238
  :filename => filename,
250
239
  :transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
240
+ :format => :fixed_width,
241
+ :cut => (yy == 95) ? '13-' : nil,
242
+ :schema_name => :fuel_economy_guide_b,
243
+ :select => lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' },
251
244
  :errata => errata) do
252
245
  key 'row_hash'
253
246
  store 'make_name', :field_name => 'make'
@@ -317,6 +310,7 @@ class AutomobileVariant < ActiveRecord::Base
317
310
  # 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
318
311
  }.sort { |a, b| a.first <=> b.first }.each do |year, options|
319
312
  import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
313
+ :reject => (year == 2007) ? lambda { |row| row.values.first.blank? } : nil,
320
314
  :errata => errata) do
321
315
  key 'row_hash'
322
316
  store 'make_name', :field_name => 'make'
@@ -548,7 +548,7 @@ class AircraftDeux < ActiveRecord::Base
548
548
  import("ICAO codes starting with letter #{letter} used by the FAA",
549
549
  :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
550
550
  :encoding => 'windows-1252',
551
- :errata => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
551
+ :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw' },
552
552
  :row_xpath => '//table/tr[2]/td/table/tr',
553
553
  :column_xpath => 'td') do
554
554
  key 'icao_code', :field_name => 'Designator'
@@ -588,7 +588,7 @@ class AutomobileMakeFleetYear < ActiveRecord::Base
588
588
 
589
589
  # CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
590
590
  import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv',
591
- :errata => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv',
591
+ :errata => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv' },
592
592
  :select => lambda { |row| row['volume'].to_i > 0 } do
593
593
  key 'name', :synthesize => lambda { |row| [ row['manufacturer_name'], row['fleet'][2,2], row['year_content'] ].join ' ' }
594
594
  store 'make_name', :field_name => 'manufacturer_name'
@@ -661,7 +661,7 @@ class TestOldSyntax < Test::Unit::TestCase
661
661
  end
662
662
  end
663
663
  assert_kind_of DataMiner::Import, AutomobileFuelType.data_miner_config.steps.first
664
- assert_equal 'http://example.com', AutomobileFuelType.data_miner_config.steps.first.table.package.url
664
+ assert_equal 'http://example.com', AutomobileFuelType.data_miner_config.steps.first.table.url
665
665
  assert_equal 1, AutomobileFuelType.data_miner_config.step_counter
666
666
  end
667
667
  should "stop and finish if it gets a DataMiner::Finish" do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
- - 0
9
8
  - 1
10
- version: 1.0.1
9
+ - 0
10
+ version: 1.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -28,12 +28,12 @@ dependencies:
28
28
  requirements:
29
29
  - - ">="
30
30
  - !ruby/object:Gem::Version
31
- hash: 17
31
+ hash: 19
32
32
  segments:
33
33
  - 1
34
+ - 1
34
35
  - 0
35
- - 3
36
- version: 1.0.3
36
+ version: 1.1.0
37
37
  type: :runtime
38
38
  version_requirements: *id001
39
39
  - !ruby/object:Gem::Dependency