data_miner 1.0.1 → 1.1.0

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,9 @@
1
+ 1.1.0
2
+ * fixed dependency issues
3
+ 1.0.0
4
+ * bundler and gemspec instead of jeweler
5
+ * clear up memory leaks and destructive argument borking
6
+ * mostly backwards compatible (but no add_hints! in remote_table transforms, for example)
1
7
  0.2.6
2
8
  * Upgrade to remote_table 0.1.6 to handle UTF-8 CSVs and long urls.
3
9
  0.3.0
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
20
  s.require_paths = ["lib"]
21
21
 
22
- s.add_dependency 'remote_table', '>=1.0.3'
22
+ s.add_dependency 'remote_table', '>=1.1.0'
23
23
  s.add_dependency 'escape', '>=0.0.4'
24
24
  s.add_dependency 'activerecord', '>=2.3.4'
25
25
  s.add_dependency 'activesupport', '>=2.3.4'
@@ -31,5 +31,9 @@ Gem::Specification.new do |s|
31
31
  s.add_development_dependency 'test-unit'
32
32
  s.add_development_dependency 'shoulda'
33
33
  s.add_development_dependency 'mysql'
34
- s.add_development_dependency 'ruby-debug'
34
+ if RUBY_VERSION >= '1.9'
35
+ s.add_development_dependency 'ruby-debug19'
36
+ else
37
+ s.add_development_dependency 'ruby-debug'
38
+ end
35
39
  end
@@ -1,3 +1,3 @@
1
1
  class DataMiner
2
- VERSION = '1.0.1'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -1,15 +1,9 @@
1
1
  require 'rubygems'
2
2
  require 'bundler'
3
- unless RUBY_VERSION >= '1.9'
4
- gem 'fastercsv'
5
- require 'fastercsv'
6
- end
7
3
  Bundler.setup
8
4
  require 'test/unit'
9
5
  require 'shoulda'
10
- unless RUBY_VERSION >= '1.9'
11
- require 'ruby-debug'
12
- end
6
+ require 'ruby-debug'
13
7
  $LOAD_PATH.unshift(File.dirname(__FILE__))
14
8
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
15
9
  require 'data_miner'
@@ -33,6 +33,10 @@ class Aircraft < ActiveRecord::Base
33
33
  row['Manufacturer'] =~ /BOEING/i
34
34
  end
35
35
 
36
+ def is_not_attributed_to_airbus?(row)
37
+ row['Manufacturer'] =~ /AIRBUS/i
38
+ end
39
+
36
40
  def is_attributed_to_cessna?(row)
37
41
  row['Manufacturer'] =~ /CESSNA/i
38
42
  end
@@ -77,8 +81,7 @@ class Aircraft < ActiveRecord::Base
77
81
  import("ICAO codes starting with letter #{letter} used by the FAA",
78
82
  :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
79
83
  :encoding => 'US-ASCII',
80
- :errata => Errata.new(:url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
81
- :responder => Aircraft::Guru.new),
84
+ :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw', :responder => 'Aircraft::Guru' },
82
85
  :row_xpath => '//table/tr[2]/td/table/tr',
83
86
  :column_xpath => 'td') do
84
87
  key 'icao_code', :field_name => 'Designator'
@@ -42,9 +42,58 @@ class AutomobileVariant < ActiveRecord::Base
42
42
  }
43
43
 
44
44
  class ParserB
45
+ require 'slither'
46
+ ::Slither.define :fuel_economy_guide_b do |d|
47
+ d.rows do |row|
48
+ row.trap { true } # there's only one section
49
+ row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
50
+ row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
51
+ row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
52
+ row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
53
+ row.column 'carline_name' , 28, :type => :string # CARLINE NAME
54
+ row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
55
+ row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
56
+ row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
57
+ row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
58
+ row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
59
+ row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
60
+ row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
61
+ row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
62
+ row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
63
+ row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
64
+ row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
65
+ row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
66
+ row.spacer 2
67
+ row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
68
+ row.spacer 2
69
+ row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
70
+ row.spacer 2
71
+ row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
72
+ row.spacer 2
73
+ row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
74
+ row.spacer 2
75
+ row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
76
+ row.spacer 2
77
+ row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
78
+ row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
79
+ row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
80
+ row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
81
+ row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
82
+ row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
83
+ row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
84
+ row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
85
+ row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
86
+ row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
87
+ row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
88
+ row.column 'filler' , 1, :type => :string # NOT USED
89
+ row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
90
+ row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
91
+ end
92
+ end
45
93
  attr_accessor :year
46
94
  def initialize(options = {})
47
- @year = options[:year]
95
+ options = options.stringify_keys
96
+ @year = options['year']
48
97
  end
49
98
 
50
99
  def apply(row)
@@ -71,68 +120,12 @@ class AutomobileVariant < ActiveRecord::Base
71
120
  end
72
121
  end
73
122
 
74
- def add_hints!(bus)
75
- bus[:format] = :fixed_width
76
- bus[:cut] = '13-' if year == 1995
77
- bus[:schema_name] = :fuel_economy_guide_b
78
- bus[:select] = lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' }
79
- Slither.define :fuel_economy_guide_b do |d|
80
- d.rows do |row|
81
- row.trap { true } # there's only one section
82
- row.column 'active_year' , 4, :type => :integer # ACTIVE YEAR
83
- row.column 'state_code' , 1, :type => :string # STATE CODE: F=49-STATE,C=CALIFORNIA
84
- row.column 'carline_clss' , 2, :type => :integer # CARLINE CLASS CODE
85
- row.column 'carline_mfr_code' , 3, :type => :integer # CARLINE MANUFACTURER CODE
86
- row.column 'carline_name' , 28, :type => :string # CARLINE NAME
87
- row.column 'disp_cub_in' , 4, :type => :integer # DISP CUBIC INCHES
88
- row.column 'fuel_system' , 2, :type => :string # FUEL SYSTEM: 'FI' FOR FUEL INJECTION, 2-DIGIT INTEGER VALUE FOR #OF VENTURIES IF CARBURETOR SYSTEM.
89
- row.column 'model_trans' , 6, :type => :string # TRANSMISSION TYPE
90
- row.column 'no_cyc' , 2, :type => :integer # NUMBER OF ENGINE CYLINDERS
91
- row.column 'date_time' , 12, :type => :string # DATE AND TIME RECORD ENTERED -YYMMDDHHMMSS (YEAR, MONTH, DAY, HOUR, MINUTE, SECOND)
92
- row.column 'release_date' , 6, :type => :string # RELEASE DATE - YYMMDD (YEAR, MONTH, DAY)
93
- row.column 'vi_mfr_code' , 3, :type => :integer # VI MANUFACTURER CODE
94
- row.column 'carline_code' , 5, :type => :integer # CARLINE CODE
95
- row.column 'basic_eng_id' , 5, :type => :integer # BASIC ENGINE INDEX
96
- row.column 'carline_mfr_name' , 32, :type => :string # CARLINE MANUFACTURER NAME
97
- row.column 'suppress_code' , 1, :type => :integer # SUPPRESSION CODE (NO SUPPRESSED RECORD IF FOR PUBLIC ACCESS)
98
- row.column 'est_city_mpg' , 3, :type => :integer # ESTIMATED (CITY) MILES PER GALLON - 90% OF UNADJUSTED VALUE
99
- row.spacer 2
100
- row.column 'highway_mpg' , 3, :type => :integer # ESTIMATED (HWY) MILES PER GALLON - 78% OF UNADJUSTED VALUE
101
- row.spacer 2
102
- row.column 'combined_mpg' , 3, :type => :integer # COMBINED MILES PER GALLON
103
- row.spacer 2
104
- row.column 'unadj_city_mpg' , 3, :type => :integer # UNADJUSTED CITY MILES PER GALLON
105
- row.spacer 2
106
- row.column 'unadj_hwy_mpg' , 3, :type => :integer # UNADJUSTED HIGHWAY MILES PER GALLON
107
- row.spacer 2
108
- row.column 'unadj_comb_mpg' , 3, :type => :integer # UNADJUSTED COMBINED MILES PER GALLON
109
- row.spacer 2
110
- row.column 'ave_anl_fuel' , 6, :type => :integer # "$" in col 147, Annual Fuel Cost starting col 148 in I5
111
- row.column 'opt_disp' , 8, :type => :string # OPTIONAL DISPLACEMENT
112
- row.column 'engine_desc1' , 10, :type => :string # ENGINE DESCRIPTION 1
113
- row.column 'engine_desc2' , 10, :type => :string # ENGINE DESCRIPTION 2
114
- row.column 'engine_desc3' , 10, :type => :string # ENGINE DESCRIPTION 3
115
- row.column 'body_type_2d' , 10, :type => :string # BODY TYPE 2 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '2DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
116
- row.column 'body_type_4d' , 10, :type => :string # BODY TYPE 4 DOOR - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM '4DR-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
117
- row.column 'body_type_hbk' , 10, :type => :string # BODY TYPE HBK - IF THE BODY TYPE APPLIES IT WILL TAKE THE FORM 'HBK-PPP/LL' WHERE PPP=PASSENGER INTERIOR VOLUME AND LL=LUGGAGE INTERIOR VOLUME.
118
- row.column 'puerto_rico' , 1, :type => :string # '*' IF FOR PUERTO RICO SALES ONLY
119
- row.column 'overdrive' , 4, :type => :string # OVERDRIVE: ' OD ' FOR OVERDRIVE, 'EOD ' FOR ELECTRICALLY OPERATED OVERDRIVE AND 'AEOD' FOR AUTOMATIC OVERDRIVE
120
- row.column 'drive_system' , 3, :type => :string # FWD=FRONT WHEEL DRIVE, RWD=REAR, 4WD=4-WHEEL
121
- row.column 'filler' , 1, :type => :string # NOT USED
122
- row.column 'fuel_type' , 1, :type => :string # R=REGULAR(UNLEADED), P=PREMIUM, D=DIESEL
123
- row.column 'trans_desc' , 15, :type => :string # TRANSMISSION DESCRIPTORS
124
- end
125
- end
126
- end
127
123
  end
128
124
  class ParserC
129
125
  attr_accessor :year
130
126
  def initialize(options = {})
131
- @year = options[:year]
132
- end
133
-
134
- def add_hints!(bus)
135
- # File will decide format based on filename
127
+ options = options.stringify_keys
128
+ @year = options['year']
136
129
  end
137
130
 
138
131
  def apply(row)
@@ -153,11 +146,8 @@ class AutomobileVariant < ActiveRecord::Base
153
146
  class ParserD
154
147
  attr_accessor :year
155
148
  def initialize(options = {})
156
- @year = options[:year]
157
- end
158
-
159
- def add_hints!(bus)
160
- bus[:reject] = lambda { |row| row.values.first.blank? } if year == 2007
149
+ options = options.stringify_keys
150
+ @year = options['year']
161
151
  end
162
152
 
163
153
  def apply(row)
@@ -238,8 +228,7 @@ class AutomobileVariant < ActiveRecord::Base
238
228
  end
239
229
  end
240
230
 
241
- errata = Errata.new :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv',
242
- :responder => AutomobileVariant::Guru.new
231
+ errata = { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/fuel_economy_guide/errata.csv', :responder => 'AutomobileVariant::Guru' }
243
232
 
244
233
  data_miner do
245
234
  # 1985---1997
@@ -248,6 +237,10 @@ class AutomobileVariant < ActiveRecord::Base
248
237
  import(:url => "http://www.fueleconomy.gov/FEG/epadata/#{yy}mfgui.zip",
249
238
  :filename => filename,
250
239
  :transform => { :class => FuelEconomyGuide::ParserB, :year => "19#{yy}".to_i },
240
+ :format => :fixed_width,
241
+ :cut => (yy == 95) ? '13-' : nil,
242
+ :schema_name => :fuel_economy_guide_b,
243
+ :select => lambda { |row| row['supress_code'].blank? and row['state_code'] == 'F' },
251
244
  :errata => errata) do
252
245
  key 'row_hash'
253
246
  store 'make_name', :field_name => 'make'
@@ -317,6 +310,7 @@ class AutomobileVariant < ActiveRecord::Base
317
310
  # 2010 => { :url => 'http://www.fueleconomy.gov/FEG/epadata/10data.zip', :filename => '2010FEguide-for DOE-rel dates before 10-16-09-no-sales10-8-09public.xls' }
318
311
  }.sort { |a, b| a.first <=> b.first }.each do |year, options|
319
312
  import options.merge(:transform => { :class => FuelEconomyGuide::ParserD, :year => year },
313
+ :reject => (year == 2007) ? lambda { |row| row.values.first.blank? } : nil,
320
314
  :errata => errata) do
321
315
  key 'row_hash'
322
316
  store 'make_name', :field_name => 'make'
@@ -548,7 +548,7 @@ class AircraftDeux < ActiveRecord::Base
548
548
  import("ICAO codes starting with letter #{letter} used by the FAA",
549
549
  :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
550
550
  :encoding => 'windows-1252',
551
- :errata => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
551
+ :errata => { :url => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw' },
552
552
  :row_xpath => '//table/tr[2]/td/table/tr',
553
553
  :column_xpath => 'td') do
554
554
  key 'icao_code', :field_name => 'Designator'
@@ -588,7 +588,7 @@ class AutomobileMakeFleetYear < ActiveRecord::Base
588
588
 
589
589
  # CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
590
590
  import :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/make_fleet_years.csv',
591
- :errata => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv',
591
+ :errata => { :url => 'http://static.brighterplanet.com/science/data/transport/automobiles/make_fleet_years/errata.csv' },
592
592
  :select => lambda { |row| row['volume'].to_i > 0 } do
593
593
  key 'name', :synthesize => lambda { |row| [ row['manufacturer_name'], row['fleet'][2,2], row['year_content'] ].join ' ' }
594
594
  store 'make_name', :field_name => 'manufacturer_name'
@@ -661,7 +661,7 @@ class TestOldSyntax < Test::Unit::TestCase
661
661
  end
662
662
  end
663
663
  assert_kind_of DataMiner::Import, AutomobileFuelType.data_miner_config.steps.first
664
- assert_equal 'http://example.com', AutomobileFuelType.data_miner_config.steps.first.table.package.url
664
+ assert_equal 'http://example.com', AutomobileFuelType.data_miner_config.steps.first.table.url
665
665
  assert_equal 1, AutomobileFuelType.data_miner_config.step_counter
666
666
  end
667
667
  should "stop and finish if it gets a DataMiner::Finish" do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
- - 0
9
8
  - 1
10
- version: 1.0.1
9
+ - 0
10
+ version: 1.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -28,12 +28,12 @@ dependencies:
28
28
  requirements:
29
29
  - - ">="
30
30
  - !ruby/object:Gem::Version
31
- hash: 17
31
+ hash: 19
32
32
  segments:
33
33
  - 1
34
+ - 1
34
35
  - 0
35
- - 3
36
- version: 1.0.3
36
+ version: 1.1.0
37
37
  type: :runtime
38
38
  version_requirements: *id001
39
39
  - !ruby/object:Gem::Dependency