data_miner 0.4.21 → 0.4.22

Sign up to get free protection for your applications and to get access to all the features.
@@ -81,6 +81,11 @@ Now you should have
81
81
  >> Airport.first.country_name
82
82
  => "Papua New Guinea"
83
83
 
84
+ ==Wishlist
85
+
86
+ * each_record do |record| ... which would use find_in_batches
87
+ * when proxying add_column, rename_column, etc. automatically include the table name
88
+
84
89
  ==Authors
85
90
 
86
91
  * Seamus Abshere <seamus@abshere.net>
data/Rakefile CHANGED
@@ -10,14 +10,16 @@ begin
10
10
  gem.email = "seamus@abshere.net"
11
11
  gem.homepage = "http://github.com/seamusabshere/data_miner"
12
12
  gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
- gem.add_dependency 'remote_table', '>=0.2.10'
13
+ gem.add_dependency 'remote_table', '>=0.2.17'
14
14
  gem.add_dependency 'activerecord', '>=2.3.4'
15
15
  gem.add_dependency 'activesupport', '>=2.3.4'
16
16
  gem.add_dependency 'andand', '>=1.3.1'
17
17
  gem.add_dependency 'errata', '>=0.1.7'
18
18
  gem.add_dependency 'conversions', '>=1.4.4'
19
19
  gem.add_dependency 'blockenspiel', '>=0.3.2'
20
+ gem.add_dependency 'text', '>=0.2.0'
20
21
  gem.add_dependency 'log4r', '>=1.1.7'
22
+ gem.add_development_dependency "loose_tight_dictionary", ">=0.0.3"
21
23
  gem.require_path = "lib"
22
24
  gem.files.include %w(lib/data_miner) unless gem.files.empty? # seems to fail once it's in the wild
23
25
  gem.rdoc_options << '--line-numbers' << '--inline-source'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.21
1
+ 0.4.22
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.21"
8
+ s.version = "0.4.22"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-04-16}
12
+ s.date = %q{2010-04-28}
13
13
  s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -52,33 +52,39 @@ Gem::Specification.new do |s|
52
52
  s.specification_version = 3
53
53
 
54
54
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
55
- s.add_runtime_dependency(%q<remote_table>, [">= 0.2.10"])
55
+ s.add_runtime_dependency(%q<remote_table>, [">= 0.2.17"])
56
56
  s.add_runtime_dependency(%q<activerecord>, [">= 2.3.4"])
57
57
  s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
58
58
  s.add_runtime_dependency(%q<andand>, [">= 1.3.1"])
59
59
  s.add_runtime_dependency(%q<errata>, [">= 0.1.7"])
60
60
  s.add_runtime_dependency(%q<conversions>, [">= 1.4.4"])
61
61
  s.add_runtime_dependency(%q<blockenspiel>, [">= 0.3.2"])
62
+ s.add_runtime_dependency(%q<text>, [">= 0.2.0"])
62
63
  s.add_runtime_dependency(%q<log4r>, [">= 1.1.7"])
64
+ s.add_development_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
63
65
  else
64
- s.add_dependency(%q<remote_table>, [">= 0.2.10"])
66
+ s.add_dependency(%q<remote_table>, [">= 0.2.17"])
65
67
  s.add_dependency(%q<activerecord>, [">= 2.3.4"])
66
68
  s.add_dependency(%q<activesupport>, [">= 2.3.4"])
67
69
  s.add_dependency(%q<andand>, [">= 1.3.1"])
68
70
  s.add_dependency(%q<errata>, [">= 0.1.7"])
69
71
  s.add_dependency(%q<conversions>, [">= 1.4.4"])
70
72
  s.add_dependency(%q<blockenspiel>, [">= 0.3.2"])
73
+ s.add_dependency(%q<text>, [">= 0.2.0"])
71
74
  s.add_dependency(%q<log4r>, [">= 1.1.7"])
75
+ s.add_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
72
76
  end
73
77
  else
74
- s.add_dependency(%q<remote_table>, [">= 0.2.10"])
78
+ s.add_dependency(%q<remote_table>, [">= 0.2.17"])
75
79
  s.add_dependency(%q<activerecord>, [">= 2.3.4"])
76
80
  s.add_dependency(%q<activesupport>, [">= 2.3.4"])
77
81
  s.add_dependency(%q<andand>, [">= 1.3.1"])
78
82
  s.add_dependency(%q<errata>, [">= 0.1.7"])
79
83
  s.add_dependency(%q<conversions>, [">= 1.4.4"])
80
84
  s.add_dependency(%q<blockenspiel>, [">= 0.3.2"])
85
+ s.add_dependency(%q<text>, [">= 0.2.0"])
81
86
  s.add_dependency(%q<log4r>, [">= 1.1.7"])
87
+ s.add_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
82
88
  end
83
89
  end
84
90
 
@@ -2,6 +2,8 @@ require 'active_support'
2
2
  require 'active_support/version'
3
3
  %w{
4
4
  active_support/core_ext/array/conversions
5
+ active_support/core_ext/string/access
6
+ active_support/core_ext/string/multibyte
5
7
  }.each do |active_support_3_requirement|
6
8
  require active_support_3_requirement
7
9
  end if ActiveSupport::VERSION::MAJOR == 3
@@ -15,6 +17,7 @@ require 'andand'
15
17
  require 'log4r'
16
18
  require 'fileutils'
17
19
  require 'tmpdir'
20
+ require 'amatch'
18
21
 
19
22
  require 'data_miner/attribute'
20
23
  require 'data_miner/configuration'
@@ -11,6 +11,7 @@ module DataMiner
11
11
  :to_units,
12
12
  :static,
13
13
  :dictionary,
14
+ :matcher,
14
15
  :field_name,
15
16
  :delimiter,
16
17
  :split,
@@ -72,7 +73,12 @@ module DataMiner
72
73
  value
73
74
  end
74
75
 
76
+ def match_row(row)
77
+ matcher.lookup row
78
+ end
79
+
75
80
  def value_from_row(row)
81
+ return match_row row if wants_matcher?
76
82
  value = value_in_source row
77
83
  return value if value.is_a? ActiveRecord::Base # carry through trapdoor
78
84
  value = value_in_dictionary value if wants_dictionary?
@@ -159,6 +165,9 @@ module DataMiner
159
165
  def wants_dictionary?
160
166
  options[:dictionary].present?
161
167
  end
168
+ def wants_matcher?
169
+ options[:matcher].present?
170
+ end
162
171
 
163
172
  # Options that always have values
164
173
  def field_name
@@ -209,5 +218,8 @@ module DataMiner
209
218
  def dictionary
210
219
  @_dictionary ||= Dictionary.new options[:dictionary]
211
220
  end
221
+ def matcher
222
+ @_matcher ||= options[:matcher].new
223
+ end
212
224
  end
213
225
  end
@@ -16,7 +16,7 @@ module DataMiner
16
16
  @position_in_run = position_in_run
17
17
  @description = description
18
18
  @errata = Errata.new(:url => options[:errata], :klass => resource) if options[:errata]
19
- @table = RemoteTable.new(options.slice(:url, :filename, :form_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop, :encoding, :compression, :glob))
19
+ @table = RemoteTable.new options
20
20
  end
21
21
 
22
22
  def inspect
@@ -940,9 +940,91 @@ class T100FlightSegment < ActiveRecord::Base
940
940
  end
941
941
  end
942
942
 
943
+ require 'loose_tight_dictionary'
944
+ class Aircraft < ActiveRecord::Base
945
+ set_primary_key :icao_code
946
+
947
+ def self.bts_dictionary
948
+ @_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
949
+ :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
950
+ :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
951
+ :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
952
+ :left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
953
+ :right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
954
+ end
955
+
956
+ class BtsAircraftTypeCodeMatcher
957
+ def lookup(left_record)
958
+ right_record = Aircraft.bts_dictionary.left_to_right left_record
959
+ right_record['Aircraft Type'] if right_record
960
+ end
961
+ end
962
+
963
+ class BtsNameMatcher
964
+ def lookup(left_record)
965
+ right_record = Aircraft.bts_dictionary.left_to_right left_record
966
+ right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
967
+ end
968
+ end
969
+
970
+ class << self
971
+ # for errata
972
+ def is_not_attributed_to_aerospatiale?(row)
973
+ not row['Manufacturer'] =~ /AEROSPATIALE/i
974
+ end
975
+
976
+ def is_not_attributed_to_cessna?(row)
977
+ not row['Manufacturer'] =~ /CESSNA/i
978
+ end
979
+
980
+ def is_not_attributed_to_learjet?(row)
981
+ not row['Manufacturer'] =~ /LEAR/i
982
+ end
983
+
984
+ def is_not_attributed_to_dehavilland?(row)
985
+ not row['Manufacturer'] =~ /DE ?HAVILLAND/i
986
+ end
987
+
988
+ def is_not_attributed_to_mcdonnell_douglas?(row)
989
+ not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
990
+ end
991
+
992
+ def is_not_a_dc_plane?(row)
993
+ not row['Model'] =~ /DC/i
994
+ end
995
+
996
+ def is_a_crj_900?(row)
997
+ row['Designator'].downcase == 'crj9'
998
+ end
999
+ end
1000
+
1001
+ data_miner do
1002
+ # ('A'..'Z').each do |letter|
1003
+ # Note: for the purposes of testing, only importing "D"
1004
+ %w{ D }.each do |letter|
1005
+ import("ICAO codes starting with letter #{letter} used by the FAA",
1006
+ :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
1007
+ :encoding => 'US-ASCII',
1008
+ :row_xpath => '//table/tr[2]/td/table/tr',
1009
+ :errata => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
1010
+ :column_xpath => 'td') do
1011
+ key 'icao_code', :field_name => 'Designator'
1012
+ store 'bts_name', :matcher => Aircraft::BtsNameMatcher
1013
+ store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher
1014
+ store 'manufacturer_name', :field_name => 'Manufacturer'
1015
+ store 'name', :field_name => 'Model'
1016
+ end
1017
+ end
1018
+ end
1019
+ end
1020
+
943
1021
  # todo: have somebody properly organize these
944
1022
  class DataMinerTest < Test::Unit::TestCase
945
1023
  if ENV['NEW'] == 'true'
1024
+ should "mine aircraft" do
1025
+ Aircraft.run_data_miner!
1026
+ assert Aircraft.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
1027
+ end
946
1028
  end
947
1029
 
948
1030
  if ENV['FAST'] == 'true'
@@ -14,6 +14,10 @@ ActiveRecord::Base.establish_connection(
14
14
  'password' => ''
15
15
  )
16
16
 
17
+ ActiveSupport::Inflector.inflections do |inflect|
18
+ inflect.uncountable 'aircraft'
19
+ end
20
+
17
21
  class Test::Unit::TestCase
18
22
  end
19
23
 
@@ -269,6 +273,26 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
269
273
  t.integer 'data_miner_last_run_id'
270
274
  end
271
275
  execute "ALTER TABLE residential_energy_consumption_survey_responses ADD PRIMARY KEY (department_of_energy_identifier);"
276
+
277
+ create_table 'aircraft', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
278
+ t.string 'icao_code'
279
+ t.string 'manufacturer_name'
280
+ t.string 'name'
281
+
282
+ t.string "bts_name"
283
+ t.string "bts_aircraft_type_code"
284
+
285
+ # t.string 'brighter_planet_aircraft_class_code'
286
+ # t.float 'm3'
287
+ # t.float 'm2'
288
+ # t.float 'm1'
289
+ # t.float 'endpoint_fuel'
290
+ t.datetime 'updated_at'
291
+ t.datetime 'created_at'
292
+ t.integer 'data_miner_touch_count'
293
+ t.integer 'data_miner_last_run_id'
294
+ end
295
+ execute 'ALTER TABLE aircraft ADD PRIMARY KEY (icao_code);'
272
296
  end
273
297
 
274
298
  DataMiner::Run.create_tables
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 21
9
- version: 0.4.21
8
+ - 22
9
+ version: 0.4.22
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-04-16 00:00:00 -04:00
18
+ date: 2010-04-28 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -28,8 +28,8 @@ dependencies:
28
28
  segments:
29
29
  - 0
30
30
  - 2
31
- - 10
32
- version: 0.2.10
31
+ - 17
32
+ version: 0.2.17
33
33
  type: :runtime
34
34
  version_requirements: *id001
35
35
  - !ruby/object:Gem::Dependency
@@ -117,9 +117,23 @@ dependencies:
117
117
  type: :runtime
118
118
  version_requirements: *id007
119
119
  - !ruby/object:Gem::Dependency
120
- name: log4r
120
+ name: text
121
121
  prerelease: false
122
122
  requirement: &id008 !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ segments:
127
+ - 0
128
+ - 2
129
+ - 0
130
+ version: 0.2.0
131
+ type: :runtime
132
+ version_requirements: *id008
133
+ - !ruby/object:Gem::Dependency
134
+ name: log4r
135
+ prerelease: false
136
+ requirement: &id009 !ruby/object:Gem::Requirement
123
137
  requirements:
124
138
  - - ">="
125
139
  - !ruby/object:Gem::Version
@@ -129,7 +143,21 @@ dependencies:
129
143
  - 7
130
144
  version: 1.1.7
131
145
  type: :runtime
132
- version_requirements: *id008
146
+ version_requirements: *id009
147
+ - !ruby/object:Gem::Dependency
148
+ name: loose_tight_dictionary
149
+ prerelease: false
150
+ requirement: &id010 !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ segments:
155
+ - 0
156
+ - 0
157
+ - 3
158
+ version: 0.0.3
159
+ type: :development
160
+ version_requirements: *id010
133
161
  description: Mine remote data into your ActiveRecord models. You can also perform associations and convert units.
134
162
  email: seamus@abshere.net
135
163
  executables: []