data_miner 0.4.21 → 0.4.22

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -81,6 +81,11 @@ Now you should have
81
81
  >> Airport.first.country_name
82
82
  => "Papua New Guinea"
83
83
 
84
+ ==Wishlist
85
+
86
+ * each_record do |record| ... which would use find_in_batches
87
+ * when proxying add_column, rename_column, etc. automatically include the table name
88
+
84
89
  ==Authors
85
90
 
86
91
  * Seamus Abshere <seamus@abshere.net>
data/Rakefile CHANGED
@@ -10,14 +10,16 @@ begin
10
10
  gem.email = "seamus@abshere.net"
11
11
  gem.homepage = "http://github.com/seamusabshere/data_miner"
12
12
  gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
- gem.add_dependency 'remote_table', '>=0.2.10'
13
+ gem.add_dependency 'remote_table', '>=0.2.17'
14
14
  gem.add_dependency 'activerecord', '>=2.3.4'
15
15
  gem.add_dependency 'activesupport', '>=2.3.4'
16
16
  gem.add_dependency 'andand', '>=1.3.1'
17
17
  gem.add_dependency 'errata', '>=0.1.7'
18
18
  gem.add_dependency 'conversions', '>=1.4.4'
19
19
  gem.add_dependency 'blockenspiel', '>=0.3.2'
20
+ gem.add_dependency 'text', '>=0.2.0'
20
21
  gem.add_dependency 'log4r', '>=1.1.7'
22
+ gem.add_development_dependency "loose_tight_dictionary", ">=0.0.3"
21
23
  gem.require_path = "lib"
22
24
  gem.files.include %w(lib/data_miner) unless gem.files.empty? # seems to fail once it's in the wild
23
25
  gem.rdoc_options << '--line-numbers' << '--inline-source'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.21
1
+ 0.4.22
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.21"
8
+ s.version = "0.4.22"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-04-16}
12
+ s.date = %q{2010-04-28}
13
13
  s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -52,33 +52,39 @@ Gem::Specification.new do |s|
52
52
  s.specification_version = 3
53
53
 
54
54
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
55
- s.add_runtime_dependency(%q<remote_table>, [">= 0.2.10"])
55
+ s.add_runtime_dependency(%q<remote_table>, [">= 0.2.17"])
56
56
  s.add_runtime_dependency(%q<activerecord>, [">= 2.3.4"])
57
57
  s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
58
58
  s.add_runtime_dependency(%q<andand>, [">= 1.3.1"])
59
59
  s.add_runtime_dependency(%q<errata>, [">= 0.1.7"])
60
60
  s.add_runtime_dependency(%q<conversions>, [">= 1.4.4"])
61
61
  s.add_runtime_dependency(%q<blockenspiel>, [">= 0.3.2"])
62
+ s.add_runtime_dependency(%q<text>, [">= 0.2.0"])
62
63
  s.add_runtime_dependency(%q<log4r>, [">= 1.1.7"])
64
+ s.add_development_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
63
65
  else
64
- s.add_dependency(%q<remote_table>, [">= 0.2.10"])
66
+ s.add_dependency(%q<remote_table>, [">= 0.2.17"])
65
67
  s.add_dependency(%q<activerecord>, [">= 2.3.4"])
66
68
  s.add_dependency(%q<activesupport>, [">= 2.3.4"])
67
69
  s.add_dependency(%q<andand>, [">= 1.3.1"])
68
70
  s.add_dependency(%q<errata>, [">= 0.1.7"])
69
71
  s.add_dependency(%q<conversions>, [">= 1.4.4"])
70
72
  s.add_dependency(%q<blockenspiel>, [">= 0.3.2"])
73
+ s.add_dependency(%q<text>, [">= 0.2.0"])
71
74
  s.add_dependency(%q<log4r>, [">= 1.1.7"])
75
+ s.add_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
72
76
  end
73
77
  else
74
- s.add_dependency(%q<remote_table>, [">= 0.2.10"])
78
+ s.add_dependency(%q<remote_table>, [">= 0.2.17"])
75
79
  s.add_dependency(%q<activerecord>, [">= 2.3.4"])
76
80
  s.add_dependency(%q<activesupport>, [">= 2.3.4"])
77
81
  s.add_dependency(%q<andand>, [">= 1.3.1"])
78
82
  s.add_dependency(%q<errata>, [">= 0.1.7"])
79
83
  s.add_dependency(%q<conversions>, [">= 1.4.4"])
80
84
  s.add_dependency(%q<blockenspiel>, [">= 0.3.2"])
85
+ s.add_dependency(%q<text>, [">= 0.2.0"])
81
86
  s.add_dependency(%q<log4r>, [">= 1.1.7"])
87
+ s.add_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
82
88
  end
83
89
  end
84
90
 
@@ -2,6 +2,8 @@ require 'active_support'
2
2
  require 'active_support/version'
3
3
  %w{
4
4
  active_support/core_ext/array/conversions
5
+ active_support/core_ext/string/access
6
+ active_support/core_ext/string/multibyte
5
7
  }.each do |active_support_3_requirement|
6
8
  require active_support_3_requirement
7
9
  end if ActiveSupport::VERSION::MAJOR == 3
@@ -15,6 +17,7 @@ require 'andand'
15
17
  require 'log4r'
16
18
  require 'fileutils'
17
19
  require 'tmpdir'
20
+ require 'amatch'
18
21
 
19
22
  require 'data_miner/attribute'
20
23
  require 'data_miner/configuration'
@@ -11,6 +11,7 @@ module DataMiner
11
11
  :to_units,
12
12
  :static,
13
13
  :dictionary,
14
+ :matcher,
14
15
  :field_name,
15
16
  :delimiter,
16
17
  :split,
@@ -72,7 +73,12 @@ module DataMiner
72
73
  value
73
74
  end
74
75
 
76
+ def match_row(row)
77
+ matcher.lookup row
78
+ end
79
+
75
80
  def value_from_row(row)
81
+ return match_row row if wants_matcher?
76
82
  value = value_in_source row
77
83
  return value if value.is_a? ActiveRecord::Base # carry through trapdoor
78
84
  value = value_in_dictionary value if wants_dictionary?
@@ -159,6 +165,9 @@ module DataMiner
159
165
  def wants_dictionary?
160
166
  options[:dictionary].present?
161
167
  end
168
+ def wants_matcher?
169
+ options[:matcher].present?
170
+ end
162
171
 
163
172
  # Options that always have values
164
173
  def field_name
@@ -209,5 +218,8 @@ module DataMiner
209
218
  def dictionary
210
219
  @_dictionary ||= Dictionary.new options[:dictionary]
211
220
  end
221
+ def matcher
222
+ @_matcher ||= options[:matcher].new
223
+ end
212
224
  end
213
225
  end
@@ -16,7 +16,7 @@ module DataMiner
16
16
  @position_in_run = position_in_run
17
17
  @description = description
18
18
  @errata = Errata.new(:url => options[:errata], :klass => resource) if options[:errata]
19
- @table = RemoteTable.new(options.slice(:url, :filename, :form_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop, :encoding, :compression, :glob))
19
+ @table = RemoteTable.new options
20
20
  end
21
21
 
22
22
  def inspect
@@ -940,9 +940,91 @@ class T100FlightSegment < ActiveRecord::Base
940
940
  end
941
941
  end
942
942
 
943
+ require 'loose_tight_dictionary'
944
+ class Aircraft < ActiveRecord::Base
945
+ set_primary_key :icao_code
946
+
947
+ def self.bts_dictionary
948
+ @_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
949
+ :tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
950
+ :identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
951
+ :blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
952
+ :left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
953
+ :right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
954
+ end
955
+
956
+ class BtsAircraftTypeCodeMatcher
957
+ def lookup(left_record)
958
+ right_record = Aircraft.bts_dictionary.left_to_right left_record
959
+ right_record['Aircraft Type'] if right_record
960
+ end
961
+ end
962
+
963
+ class BtsNameMatcher
964
+ def lookup(left_record)
965
+ right_record = Aircraft.bts_dictionary.left_to_right left_record
966
+ right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
967
+ end
968
+ end
969
+
970
+ class << self
971
+ # for errata
972
+ def is_not_attributed_to_aerospatiale?(row)
973
+ not row['Manufacturer'] =~ /AEROSPATIALE/i
974
+ end
975
+
976
+ def is_not_attributed_to_cessna?(row)
977
+ not row['Manufacturer'] =~ /CESSNA/i
978
+ end
979
+
980
+ def is_not_attributed_to_learjet?(row)
981
+ not row['Manufacturer'] =~ /LEAR/i
982
+ end
983
+
984
+ def is_not_attributed_to_dehavilland?(row)
985
+ not row['Manufacturer'] =~ /DE ?HAVILLAND/i
986
+ end
987
+
988
+ def is_not_attributed_to_mcdonnell_douglas?(row)
989
+ not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
990
+ end
991
+
992
+ def is_not_a_dc_plane?(row)
993
+ not row['Model'] =~ /DC/i
994
+ end
995
+
996
+ def is_a_crj_900?(row)
997
+ row['Designator'].downcase == 'crj9'
998
+ end
999
+ end
1000
+
1001
+ data_miner do
1002
+ # ('A'..'Z').each do |letter|
1003
+ # Note: for the purposes of testing, only importing "D"
1004
+ %w{ D }.each do |letter|
1005
+ import("ICAO codes starting with letter #{letter} used by the FAA",
1006
+ :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
1007
+ :encoding => 'US-ASCII',
1008
+ :row_xpath => '//table/tr[2]/td/table/tr',
1009
+ :errata => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
1010
+ :column_xpath => 'td') do
1011
+ key 'icao_code', :field_name => 'Designator'
1012
+ store 'bts_name', :matcher => Aircraft::BtsNameMatcher
1013
+ store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher
1014
+ store 'manufacturer_name', :field_name => 'Manufacturer'
1015
+ store 'name', :field_name => 'Model'
1016
+ end
1017
+ end
1018
+ end
1019
+ end
1020
+
943
1021
  # todo: have somebody properly organize these
944
1022
  class DataMinerTest < Test::Unit::TestCase
945
1023
  if ENV['NEW'] == 'true'
1024
+ should "mine aircraft" do
1025
+ Aircraft.run_data_miner!
1026
+ assert Aircraft.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
1027
+ end
946
1028
  end
947
1029
 
948
1030
  if ENV['FAST'] == 'true'
@@ -14,6 +14,10 @@ ActiveRecord::Base.establish_connection(
14
14
  'password' => ''
15
15
  )
16
16
 
17
+ ActiveSupport::Inflector.inflections do |inflect|
18
+ inflect.uncountable 'aircraft'
19
+ end
20
+
17
21
  class Test::Unit::TestCase
18
22
  end
19
23
 
@@ -269,6 +273,26 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
269
273
  t.integer 'data_miner_last_run_id'
270
274
  end
271
275
  execute "ALTER TABLE residential_energy_consumption_survey_responses ADD PRIMARY KEY (department_of_energy_identifier);"
276
+
277
+ create_table 'aircraft', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
278
+ t.string 'icao_code'
279
+ t.string 'manufacturer_name'
280
+ t.string 'name'
281
+
282
+ t.string "bts_name"
283
+ t.string "bts_aircraft_type_code"
284
+
285
+ # t.string 'brighter_planet_aircraft_class_code'
286
+ # t.float 'm3'
287
+ # t.float 'm2'
288
+ # t.float 'm1'
289
+ # t.float 'endpoint_fuel'
290
+ t.datetime 'updated_at'
291
+ t.datetime 'created_at'
292
+ t.integer 'data_miner_touch_count'
293
+ t.integer 'data_miner_last_run_id'
294
+ end
295
+ execute 'ALTER TABLE aircraft ADD PRIMARY KEY (icao_code);'
272
296
  end
273
297
 
274
298
  DataMiner::Run.create_tables
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 21
9
- version: 0.4.21
8
+ - 22
9
+ version: 0.4.22
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-04-16 00:00:00 -04:00
18
+ date: 2010-04-28 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -28,8 +28,8 @@ dependencies:
28
28
  segments:
29
29
  - 0
30
30
  - 2
31
- - 10
32
- version: 0.2.10
31
+ - 17
32
+ version: 0.2.17
33
33
  type: :runtime
34
34
  version_requirements: *id001
35
35
  - !ruby/object:Gem::Dependency
@@ -117,9 +117,23 @@ dependencies:
117
117
  type: :runtime
118
118
  version_requirements: *id007
119
119
  - !ruby/object:Gem::Dependency
120
- name: log4r
120
+ name: text
121
121
  prerelease: false
122
122
  requirement: &id008 !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ segments:
127
+ - 0
128
+ - 2
129
+ - 0
130
+ version: 0.2.0
131
+ type: :runtime
132
+ version_requirements: *id008
133
+ - !ruby/object:Gem::Dependency
134
+ name: log4r
135
+ prerelease: false
136
+ requirement: &id009 !ruby/object:Gem::Requirement
123
137
  requirements:
124
138
  - - ">="
125
139
  - !ruby/object:Gem::Version
@@ -129,7 +143,21 @@ dependencies:
129
143
  - 7
130
144
  version: 1.1.7
131
145
  type: :runtime
132
- version_requirements: *id008
146
+ version_requirements: *id009
147
+ - !ruby/object:Gem::Dependency
148
+ name: loose_tight_dictionary
149
+ prerelease: false
150
+ requirement: &id010 !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ segments:
155
+ - 0
156
+ - 0
157
+ - 3
158
+ version: 0.0.3
159
+ type: :development
160
+ version_requirements: *id010
133
161
  description: Mine remote data into your ActiveRecord models. You can also perform associations and convert units.
134
162
  email: seamus@abshere.net
135
163
  executables: []