data_miner 0.4.21 → 0.4.22
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +5 -0
- data/Rakefile +3 -1
- data/VERSION +1 -1
- data/data_miner.gemspec +11 -5
- data/lib/data_miner.rb +3 -0
- data/lib/data_miner/attribute.rb +12 -0
- data/lib/data_miner/import.rb +1 -1
- data/test/data_miner_test.rb +82 -0
- data/test/test_helper.rb +24 -0
- metadata +35 -7
data/README.rdoc
CHANGED
@@ -81,6 +81,11 @@ Now you should have
|
|
81
81
|
>> Airport.first.country_name
|
82
82
|
=> "Papua New Guinea"
|
83
83
|
|
84
|
+
==Wishlist
|
85
|
+
|
86
|
+
* each_record do |record| ... which would use find_in_batches
|
87
|
+
* when proxying add_column, rename_column, etc. automatically include the table name
|
88
|
+
|
84
89
|
==Authors
|
85
90
|
|
86
91
|
* Seamus Abshere <seamus@abshere.net>
|
data/Rakefile
CHANGED
@@ -10,14 +10,16 @@ begin
|
|
10
10
|
gem.email = "seamus@abshere.net"
|
11
11
|
gem.homepage = "http://github.com/seamusabshere/data_miner"
|
12
12
|
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
13
|
-
gem.add_dependency 'remote_table', '>=0.2.
|
13
|
+
gem.add_dependency 'remote_table', '>=0.2.17'
|
14
14
|
gem.add_dependency 'activerecord', '>=2.3.4'
|
15
15
|
gem.add_dependency 'activesupport', '>=2.3.4'
|
16
16
|
gem.add_dependency 'andand', '>=1.3.1'
|
17
17
|
gem.add_dependency 'errata', '>=0.1.7'
|
18
18
|
gem.add_dependency 'conversions', '>=1.4.4'
|
19
19
|
gem.add_dependency 'blockenspiel', '>=0.3.2'
|
20
|
+
gem.add_dependency 'text', '>=0.2.0'
|
20
21
|
gem.add_dependency 'log4r', '>=1.1.7'
|
22
|
+
gem.add_development_dependency "loose_tight_dictionary", ">=0.0.3"
|
21
23
|
gem.require_path = "lib"
|
22
24
|
gem.files.include %w(lib/data_miner) unless gem.files.empty? # seems to fail once it's in the wild
|
23
25
|
gem.rdoc_options << '--line-numbers' << '--inline-source'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.21
|
1
|
+
0.4.22
|
data/data_miner.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.21"
|
8
|
+
s.version = "0.4.22"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-04-
|
12
|
+
s.date = %q{2010-04-28}
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -52,33 +52,39 @@ Gem::Specification.new do |s|
|
|
52
52
|
s.specification_version = 3
|
53
53
|
|
54
54
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
55
|
-
s.add_runtime_dependency(%q<remote_table>, [">= 0.2.
|
55
|
+
s.add_runtime_dependency(%q<remote_table>, [">= 0.2.17"])
|
56
56
|
s.add_runtime_dependency(%q<activerecord>, [">= 2.3.4"])
|
57
57
|
s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
|
58
58
|
s.add_runtime_dependency(%q<andand>, [">= 1.3.1"])
|
59
59
|
s.add_runtime_dependency(%q<errata>, [">= 0.1.7"])
|
60
60
|
s.add_runtime_dependency(%q<conversions>, [">= 1.4.4"])
|
61
61
|
s.add_runtime_dependency(%q<blockenspiel>, [">= 0.3.2"])
|
62
|
+
s.add_runtime_dependency(%q<text>, [">= 0.2.0"])
|
62
63
|
s.add_runtime_dependency(%q<log4r>, [">= 1.1.7"])
|
64
|
+
s.add_development_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
|
63
65
|
else
|
64
|
-
s.add_dependency(%q<remote_table>, [">= 0.2.
|
66
|
+
s.add_dependency(%q<remote_table>, [">= 0.2.17"])
|
65
67
|
s.add_dependency(%q<activerecord>, [">= 2.3.4"])
|
66
68
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
67
69
|
s.add_dependency(%q<andand>, [">= 1.3.1"])
|
68
70
|
s.add_dependency(%q<errata>, [">= 0.1.7"])
|
69
71
|
s.add_dependency(%q<conversions>, [">= 1.4.4"])
|
70
72
|
s.add_dependency(%q<blockenspiel>, [">= 0.3.2"])
|
73
|
+
s.add_dependency(%q<text>, [">= 0.2.0"])
|
71
74
|
s.add_dependency(%q<log4r>, [">= 1.1.7"])
|
75
|
+
s.add_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
|
72
76
|
end
|
73
77
|
else
|
74
|
-
s.add_dependency(%q<remote_table>, [">= 0.2.
|
78
|
+
s.add_dependency(%q<remote_table>, [">= 0.2.17"])
|
75
79
|
s.add_dependency(%q<activerecord>, [">= 2.3.4"])
|
76
80
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
77
81
|
s.add_dependency(%q<andand>, [">= 1.3.1"])
|
78
82
|
s.add_dependency(%q<errata>, [">= 0.1.7"])
|
79
83
|
s.add_dependency(%q<conversions>, [">= 1.4.4"])
|
80
84
|
s.add_dependency(%q<blockenspiel>, [">= 0.3.2"])
|
85
|
+
s.add_dependency(%q<text>, [">= 0.2.0"])
|
81
86
|
s.add_dependency(%q<log4r>, [">= 1.1.7"])
|
87
|
+
s.add_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
|
82
88
|
end
|
83
89
|
end
|
84
90
|
|
data/lib/data_miner.rb
CHANGED
@@ -2,6 +2,8 @@ require 'active_support'
|
|
2
2
|
require 'active_support/version'
|
3
3
|
%w{
|
4
4
|
active_support/core_ext/array/conversions
|
5
|
+
active_support/core_ext/string/access
|
6
|
+
active_support/core_ext/string/multibyte
|
5
7
|
}.each do |active_support_3_requirement|
|
6
8
|
require active_support_3_requirement
|
7
9
|
end if ActiveSupport::VERSION::MAJOR == 3
|
@@ -15,6 +17,7 @@ require 'andand'
|
|
15
17
|
require 'log4r'
|
16
18
|
require 'fileutils'
|
17
19
|
require 'tmpdir'
|
20
|
+
require 'amatch'
|
18
21
|
|
19
22
|
require 'data_miner/attribute'
|
20
23
|
require 'data_miner/configuration'
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -11,6 +11,7 @@ module DataMiner
|
|
11
11
|
:to_units,
|
12
12
|
:static,
|
13
13
|
:dictionary,
|
14
|
+
:matcher,
|
14
15
|
:field_name,
|
15
16
|
:delimiter,
|
16
17
|
:split,
|
@@ -72,7 +73,12 @@ module DataMiner
|
|
72
73
|
value
|
73
74
|
end
|
74
75
|
|
76
|
+
def match_row(row)
|
77
|
+
matcher.lookup row
|
78
|
+
end
|
79
|
+
|
75
80
|
def value_from_row(row)
|
81
|
+
return match_row row if wants_matcher?
|
76
82
|
value = value_in_source row
|
77
83
|
return value if value.is_a? ActiveRecord::Base # carry through trapdoor
|
78
84
|
value = value_in_dictionary value if wants_dictionary?
|
@@ -159,6 +165,9 @@ module DataMiner
|
|
159
165
|
def wants_dictionary?
|
160
166
|
options[:dictionary].present?
|
161
167
|
end
|
168
|
+
def wants_matcher?
|
169
|
+
options[:matcher].present?
|
170
|
+
end
|
162
171
|
|
163
172
|
# Options that always have values
|
164
173
|
def field_name
|
@@ -209,5 +218,8 @@ module DataMiner
|
|
209
218
|
def dictionary
|
210
219
|
@_dictionary ||= Dictionary.new options[:dictionary]
|
211
220
|
end
|
221
|
+
def matcher
|
222
|
+
@_matcher ||= options[:matcher].new
|
223
|
+
end
|
212
224
|
end
|
213
225
|
end
|
data/lib/data_miner/import.rb
CHANGED
@@ -16,7 +16,7 @@ module DataMiner
|
|
16
16
|
@position_in_run = position_in_run
|
17
17
|
@description = description
|
18
18
|
@errata = Errata.new(:url => options[:errata], :klass => resource) if options[:errata]
|
19
|
-
@table = RemoteTable.new
|
19
|
+
@table = RemoteTable.new options
|
20
20
|
end
|
21
21
|
|
22
22
|
def inspect
|
data/test/data_miner_test.rb
CHANGED
@@ -940,9 +940,91 @@ class T100FlightSegment < ActiveRecord::Base
|
|
940
940
|
end
|
941
941
|
end
|
942
942
|
|
943
|
+
require 'loose_tight_dictionary'
|
944
|
+
class Aircraft < ActiveRecord::Base
|
945
|
+
set_primary_key :icao_code
|
946
|
+
|
947
|
+
def self.bts_dictionary
|
948
|
+
@_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
|
949
|
+
:tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
|
950
|
+
:identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
|
951
|
+
:blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
|
952
|
+
:left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
|
953
|
+
:right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
|
954
|
+
end
|
955
|
+
|
956
|
+
class BtsAircraftTypeCodeMatcher
|
957
|
+
def lookup(left_record)
|
958
|
+
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
959
|
+
right_record['Aircraft Type'] if right_record
|
960
|
+
end
|
961
|
+
end
|
962
|
+
|
963
|
+
class BtsNameMatcher
|
964
|
+
def lookup(left_record)
|
965
|
+
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
966
|
+
right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
|
967
|
+
end
|
968
|
+
end
|
969
|
+
|
970
|
+
class << self
|
971
|
+
# for errata
|
972
|
+
def is_not_attributed_to_aerospatiale?(row)
|
973
|
+
not row['Manufacturer'] =~ /AEROSPATIALE/i
|
974
|
+
end
|
975
|
+
|
976
|
+
def is_not_attributed_to_cessna?(row)
|
977
|
+
not row['Manufacturer'] =~ /CESSNA/i
|
978
|
+
end
|
979
|
+
|
980
|
+
def is_not_attributed_to_learjet?(row)
|
981
|
+
not row['Manufacturer'] =~ /LEAR/i
|
982
|
+
end
|
983
|
+
|
984
|
+
def is_not_attributed_to_dehavilland?(row)
|
985
|
+
not row['Manufacturer'] =~ /DE ?HAVILLAND/i
|
986
|
+
end
|
987
|
+
|
988
|
+
def is_not_attributed_to_mcdonnell_douglas?(row)
|
989
|
+
not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
|
990
|
+
end
|
991
|
+
|
992
|
+
def is_not_a_dc_plane?(row)
|
993
|
+
not row['Model'] =~ /DC/i
|
994
|
+
end
|
995
|
+
|
996
|
+
def is_a_crj_900?(row)
|
997
|
+
row['Designator'].downcase == 'crj9'
|
998
|
+
end
|
999
|
+
end
|
1000
|
+
|
1001
|
+
data_miner do
|
1002
|
+
# ('A'..'Z').each do |letter|
|
1003
|
+
# Note: for the purposes of testing, only importing "D"
|
1004
|
+
%w{ D }.each do |letter|
|
1005
|
+
import("ICAO codes starting with letter #{letter} used by the FAA",
|
1006
|
+
:url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
|
1007
|
+
:encoding => 'US-ASCII',
|
1008
|
+
:row_xpath => '//table/tr[2]/td/table/tr',
|
1009
|
+
:errata => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
1010
|
+
:column_xpath => 'td') do
|
1011
|
+
key 'icao_code', :field_name => 'Designator'
|
1012
|
+
store 'bts_name', :matcher => Aircraft::BtsNameMatcher
|
1013
|
+
store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher
|
1014
|
+
store 'manufacturer_name', :field_name => 'Manufacturer'
|
1015
|
+
store 'name', :field_name => 'Model'
|
1016
|
+
end
|
1017
|
+
end
|
1018
|
+
end
|
1019
|
+
end
|
1020
|
+
|
943
1021
|
# todo: have somebody properly organize these
|
944
1022
|
class DataMinerTest < Test::Unit::TestCase
|
945
1023
|
if ENV['NEW'] == 'true'
|
1024
|
+
should "mine aircraft" do
|
1025
|
+
Aircraft.run_data_miner!
|
1026
|
+
assert Aircraft.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
|
1027
|
+
end
|
946
1028
|
end
|
947
1029
|
|
948
1030
|
if ENV['FAST'] == 'true'
|
data/test/test_helper.rb
CHANGED
@@ -14,6 +14,10 @@ ActiveRecord::Base.establish_connection(
|
|
14
14
|
'password' => ''
|
15
15
|
)
|
16
16
|
|
17
|
+
ActiveSupport::Inflector.inflections do |inflect|
|
18
|
+
inflect.uncountable 'aircraft'
|
19
|
+
end
|
20
|
+
|
17
21
|
class Test::Unit::TestCase
|
18
22
|
end
|
19
23
|
|
@@ -269,6 +273,26 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
|
|
269
273
|
t.integer 'data_miner_last_run_id'
|
270
274
|
end
|
271
275
|
execute "ALTER TABLE residential_energy_consumption_survey_responses ADD PRIMARY KEY (department_of_energy_identifier);"
|
276
|
+
|
277
|
+
create_table 'aircraft', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
278
|
+
t.string 'icao_code'
|
279
|
+
t.string 'manufacturer_name'
|
280
|
+
t.string 'name'
|
281
|
+
|
282
|
+
t.string "bts_name"
|
283
|
+
t.string "bts_aircraft_type_code"
|
284
|
+
|
285
|
+
# t.string 'brighter_planet_aircraft_class_code'
|
286
|
+
# t.float 'm3'
|
287
|
+
# t.float 'm2'
|
288
|
+
# t.float 'm1'
|
289
|
+
# t.float 'endpoint_fuel'
|
290
|
+
t.datetime 'updated_at'
|
291
|
+
t.datetime 'created_at'
|
292
|
+
t.integer 'data_miner_touch_count'
|
293
|
+
t.integer 'data_miner_last_run_id'
|
294
|
+
end
|
295
|
+
execute 'ALTER TABLE aircraft ADD PRIMARY KEY (icao_code);'
|
272
296
|
end
|
273
297
|
|
274
298
|
DataMiner::Run.create_tables
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 21
|
9
|
-
version: 0.4.21
|
8
|
+
- 22
|
9
|
+
version: 0.4.22
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-04-
|
18
|
+
date: 2010-04-28 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -28,8 +28,8 @@ dependencies:
|
|
28
28
|
segments:
|
29
29
|
- 0
|
30
30
|
- 2
|
31
|
-
-
|
32
|
-
version: 0.2.
|
31
|
+
- 17
|
32
|
+
version: 0.2.17
|
33
33
|
type: :runtime
|
34
34
|
version_requirements: *id001
|
35
35
|
- !ruby/object:Gem::Dependency
|
@@ -117,9 +117,23 @@ dependencies:
|
|
117
117
|
type: :runtime
|
118
118
|
version_requirements: *id007
|
119
119
|
- !ruby/object:Gem::Dependency
|
120
|
-
name: log4r
|
120
|
+
name: text
|
121
121
|
prerelease: false
|
122
122
|
requirement: &id008 !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
segments:
|
127
|
+
- 0
|
128
|
+
- 2
|
129
|
+
- 0
|
130
|
+
version: 0.2.0
|
131
|
+
type: :runtime
|
132
|
+
version_requirements: *id008
|
133
|
+
- !ruby/object:Gem::Dependency
|
134
|
+
name: log4r
|
135
|
+
prerelease: false
|
136
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
123
137
|
requirements:
|
124
138
|
- - ">="
|
125
139
|
- !ruby/object:Gem::Version
|
@@ -129,7 +143,21 @@ dependencies:
|
|
129
143
|
- 7
|
130
144
|
version: 1.1.7
|
131
145
|
type: :runtime
|
132
|
-
version_requirements: *id008
|
146
|
+
version_requirements: *id009
|
147
|
+
- !ruby/object:Gem::Dependency
|
148
|
+
name: loose_tight_dictionary
|
149
|
+
prerelease: false
|
150
|
+
requirement: &id010 !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - ">="
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
segments:
|
155
|
+
- 0
|
156
|
+
- 0
|
157
|
+
- 3
|
158
|
+
version: 0.0.3
|
159
|
+
type: :development
|
160
|
+
version_requirements: *id010
|
133
161
|
description: Mine remote data into your ActiveRecord models. You can also perform associations and convert units.
|
134
162
|
email: seamus@abshere.net
|
135
163
|
executables: []
|