data_miner 0.4.21 → 0.4.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +5 -0
- data/Rakefile +3 -1
- data/VERSION +1 -1
- data/data_miner.gemspec +11 -5
- data/lib/data_miner.rb +3 -0
- data/lib/data_miner/attribute.rb +12 -0
- data/lib/data_miner/import.rb +1 -1
- data/test/data_miner_test.rb +82 -0
- data/test/test_helper.rb +24 -0
- metadata +35 -7
data/README.rdoc
CHANGED
@@ -81,6 +81,11 @@ Now you should have
|
|
81
81
|
>> Airport.first.country_name
|
82
82
|
=> "Papua New Guinea"
|
83
83
|
|
84
|
+
==Wishlist
|
85
|
+
|
86
|
+
* each_record do |record| ... which would use find_in_batches
|
87
|
+
* when proxying add_column, rename_column, etc. automatically include the table name
|
88
|
+
|
84
89
|
==Authors
|
85
90
|
|
86
91
|
* Seamus Abshere <seamus@abshere.net>
|
data/Rakefile
CHANGED
@@ -10,14 +10,16 @@ begin
|
|
10
10
|
gem.email = "seamus@abshere.net"
|
11
11
|
gem.homepage = "http://github.com/seamusabshere/data_miner"
|
12
12
|
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
13
|
-
gem.add_dependency 'remote_table', '>=0.2.
|
13
|
+
gem.add_dependency 'remote_table', '>=0.2.17'
|
14
14
|
gem.add_dependency 'activerecord', '>=2.3.4'
|
15
15
|
gem.add_dependency 'activesupport', '>=2.3.4'
|
16
16
|
gem.add_dependency 'andand', '>=1.3.1'
|
17
17
|
gem.add_dependency 'errata', '>=0.1.7'
|
18
18
|
gem.add_dependency 'conversions', '>=1.4.4'
|
19
19
|
gem.add_dependency 'blockenspiel', '>=0.3.2'
|
20
|
+
gem.add_dependency 'text', '>=0.2.0'
|
20
21
|
gem.add_dependency 'log4r', '>=1.1.7'
|
22
|
+
gem.add_development_dependency "loose_tight_dictionary", ">=0.0.3"
|
21
23
|
gem.require_path = "lib"
|
22
24
|
gem.files.include %w(lib/data_miner) unless gem.files.empty? # seems to fail once it's in the wild
|
23
25
|
gem.rdoc_options << '--line-numbers' << '--inline-source'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.21
|
1
|
+
0.4.22
|
data/data_miner.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.21"
|
8
|
+
s.version = "0.4.22"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-04-
|
12
|
+
s.date = %q{2010-04-28}
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -52,33 +52,39 @@ Gem::Specification.new do |s|
|
|
52
52
|
s.specification_version = 3
|
53
53
|
|
54
54
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
55
|
-
s.add_runtime_dependency(%q<remote_table>, [">= 0.2.
|
55
|
+
s.add_runtime_dependency(%q<remote_table>, [">= 0.2.17"])
|
56
56
|
s.add_runtime_dependency(%q<activerecord>, [">= 2.3.4"])
|
57
57
|
s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
|
58
58
|
s.add_runtime_dependency(%q<andand>, [">= 1.3.1"])
|
59
59
|
s.add_runtime_dependency(%q<errata>, [">= 0.1.7"])
|
60
60
|
s.add_runtime_dependency(%q<conversions>, [">= 1.4.4"])
|
61
61
|
s.add_runtime_dependency(%q<blockenspiel>, [">= 0.3.2"])
|
62
|
+
s.add_runtime_dependency(%q<text>, [">= 0.2.0"])
|
62
63
|
s.add_runtime_dependency(%q<log4r>, [">= 1.1.7"])
|
64
|
+
s.add_development_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
|
63
65
|
else
|
64
|
-
s.add_dependency(%q<remote_table>, [">= 0.2.
|
66
|
+
s.add_dependency(%q<remote_table>, [">= 0.2.17"])
|
65
67
|
s.add_dependency(%q<activerecord>, [">= 2.3.4"])
|
66
68
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
67
69
|
s.add_dependency(%q<andand>, [">= 1.3.1"])
|
68
70
|
s.add_dependency(%q<errata>, [">= 0.1.7"])
|
69
71
|
s.add_dependency(%q<conversions>, [">= 1.4.4"])
|
70
72
|
s.add_dependency(%q<blockenspiel>, [">= 0.3.2"])
|
73
|
+
s.add_dependency(%q<text>, [">= 0.2.0"])
|
71
74
|
s.add_dependency(%q<log4r>, [">= 1.1.7"])
|
75
|
+
s.add_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
|
72
76
|
end
|
73
77
|
else
|
74
|
-
s.add_dependency(%q<remote_table>, [">= 0.2.
|
78
|
+
s.add_dependency(%q<remote_table>, [">= 0.2.17"])
|
75
79
|
s.add_dependency(%q<activerecord>, [">= 2.3.4"])
|
76
80
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
77
81
|
s.add_dependency(%q<andand>, [">= 1.3.1"])
|
78
82
|
s.add_dependency(%q<errata>, [">= 0.1.7"])
|
79
83
|
s.add_dependency(%q<conversions>, [">= 1.4.4"])
|
80
84
|
s.add_dependency(%q<blockenspiel>, [">= 0.3.2"])
|
85
|
+
s.add_dependency(%q<text>, [">= 0.2.0"])
|
81
86
|
s.add_dependency(%q<log4r>, [">= 1.1.7"])
|
87
|
+
s.add_dependency(%q<loose_tight_dictionary>, [">= 0.0.3"])
|
82
88
|
end
|
83
89
|
end
|
84
90
|
|
data/lib/data_miner.rb
CHANGED
@@ -2,6 +2,8 @@ require 'active_support'
|
|
2
2
|
require 'active_support/version'
|
3
3
|
%w{
|
4
4
|
active_support/core_ext/array/conversions
|
5
|
+
active_support/core_ext/string/access
|
6
|
+
active_support/core_ext/string/multibyte
|
5
7
|
}.each do |active_support_3_requirement|
|
6
8
|
require active_support_3_requirement
|
7
9
|
end if ActiveSupport::VERSION::MAJOR == 3
|
@@ -15,6 +17,7 @@ require 'andand'
|
|
15
17
|
require 'log4r'
|
16
18
|
require 'fileutils'
|
17
19
|
require 'tmpdir'
|
20
|
+
require 'amatch'
|
18
21
|
|
19
22
|
require 'data_miner/attribute'
|
20
23
|
require 'data_miner/configuration'
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -11,6 +11,7 @@ module DataMiner
|
|
11
11
|
:to_units,
|
12
12
|
:static,
|
13
13
|
:dictionary,
|
14
|
+
:matcher,
|
14
15
|
:field_name,
|
15
16
|
:delimiter,
|
16
17
|
:split,
|
@@ -72,7 +73,12 @@ module DataMiner
|
|
72
73
|
value
|
73
74
|
end
|
74
75
|
|
76
|
+
def match_row(row)
|
77
|
+
matcher.lookup row
|
78
|
+
end
|
79
|
+
|
75
80
|
def value_from_row(row)
|
81
|
+
return match_row row if wants_matcher?
|
76
82
|
value = value_in_source row
|
77
83
|
return value if value.is_a? ActiveRecord::Base # carry through trapdoor
|
78
84
|
value = value_in_dictionary value if wants_dictionary?
|
@@ -159,6 +165,9 @@ module DataMiner
|
|
159
165
|
def wants_dictionary?
|
160
166
|
options[:dictionary].present?
|
161
167
|
end
|
168
|
+
def wants_matcher?
|
169
|
+
options[:matcher].present?
|
170
|
+
end
|
162
171
|
|
163
172
|
# Options that always have values
|
164
173
|
def field_name
|
@@ -209,5 +218,8 @@ module DataMiner
|
|
209
218
|
def dictionary
|
210
219
|
@_dictionary ||= Dictionary.new options[:dictionary]
|
211
220
|
end
|
221
|
+
def matcher
|
222
|
+
@_matcher ||= options[:matcher].new
|
223
|
+
end
|
212
224
|
end
|
213
225
|
end
|
data/lib/data_miner/import.rb
CHANGED
@@ -16,7 +16,7 @@ module DataMiner
|
|
16
16
|
@position_in_run = position_in_run
|
17
17
|
@description = description
|
18
18
|
@errata = Errata.new(:url => options[:errata], :klass => resource) if options[:errata]
|
19
|
-
@table = RemoteTable.new
|
19
|
+
@table = RemoteTable.new options
|
20
20
|
end
|
21
21
|
|
22
22
|
def inspect
|
data/test/data_miner_test.rb
CHANGED
@@ -940,9 +940,91 @@ class T100FlightSegment < ActiveRecord::Base
|
|
940
940
|
end
|
941
941
|
end
|
942
942
|
|
943
|
+
require 'loose_tight_dictionary'
|
944
|
+
class Aircraft < ActiveRecord::Base
|
945
|
+
set_primary_key :icao_code
|
946
|
+
|
947
|
+
def self.bts_dictionary
|
948
|
+
@_dictionary ||= LooseTightDictionary.new RemoteTable.new(:url => 'http://www.bts.gov/programs/airline_information/accounting_and_reporting_directives/csv/number_260.csv', :select => lambda { |record| record['Aircraft Type'].to_i.between?(1, 998) and record['Manufacturer'].present? }),
|
949
|
+
:tightenings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=0&output=csv', :headers => false),
|
950
|
+
:identities => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=3&output=csv', :headers => false),
|
951
|
+
:blockings => RemoteTable.new(:url => 'http://spreadsheets.google.com/pub?key=tiS_6CCDDM_drNphpYwE_iw&single=true&gid=4&output=csv', :headers => false),
|
952
|
+
:left_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Model'] },
|
953
|
+
:right_reader => lambda { |record| record['Manufacturer'] + ' ' + record['Long Name'] }
|
954
|
+
end
|
955
|
+
|
956
|
+
class BtsAircraftTypeCodeMatcher
|
957
|
+
def lookup(left_record)
|
958
|
+
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
959
|
+
right_record['Aircraft Type'] if right_record
|
960
|
+
end
|
961
|
+
end
|
962
|
+
|
963
|
+
class BtsNameMatcher
|
964
|
+
def lookup(left_record)
|
965
|
+
right_record = Aircraft.bts_dictionary.left_to_right left_record
|
966
|
+
right_record['Manufacturer'] + ' ' + right_record['Long Name'] if right_record
|
967
|
+
end
|
968
|
+
end
|
969
|
+
|
970
|
+
class << self
|
971
|
+
# for errata
|
972
|
+
def is_not_attributed_to_aerospatiale?(row)
|
973
|
+
not row['Manufacturer'] =~ /AEROSPATIALE/i
|
974
|
+
end
|
975
|
+
|
976
|
+
def is_not_attributed_to_cessna?(row)
|
977
|
+
not row['Manufacturer'] =~ /CESSNA/i
|
978
|
+
end
|
979
|
+
|
980
|
+
def is_not_attributed_to_learjet?(row)
|
981
|
+
not row['Manufacturer'] =~ /LEAR/i
|
982
|
+
end
|
983
|
+
|
984
|
+
def is_not_attributed_to_dehavilland?(row)
|
985
|
+
not row['Manufacturer'] =~ /DE ?HAVILLAND/i
|
986
|
+
end
|
987
|
+
|
988
|
+
def is_not_attributed_to_mcdonnell_douglas?(row)
|
989
|
+
not row['Manufacturer'] =~ /MCDONNELL DOUGLAS/i
|
990
|
+
end
|
991
|
+
|
992
|
+
def is_not_a_dc_plane?(row)
|
993
|
+
not row['Model'] =~ /DC/i
|
994
|
+
end
|
995
|
+
|
996
|
+
def is_a_crj_900?(row)
|
997
|
+
row['Designator'].downcase == 'crj9'
|
998
|
+
end
|
999
|
+
end
|
1000
|
+
|
1001
|
+
data_miner do
|
1002
|
+
# ('A'..'Z').each do |letter|
|
1003
|
+
# Note: for the purposes of testing, only importing "D"
|
1004
|
+
%w{ D }.each do |letter|
|
1005
|
+
import("ICAO codes starting with letter #{letter} used by the FAA",
|
1006
|
+
:url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
|
1007
|
+
:encoding => 'US-ASCII',
|
1008
|
+
:row_xpath => '//table/tr[2]/td/table/tr',
|
1009
|
+
:errata => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
|
1010
|
+
:column_xpath => 'td') do
|
1011
|
+
key 'icao_code', :field_name => 'Designator'
|
1012
|
+
store 'bts_name', :matcher => Aircraft::BtsNameMatcher
|
1013
|
+
store 'bts_aircraft_type_code', :matcher => Aircraft::BtsAircraftTypeCodeMatcher
|
1014
|
+
store 'manufacturer_name', :field_name => 'Manufacturer'
|
1015
|
+
store 'name', :field_name => 'Model'
|
1016
|
+
end
|
1017
|
+
end
|
1018
|
+
end
|
1019
|
+
end
|
1020
|
+
|
943
1021
|
# todo: have somebody properly organize these
|
944
1022
|
class DataMinerTest < Test::Unit::TestCase
|
945
1023
|
if ENV['NEW'] == 'true'
|
1024
|
+
should "mine aircraft" do
|
1025
|
+
Aircraft.run_data_miner!
|
1026
|
+
assert Aircraft.exists? :icao_code => 'DC91', :bts_aircraft_type_code => '630'
|
1027
|
+
end
|
946
1028
|
end
|
947
1029
|
|
948
1030
|
if ENV['FAST'] == 'true'
|
data/test/test_helper.rb
CHANGED
@@ -14,6 +14,10 @@ ActiveRecord::Base.establish_connection(
|
|
14
14
|
'password' => ''
|
15
15
|
)
|
16
16
|
|
17
|
+
ActiveSupport::Inflector.inflections do |inflect|
|
18
|
+
inflect.uncountable 'aircraft'
|
19
|
+
end
|
20
|
+
|
17
21
|
class Test::Unit::TestCase
|
18
22
|
end
|
19
23
|
|
@@ -269,6 +273,26 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
|
|
269
273
|
t.integer 'data_miner_last_run_id'
|
270
274
|
end
|
271
275
|
execute "ALTER TABLE residential_energy_consumption_survey_responses ADD PRIMARY KEY (department_of_energy_identifier);"
|
276
|
+
|
277
|
+
create_table 'aircraft', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
278
|
+
t.string 'icao_code'
|
279
|
+
t.string 'manufacturer_name'
|
280
|
+
t.string 'name'
|
281
|
+
|
282
|
+
t.string "bts_name"
|
283
|
+
t.string "bts_aircraft_type_code"
|
284
|
+
|
285
|
+
# t.string 'brighter_planet_aircraft_class_code'
|
286
|
+
# t.float 'm3'
|
287
|
+
# t.float 'm2'
|
288
|
+
# t.float 'm1'
|
289
|
+
# t.float 'endpoint_fuel'
|
290
|
+
t.datetime 'updated_at'
|
291
|
+
t.datetime 'created_at'
|
292
|
+
t.integer 'data_miner_touch_count'
|
293
|
+
t.integer 'data_miner_last_run_id'
|
294
|
+
end
|
295
|
+
execute 'ALTER TABLE aircraft ADD PRIMARY KEY (icao_code);'
|
272
296
|
end
|
273
297
|
|
274
298
|
DataMiner::Run.create_tables
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 21
|
9
|
-
version: 0.4.21
|
8
|
+
- 22
|
9
|
+
version: 0.4.22
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-04-
|
18
|
+
date: 2010-04-28 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -28,8 +28,8 @@ dependencies:
|
|
28
28
|
segments:
|
29
29
|
- 0
|
30
30
|
- 2
|
31
|
-
-
|
32
|
-
version: 0.2.
|
31
|
+
- 17
|
32
|
+
version: 0.2.17
|
33
33
|
type: :runtime
|
34
34
|
version_requirements: *id001
|
35
35
|
- !ruby/object:Gem::Dependency
|
@@ -117,9 +117,23 @@ dependencies:
|
|
117
117
|
type: :runtime
|
118
118
|
version_requirements: *id007
|
119
119
|
- !ruby/object:Gem::Dependency
|
120
|
-
name: log4r
|
120
|
+
name: text
|
121
121
|
prerelease: false
|
122
122
|
requirement: &id008 !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
segments:
|
127
|
+
- 0
|
128
|
+
- 2
|
129
|
+
- 0
|
130
|
+
version: 0.2.0
|
131
|
+
type: :runtime
|
132
|
+
version_requirements: *id008
|
133
|
+
- !ruby/object:Gem::Dependency
|
134
|
+
name: log4r
|
135
|
+
prerelease: false
|
136
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
123
137
|
requirements:
|
124
138
|
- - ">="
|
125
139
|
- !ruby/object:Gem::Version
|
@@ -129,7 +143,21 @@ dependencies:
|
|
129
143
|
- 7
|
130
144
|
version: 1.1.7
|
131
145
|
type: :runtime
|
132
|
-
version_requirements: *id008
|
146
|
+
version_requirements: *id009
|
147
|
+
- !ruby/object:Gem::Dependency
|
148
|
+
name: loose_tight_dictionary
|
149
|
+
prerelease: false
|
150
|
+
requirement: &id010 !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - ">="
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
segments:
|
155
|
+
- 0
|
156
|
+
- 0
|
157
|
+
- 3
|
158
|
+
version: 0.0.3
|
159
|
+
type: :development
|
160
|
+
version_requirements: *id010
|
133
161
|
description: Mine remote data into your ActiveRecord models. You can also perform associations and convert units.
|
134
162
|
email: seamus@abshere.net
|
135
163
|
executables: []
|