earth 0.11.7 → 0.11.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (151) hide show
  1. data/README.markdown +30 -0
  2. data/bin/earth_tester.rb +1 -1
  3. data/earth.gemspec +3 -3
  4. data/lib/earth.rb +105 -90
  5. data/lib/earth/air.rb +3 -12
  6. data/lib/earth/air/aircraft.rb +9 -9
  7. data/lib/earth/air/airport.rb +3 -0
  8. data/lib/earth/air/airport/data_miner.rb +2 -0
  9. data/lib/earth/air/data_miner.rb +3 -10
  10. data/lib/earth/air/flight_segment.rb +3 -1
  11. data/lib/earth/air/flight_segment/data_miner.rb +2 -0
  12. data/lib/earth/all.rb +3 -13
  13. data/lib/earth/automobile.rb +3 -18
  14. data/lib/earth/automobile/automobile_fuel.rb +1 -3
  15. data/lib/earth/automobile/automobile_fuel/data_miner.rb +1 -0
  16. data/lib/earth/automobile/automobile_make.rb +1 -0
  17. data/lib/earth/automobile/automobile_make/data_miner.rb +1 -0
  18. data/lib/earth/automobile/automobile_make_model.rb +1 -0
  19. data/lib/earth/automobile/automobile_make_model/data_miner.rb +1 -0
  20. data/lib/earth/automobile/automobile_make_model_year.rb +1 -0
  21. data/lib/earth/automobile/automobile_make_model_year/data_miner.rb +1 -0
  22. data/lib/earth/automobile/automobile_make_model_year_variant.rb +1 -0
  23. data/lib/earth/automobile/automobile_make_model_year_variant/data_miner.rb +1 -0
  24. data/lib/earth/automobile/automobile_make_year.rb +1 -0
  25. data/lib/earth/automobile/automobile_make_year/data_miner.rb +1 -0
  26. data/lib/earth/automobile/automobile_make_year_fleet.rb +1 -0
  27. data/lib/earth/automobile/automobile_make_year_fleet/data_miner.rb +1 -0
  28. data/lib/earth/automobile/automobile_model.rb +1 -0
  29. data/lib/earth/automobile/automobile_model/data_miner.rb +1 -0
  30. data/lib/earth/automobile/automobile_size_class.rb +1 -0
  31. data/lib/earth/automobile/automobile_size_class/data_miner.rb +1 -0
  32. data/lib/earth/automobile/automobile_size_class_year.rb +1 -0
  33. data/lib/earth/automobile/automobile_size_class_year/data_miner.rb +1 -0
  34. data/lib/earth/automobile/automobile_type_fuel_control.rb +1 -0
  35. data/lib/earth/automobile/automobile_type_fuel_control/data_miner.rb +1 -0
  36. data/lib/earth/automobile/automobile_type_fuel_year.rb +1 -0
  37. data/lib/earth/automobile/automobile_type_fuel_year/data_miner.rb +1 -0
  38. data/lib/earth/automobile/automobile_type_fuel_year_age.rb +1 -0
  39. data/lib/earth/automobile/automobile_type_fuel_year_age/data_miner.rb +1 -0
  40. data/lib/earth/automobile/automobile_type_fuel_year_control.rb +1 -0
  41. data/lib/earth/automobile/automobile_type_fuel_year_control/data_miner.rb +1 -0
  42. data/lib/earth/automobile/automobile_type_year.rb +1 -0
  43. data/lib/earth/automobile/automobile_type_year/data_miner.rb +1 -0
  44. data/lib/earth/automobile/automobile_year.rb +1 -0
  45. data/lib/earth/automobile/automobile_year/data_miner.rb +1 -0
  46. data/lib/earth/automobile/data_miner.rb +3 -18
  47. data/lib/earth/bus.rb +3 -6
  48. data/lib/earth/bus/bus_class.rb +1 -0
  49. data/lib/earth/bus/bus_class/data_miner.rb +1 -0
  50. data/lib/earth/bus/bus_fuel.rb +1 -0
  51. data/lib/earth/bus/bus_fuel/data_miner.rb +1 -0
  52. data/lib/earth/bus/bus_fuel_control.rb +1 -0
  53. data/lib/earth/bus/bus_fuel_control/data_miner.rb +1 -0
  54. data/lib/earth/bus/bus_fuel_year_control.rb +1 -0
  55. data/lib/earth/bus/bus_fuel_year_control/data_miner.rb +1 -0
  56. data/lib/earth/bus/data_miner.rb +3 -6
  57. data/lib/earth/computation.rb +3 -5
  58. data/lib/earth/computation/computation_carrier.rb +1 -0
  59. data/lib/earth/computation/computation_carrier/data_miner.rb +1 -1
  60. data/lib/earth/computation/computation_carrier_instance_class.rb +1 -0
  61. data/lib/earth/computation/computation_carrier_instance_class/data_miner.rb +1 -0
  62. data/lib/earth/computation/computation_carrier_region.rb +1 -0
  63. data/lib/earth/computation/computation_carrier_region/data_miner.rb +1 -0
  64. data/lib/earth/computation/data_miner.rb +3 -5
  65. data/lib/earth/data_miner.rb +3 -13
  66. data/lib/earth/diet.rb +3 -2
  67. data/lib/earth/diet/data_miner.rb +3 -2
  68. data/lib/earth/eia.rb +11 -13
  69. data/lib/earth/fuel.rb +3 -5
  70. data/lib/earth/fuel/data_miner.rb +3 -5
  71. data/lib/earth/fuel/fuel_price.rb +1 -1
  72. data/lib/earth/hospitality.rb +3 -5
  73. data/lib/earth/hospitality/commercial_building_energy_consumption_survey_response.rb +2 -0
  74. data/lib/earth/hospitality/commercial_building_energy_consumption_survey_response/data_miner.rb +13 -7
  75. data/lib/earth/hospitality/country_lodging_class.rb +1 -0
  76. data/lib/earth/hospitality/country_lodging_class/data_miner.rb +4 -2
  77. data/lib/earth/hospitality/data_miner.rb +3 -4
  78. data/lib/earth/hospitality/lodging_class.rb +1 -10
  79. data/lib/earth/hospitality/lodging_class/data_miner.rb +7 -30
  80. data/lib/earth/hospitality/lodging_property.rb +1 -0
  81. data/lib/earth/industry.rb +3 -13
  82. data/lib/earth/industry/cbecs_energy_intensity.rb +5 -49
  83. data/lib/earth/industry/cbecs_energy_intensity/data_miner.rb +94 -42
  84. data/lib/earth/industry/data_miner.rb +3 -4
  85. data/lib/earth/industry/industry.rb +1 -0
  86. data/lib/earth/industry/industry/data_miner.rb +1 -0
  87. data/lib/earth/industry/industry_product.rb +1 -0
  88. data/lib/earth/industry/industry_product_line.rb +1 -0
  89. data/lib/earth/industry/industry_sector.rb +1 -0
  90. data/lib/earth/industry/mecs_energy.rb +3 -24
  91. data/lib/earth/industry/mecs_energy/data_miner.rb +34 -11
  92. data/lib/earth/industry/mecs_ratio.rb +6 -24
  93. data/lib/earth/industry/mecs_ratio/data_miner.rb +24 -1
  94. data/lib/earth/locality.rb +3 -15
  95. data/lib/earth/locality/census_division.rb +1 -0
  96. data/lib/earth/locality/census_division/data_miner.rb +1 -0
  97. data/lib/earth/locality/country.rb +3 -0
  98. data/lib/earth/locality/country/data_miner.rb +5 -1
  99. data/lib/earth/locality/data_miner.rb +3 -15
  100. data/lib/earth/locality/egrid_subregion.rb +1 -0
  101. data/lib/earth/locality/egrid_subregion/data_miner.rb +1 -0
  102. data/lib/earth/locality/state/data_miner.rb +11 -11
  103. data/lib/earth/locality/zip_code.rb +6 -0
  104. data/lib/earth/pet.rb +3 -4
  105. data/lib/earth/pet/data_miner.rb +3 -4
  106. data/lib/earth/rail.rb +3 -15
  107. data/lib/earth/rail/country_rail_class.rb +1 -0
  108. data/lib/earth/rail/country_rail_class/data_miner.rb +1 -0
  109. data/lib/earth/rail/country_rail_traction.rb +2 -0
  110. data/lib/earth/rail/country_rail_traction/data_miner.rb +2 -0
  111. data/lib/earth/rail/country_rail_traction_class.rb +2 -0
  112. data/lib/earth/rail/country_rail_traction_class/data_miner.rb +2 -0
  113. data/lib/earth/rail/data_miner.rb +3 -16
  114. data/lib/earth/rail/national_transit_database_company.rb +1 -0
  115. data/lib/earth/rail/national_transit_database_company/data_miner.rb +1 -0
  116. data/lib/earth/rail/national_transit_database_record.rb +1 -0
  117. data/lib/earth/rail/national_transit_database_record/data_miner.rb +1 -0
  118. data/lib/earth/rail/rail_company.rb +2 -0
  119. data/lib/earth/rail/rail_company/data_miner.rb +2 -0
  120. data/lib/earth/rail/rail_company_traction.rb +1 -0
  121. data/lib/earth/rail/rail_company_traction/data_miner.rb +1 -0
  122. data/lib/earth/rail/rail_company_traction_class.rb +1 -0
  123. data/lib/earth/rail/rail_company_traction_class/data_miner.rb +1 -0
  124. data/lib/earth/rail/rail_fuel.rb +1 -0
  125. data/lib/earth/rail/rail_fuel/data_miner.rb +1 -0
  126. data/lib/earth/residence.rb +3 -11
  127. data/lib/earth/residence/data_miner.rb +3 -11
  128. data/lib/earth/residence/residence_fuel_price.rb +1 -0
  129. data/lib/earth/residence/residence_fuel_price/data_miner.rb +1 -0
  130. data/lib/earth/residence/residential_energy_consumption_survey_response.rb +1 -0
  131. data/lib/earth/residence/residential_energy_consumption_survey_response/data_miner.rb +1 -0
  132. data/lib/earth/shipping.rb +3 -3
  133. data/lib/earth/shipping/data_miner.rb +3 -3
  134. data/lib/earth/version.rb +1 -1
  135. data/spec/earth/industry/mecs_energy_spec.rb +18 -0
  136. data/spec/earth_spec.rb +18 -29
  137. data/test/test_aircraft_match.rb +142 -142
  138. data/vendor/geokit-rails/.gitignore +2 -0
  139. data/vendor/geokit-rails/CHANGELOG.rdoc +4 -1
  140. data/vendor/geokit-rails/README.markdown +11 -3
  141. data/vendor/geokit-rails/about.yml +2 -0
  142. data/vendor/geokit-rails/lib/geokit-rails.rb +7 -4
  143. data/vendor/geokit-rails/lib/geokit-rails/acts_as_mappable.rb +15 -2
  144. data/vendor/geokit-rails/lib/geokit-rails/adapters/abstract.rb +1 -1
  145. data/vendor/geokit-rails/lib/geokit-rails/adapters/mysql2.rb +22 -0
  146. data/vendor/geokit-rails/test/database.yml +5 -0
  147. data/vendor/geokit-rails/test/ip_geocode_lookup_test.rb +1 -1
  148. data/vendor/geokit-rails/test/tasks.rake +2 -2
  149. metadata +179 -191
  150. data/lib/earth/hospitality/census_region_lodging_class.rb +0 -16
  151. data/lib/earth/hospitality/census_region_lodging_class/data_miner.rb +0 -40
data/README.markdown CHANGED
@@ -11,6 +11,36 @@ The data that these models represent can be pulled from http://data.brighterplan
11
11
  ft = AutomobileFuel.first
12
12
  ...
13
13
 
14
+ `Earth.init` loads desired "data domains" as well as any supporting classes and plugins that each data model needs. A "data domain" is a grouping of related data models. For instance, all automobile-related data is in the `:automobile` domain.
15
+
16
+ ### Data storage
17
+
18
+ You can store Earth data in any relational database. On your very first run, you will need to create the tables for each data model. This is done using minirecord with the `apply_schemas` option:
19
+
20
+ require 'activerecord'
21
+ ActiveRecord::Base.establish_connection :adapter => ... # Not needed if using Rails
22
+
23
+ require 'earth'
24
+ Earth.init :all, :apply_schemas => true
25
+
26
+ ### Pulling data from data.brighterplanet.com
27
+
28
+ By default, Earth will pull data from data.brighterplanet.com. Simply call `run_data_miner!` on whichever data model class you need. If there are any Earth classes that the chosen class depends on, they will be downloaded automatically as well:
29
+
30
+ require 'earth'
31
+ Earth.init :locality
32
+ ZipCode.run_data_miner!
33
+
34
+ ### Pulling data from the original sources
35
+
36
+ If you'd like to pull data directly from the source, e.g. Automobile data from EPA's sources, simply require the data\_miner file for the desired domain:
37
+
38
+ require 'earth'
39
+ Earth.init :automobile
40
+
41
+ require 'earth/automobile/data_miner'
42
+ AutomobileMake.run_data_miner!
43
+
14
44
  ## Collaboration cycle
15
45
  Brighter Planet vigorously encourages collaborative improvement of its emitter libraries. Collaboration requires a (free) GitHub account.
16
46
 
data/bin/earth_tester.rb CHANGED
@@ -79,7 +79,7 @@ if (resource = ARGV[1].to_s.camelcase).present?
79
79
  show_resource resource
80
80
  else
81
81
  DataMiner.run
82
- Earth.search(domain).each do |resource|
82
+ Earth.resources(domain).each do |resource|
83
83
  show_resource resource
84
84
  end
85
85
  end
data/earth.gemspec CHANGED
@@ -31,11 +31,11 @@ Gem::Specification.new do |s|
31
31
  s.add_runtime_dependency 'to_regexp'
32
32
  s.add_runtime_dependency 'cohort_scope'
33
33
  s.add_runtime_dependency 'table_warnings', '>=0.0.6'
34
- s.add_runtime_dependency 'remote_table', '>=1.2.3'
34
+ s.add_runtime_dependency 'remote_table', '>=1.3.0'
35
35
  s.add_runtime_dependency 'falls_back_on'
36
36
  s.add_runtime_dependency 'fixed_width-multibyte'
37
- s.add_runtime_dependency 'geokit-rails'
38
- s.add_runtime_dependency 'loose_tight_dictionary', '>=0.2.3'
37
+ s.add_runtime_dependency 'geokit'
38
+ s.add_runtime_dependency 'fuzzy_match', '>=1.1.0'
39
39
  s.add_runtime_dependency 'weighted_average', '>=1.0.2'
40
40
  s.add_runtime_dependency 'mini_record-compat' # need https://github.com/DAddYE/mini_record/pull/7
41
41
  s.add_runtime_dependency 'activesupport'
data/lib/earth.rb CHANGED
@@ -10,55 +10,44 @@ require 'fixed_width'
10
10
  require 'errata'
11
11
  require 'mini_record'
12
12
  require 'table_warnings'
13
- require 'loose_tight_dictionary'
14
- require 'loose_tight_dictionary/cached_result'
13
+ require 'fuzzy_match'
14
+ require 'fuzzy_match/cached_result'
15
15
 
16
16
  # The earth module is an interface for loading data models from various domains.
17
17
  module Earth
18
- autoload :Utils, 'earth/utils'
19
-
18
+ extend ::ActiveSupport::Memoizable
20
19
  extend self
21
20
 
22
- # Takes argument like Earth.search(['air'])
23
- # Default is search all domains
24
- # For example, <tt>[ 'Aircraft', 'Airline' ]</tt>
25
- def search(*search_domains)
26
- search_domains = search_domains.empty? ? [:all] : search_domains.flatten.map(&:to_sym)
27
- if search_domains.include? :all
28
- resources
29
- else
30
- resource_map.select do |resource, domain|
31
- search_domains.include? domain.to_sym
32
- end.map do |resource, domain|
33
- resource
21
+ def domains
22
+ ::Dir[::File.join(lib_dir, '*')].map do |path|
23
+ if ::File.directory? path
24
+ ::File.basename path
34
25
  end
35
- end
26
+ end.compact.uniq.sort
36
27
  end
37
-
38
- def gem_root
39
- File.expand_path File.join(File.dirname(__FILE__), '..')
28
+ memoize :domains
29
+
30
+ def resources(*search_domains)
31
+ search_domains = search_domains.flatten.compact.map(&:to_s)
32
+ if search_domains.empty?
33
+ search_domains = domains
34
+ end
35
+ search_domains.map do |domain|
36
+ ::Dir[::File.join(lib_dir, domain, '**', '*.rb')].map do |possible_resource|
37
+ unless possible_resource.include?('data_miner')
38
+ ::File.basename(possible_resource, '.rb').camelcase
39
+ end
40
+ end
41
+ end.flatten.compact.sort
40
42
  end
43
+ memoize :resources
41
44
 
42
- def domains
43
- resource_map.values.uniq.sort
45
+ def vendor_dir
46
+ ::File.expand_path '../../vendor', __FILE__
44
47
  end
45
48
 
46
- def resources
47
- resource_map.keys.sort
48
- end
49
-
50
- def resource_map
51
- @resource_map ||= Dir[File.join(gem_root, 'lib', 'earth', '*')].select do |path|
52
- File.directory? path
53
- end.inject({}) do |memo, domain_path|
54
- Dir[File.join(domain_path, '*.rb')].each do |resource_path|
55
- resource = File.basename(resource_path, '.rb').camelcase
56
- unless resource == 'DataMiner'
57
- memo[resource] = File.basename domain_path
58
- end
59
- end
60
- memo
61
- end
49
+ def lib_dir
50
+ ::File.expand_path '../earth', __FILE__
62
51
  end
63
52
 
64
53
  # Earth.init will load any specified domains, any needed ActiveRecord plugins,
@@ -69,51 +58,94 @@ module Earth
69
58
  # Earth.init should be performed after a connection is made to the database and
70
59
  # before any domain models are referenced.
71
60
  def init(*args)
72
- options = args.last.is_a?(Hash) ? args.pop.symbolize_keys : {}
73
- domains = args.empty? ? [ :all ] : args.map(&:to_sym)
61
+ options = args.extract_options!
62
+ domains = args
63
+
64
+ options[:load_data_miner] = true if options[:apply_schemas]
74
65
 
75
66
  _warn_unless_mysql_ansi_mode
76
67
  _load_plugins
77
- _load_domains domains, options
78
- _decorate_resources options
79
- _load_schemas search(domains), options
68
+
69
+ if domains.include?(:none)
70
+ # don't load anything
71
+ elsif domains.include?(:all) or domains.empty?
72
+ require_all options
73
+ else
74
+ domains.each do |domain|
75
+ require_domain domain, options
76
+ end
77
+ end
78
+
79
+ # be sure to look at both explicitly and implicitly loaded resources
80
+ resources.each do |resource|
81
+ next unless ::Object.const_defined?(resource)
82
+ resource_model = resource.constantize
83
+
84
+ _append_pull_dependencies_step_to_data_miner resource_model
85
+
86
+ if options[:load_data_miner]
87
+ _prepend_auto_upgrade_step_to_data_miner resource_model
88
+ else
89
+ _prepend_taps_step_to_data_miner resource_model
90
+ end
91
+
92
+ if options[:apply_schemas]
93
+ resource_model.auto_upgrade!
94
+ end
95
+ end
96
+ end
97
+
98
+ # internal use
99
+ def require_related(path)
100
+ path = ::File.expand_path path
101
+ raise ::ArgumentError, %{[earth gem] #{path} is not in #{lib_dir}} unless path.start_with?(lib_dir)
102
+ domain = %r{#{lib_dir}/([^\./]+)}.match(path).captures.first
103
+ require_domain domain, :load_data_miner => path.include?('data_miner')
104
+ end
105
+
106
+ # internal use
107
+ def require_all(options = {})
108
+ require_glob ::File.join(lib_dir, '**', '*.rb'), options
80
109
  end
81
110
 
82
111
  private
83
112
 
84
- def _warn_unless_mysql_ansi_mode
85
- if ActiveRecord::Base.connection.adapter_name =~ /mysql/i
86
- sql_mode = ActiveRecord::Base.connection.select_value("SELECT @@GLOBAL.sql_mode") + ActiveRecord::Base.connection.select_value("SELECT @@SESSION.sql_mode")
87
- $stderr.puts "[earth gem] Warning: MySQL detected, but PIPES_AS_CONCAT not set. Importing from scratch will fail. Consider setting sql-mode=ANSI in my.cnf." unless sql_mode =~ /pipes_as_concat/i
88
- end
113
+ def require_domain(domain, options = {})
114
+ require_glob ::File.join(lib_dir, domain.to_s, '**', '*.rb'), options
89
115
  end
90
116
 
91
- # TODO sabshere don't use directories to specify domains
92
- # * you have 20 million data_miner.rb files which are easy to confuse
93
- # * you have to go all over the filesystem to figure things out
94
- def _load_domains(domains, options)
95
- return if domains.include? :none
96
- if domains.empty? or domains.include?(:all)
97
- # sabshere 9/16/10 why maintain this separately?
98
- require 'earth/all'
99
- require 'earth/data_miner' if options[:apply_schemas] or options[:load_data_miner]
100
- else
101
- domains.each do |domain|
102
- require "earth/#{domain}"
103
- require "earth/#{domain}/data_miner" if options[:apply_schemas] or options[:load_data_miner]
117
+ def require_glob(glob, options = {})
118
+ data_miner_paths = []
119
+ ::Dir[glob].each do |path|
120
+ if path.include?('data_miner')
121
+ data_miner_paths << path
122
+ else
123
+ require path
104
124
  end
105
125
  end
126
+ # load data_miner blocks second to make sure they override
127
+ data_miner_paths.each do |path|
128
+ require path
129
+ end if options[:load_data_miner]
130
+ nil
106
131
  end
107
-
132
+ memoize :require_glob
133
+
134
+ def _warn_unless_mysql_ansi_mode
135
+ if ::ActiveRecord::Base.connection.adapter_name =~ /mysql/i
136
+ sql_mode = ::ActiveRecord::Base.connection.select_value("SELECT @@GLOBAL.sql_mode") + ::ActiveRecord::Base.connection.select_value("SELECT @@SESSION.sql_mode")
137
+ $stderr.puts "[earth gem] Warning: MySQL detected, but PIPES_AS_CONCAT not set. Importing from scratch will fail. Consider setting sql-mode=ANSI in my.cnf." unless sql_mode =~ /pipes_as_concat/i
138
+ end
139
+ end
140
+
108
141
  def _load_plugins
109
- Dir[File.join(Earth.gem_root, 'vendor', '**', 'init.rb')].each do |pluginit|
110
- $:.unshift File.join(File.dirname(pluginit), 'lib')
111
- load pluginit
142
+ ::Dir[::File.expand_path('../../vendor/**/init.rb', __FILE__)].each do |pluginit|
143
+ $LOAD_PATH.unshift ::File.join(::File.dirname(pluginit), 'lib')
144
+ ::Kernel.load pluginit
112
145
  end
113
146
  end
114
147
 
115
- def _append_pull_dependencies_step_to_data_miner(resource)
116
- resource_model = resource.constantize
148
+ def _append_pull_dependencies_step_to_data_miner(resource_model)
117
149
  return if resource_model.data_miner_config.steps.any? { |step| step.description == :run_data_miner_on_parent_associations! }
118
150
 
119
151
  pull_dependencies_step = DataMiner::Process.new resource_model.data_miner_config, :run_data_miner_on_parent_associations!
@@ -121,8 +153,7 @@ module Earth
121
153
  resource_model.data_miner_config.steps.push pull_dependencies_step
122
154
  end
123
155
 
124
- def _prepend_auto_upgrade_step_to_data_miner(resource)
125
- resource_model = resource.constantize
156
+ def _prepend_auto_upgrade_step_to_data_miner(resource_model)
126
157
  return if resource_model.data_miner_config.steps.any? { |step| step.description == :auto_upgrade! }
127
158
 
128
159
  auto_upgrade_step = DataMiner::Process.new resource_model.data_miner_config, :auto_upgrade!
@@ -132,31 +163,15 @@ module Earth
132
163
 
133
164
  TAPS_STEP = 'Tap the Brighter Planet data server'
134
165
  TAPS_SOURCE = 'http://carbon:neutral@data.brighterplanet.com:5000'
135
- def _prepend_taps_step_to_data_miner(resource)
136
- resource_model = resource.constantize
166
+ def _prepend_taps_step_to_data_miner(resource_model)
137
167
  return if resource_model.data_miner_config.steps.any? { |step| step.description == TAPS_STEP }
138
168
 
139
169
  taps_step = DataMiner::Tap.new resource_model.data_miner_config, TAPS_STEP, TAPS_SOURCE
140
170
 
141
171
  resource_model.data_miner_config.steps.unshift taps_step
142
172
  end
143
-
144
- def _decorate_resources(options)
145
- resources.each do |resource|
146
- next unless ::Object.const_defined?(resource)
147
- _append_pull_dependencies_step_to_data_miner resource
148
- if options[:apply_schemas] or options[:load_data_miner]
149
- _prepend_auto_upgrade_step_to_data_miner resource
150
- else
151
- _prepend_taps_step_to_data_miner resource
152
- end
153
- end
154
- end
155
-
156
- def _load_schemas(selected_resources, options)
157
- return unless options[:apply_schemas]
158
- selected_resources.each do |resource|
159
- resource.constantize.auto_upgrade!
160
- end
161
- end
162
173
  end
174
+
175
+ require 'earth/utils'
176
+ # TODO move this into a gem or into its own namespace in this gem
177
+ require ::File.join(Earth.vendor_dir, 'clean_find_in_batches', 'init')
data/lib/earth/air.rb CHANGED
@@ -1,12 +1,3 @@
1
- require 'earth/air/aircraft'
2
- require 'earth/air/aircraft_instance'
3
- require 'earth/air/aircraft_instance_seat_class'
4
- require 'earth/air/airline'
5
- require 'earth/air/airport'
6
- require 'earth/air/bts_aircraft'
7
- require 'earth/air/flight_distance_class'
8
- require 'earth/air/flight_distance_class_seat_class'
9
- require 'earth/air/flight_seat_class'
10
- require 'earth/air/flight_segment'
11
-
12
- require 'earth/locality'
1
+ # lib/earth/air.rb
2
+ require 'earth'
3
+ Earth.require_related __FILE__
@@ -2,12 +2,12 @@ class Aircraft < ActiveRecord::Base
2
2
  set_primary_key :icao_code
3
3
 
4
4
  # Fuzzy association with FlightSegment
5
- cache_loose_tight_dictionary_matches_with :flight_segments, :primary_key => :description, :foreign_key => :aircraft_description
5
+ cache_fuzzy_match_with :flight_segments, :primary_key => :description, :foreign_key => :aircraft_description
6
6
 
7
7
  class << self
8
- # set up a loose_tight_dictionary for matching Aircraft description with FlightSegment aircraft_description
9
- def loose_tight_dictionary
10
- @loose_tight_dictionary ||= LooseTightDictionary.new(Aircraft.all,
8
+ # set up a fuzzy_match for matching Aircraft description with FlightSegment aircraft_description
9
+ def fuzzy_match
10
+ @fuzzy_match ||= FuzzyMatch.new(Aircraft.all,
11
11
  :haystack_reader => lambda { |record| record.description },
12
12
  :blockings => RemoteTable.new(:url => 'https://spreadsheets.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdDlRR2NmdzE2ZjZwTy1ucjh4cWFYOFE&gid=0&output=csv').map { |record| record['blocking'] },
13
13
  :identities => RemoteTable.new(:url => 'https://spreadsheets.google.com/spreadsheet/pub?key=0AoQJbWqPrREqdDlRR2NmdzE2ZjZwTy1ucjh4cWFYOFE&gid=1&output=csv').map { |record| record['identity'] },
@@ -61,7 +61,7 @@ class Aircraft < ActiveRecord::Base
61
61
  # Cache fuzzy matches between FlightSegment aircraft_description and Aircraft description
62
62
  def manually_cache_flight_segments!
63
63
  FlightSegment.run_data_miner!
64
- LooseTightDictionary::CachedResult.setup
64
+ FuzzyMatch::CachedResult.setup
65
65
  connection.select_values("SELECT DISTINCT(aircraft_description) FROM flight_segments WHERE aircraft_description IS NOT NULL").each do |original_description|
66
66
  # If the flight segment's aircraft_description contains '/' then it describes multiple aircraft.
67
67
  # We need to synthesize descriptions for those aircraft, find all Aircraft that fuzzily match the
@@ -86,20 +86,20 @@ class Aircraft < ActiveRecord::Base
86
86
  suffixes.map{ |suffix| root + suffix }.each do |synthesized_description|
87
87
  # Look up the Aircraft that match each synthesized description and associate
88
88
  # them with the original flight segment aircraft_description
89
- Aircraft.loose_tight_dictionary.find_all(synthesized_description).each do |aircraft|
89
+ Aircraft.fuzzy_match.find_all(synthesized_description).each do |aircraft|
90
90
  attrs = {
91
91
  :a_class => "Aircraft",
92
92
  :a => aircraft.description,
93
93
  :b_class => "FlightSegment",
94
94
  :b => original_description
95
95
  }
96
- unless ::LooseTightDictionary::CachedResult.exists? attrs
97
- ::LooseTightDictionary::CachedResult.create! attrs
96
+ unless ::FuzzyMatch::CachedResult.exists? attrs
97
+ ::FuzzyMatch::CachedResult.create! attrs
98
98
  end
99
99
  end
100
100
  end
101
101
  # If the flight segment's aircraft_description doesn't contain '/' we can use
102
- # a method provided by loose_tight_dictionary to associate it with Aircraft
102
+ # a method provided by fuzzy_match to associate it with Aircraft
103
103
  else
104
104
  FlightSegment.find_by_aircraft_description(original_description).cache_aircraft!
105
105
  end
@@ -1,3 +1,6 @@
1
+ require ::File.join(Earth.vendor_dir, 'geokit-rails', 'lib', 'geokit-rails')
2
+ require 'earth/locality'
3
+
1
4
  class Airport < ActiveRecord::Base
2
5
  set_primary_key :iata_code
3
6
 
@@ -1,3 +1,5 @@
1
+ require 'earth/locality/data_miner'
2
+
1
3
  Airport.class_eval do
2
4
  class Airport::Guru
3
5
  def method_missing(method_id, *args, &block)
@@ -1,10 +1,3 @@
1
- require 'earth/air/aircraft/data_miner'
2
- require 'earth/air/airline/data_miner'
3
- require 'earth/air/airport/data_miner'
4
- require 'earth/air/bts_aircraft/data_miner'
5
- require 'earth/air/flight_distance_class/data_miner'
6
- require 'earth/air/flight_distance_class_seat_class/data_miner'
7
- require 'earth/air/flight_seat_class/data_miner'
8
- require 'earth/air/flight_segment/data_miner'
9
-
10
- require 'earth/locality/data_miner'
1
+ # lib/earth/air/data_miner.rb
2
+ require 'earth'
3
+ Earth.require_related __FILE__
@@ -1,3 +1,5 @@
1
+ require 'earth/locality'
2
+
1
3
  class FlightSegment < ActiveRecord::Base
2
4
  set_primary_key :row_hash
3
5
 
@@ -12,7 +14,7 @@ class FlightSegment < ActiveRecord::Base
12
14
  # has_many :destination_city_airports, :foreign_key => 'city', :primary_key => 'destination_airport_city', :class_name => 'Airport'
13
15
 
14
16
  # Enable flight_segment.aircraft
15
- cache_loose_tight_dictionary_matches_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :description
17
+ cache_fuzzy_match_with :aircraft, :primary_key => :aircraft_description, :foreign_key => :description
16
18
 
17
19
  class << self
18
20
  def update_averages!
@@ -1,3 +1,5 @@
1
+ require 'earth/locality/data_miner'
2
+
1
3
  FlightSegment.class_eval do
2
4
  # For import errata
3
5
  class FlightSegment::Guru
data/lib/earth/all.rb CHANGED
@@ -1,13 +1,3 @@
1
- require 'earth/air'
2
- require 'earth/automobile'
3
- require 'earth/bus'
4
- require 'earth/computation'
5
- require 'earth/diet'
6
- require 'earth/fuel'
7
- require 'earth/hospitality'
8
- require 'earth/industry'
9
- require 'earth/locality'
10
- require 'earth/pet'
11
- require 'earth/rail'
12
- require 'earth/residence'
13
- require 'earth/shipping'
1
+ # lib/earth/all.rb
2
+ require 'earth'
3
+ Earth.require_all