earth 0.12.4 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -1
- data/Gemfile +0 -5
- data/README.markdown +32 -19
- data/Rakefile +4 -0
- data/bin/earth_tester.rb +66 -54
- data/earth.gemspec +6 -3
- data/lib/earth.rb +67 -114
- data/lib/earth/air.rb +1 -1
- data/lib/earth/air/aircraft.rb +37 -21
- data/lib/earth/air/aircraft/data_miner.rb +0 -4
- data/lib/earth/air/airline.rb +19 -5
- data/lib/earth/air/airline/data_miner.rb +0 -4
- data/lib/earth/air/airport.rb +23 -9
- data/lib/earth/air/airport/data_miner.rb +1 -5
- data/lib/earth/air/bts_aircraft.rb +14 -2
- data/lib/earth/air/flight_distance_class.rb +19 -7
- data/lib/earth/air/flight_distance_class_seat_class.rb +21 -4
- data/lib/earth/air/flight_seat_class.rb +13 -1
- data/lib/earth/air/flight_seat_class/data_miner.rb +2 -0
- data/lib/earth/air/flight_segment.rb +64 -59
- data/lib/earth/air/flight_segment/data_miner.rb +4 -5
- data/lib/earth/all.rb +1 -1
- data/lib/earth/automobile.rb +1 -1
- data/lib/earth/automobile/automobile_activity_year.rb +17 -3
- data/lib/earth/automobile/automobile_activity_year/data_miner.rb +0 -4
- data/lib/earth/automobile/automobile_activity_year_type.rb +21 -7
- data/lib/earth/automobile/automobile_activity_year_type/data_miner.rb +0 -4
- data/lib/earth/automobile/automobile_activity_year_type_fuel.rb +20 -8
- data/lib/earth/automobile/automobile_activity_year_type_fuel/data_miner.rb +0 -4
- data/lib/earth/automobile/automobile_fuel.rb +37 -23
- data/lib/earth/automobile/automobile_fuel/data_miner.rb +3 -5
- data/lib/earth/automobile/automobile_make.rb +17 -3
- data/lib/earth/automobile/automobile_make/data_miner.rb +0 -4
- data/lib/earth/automobile/automobile_make_model.rb +29 -14
- data/lib/earth/automobile/automobile_make_model/data_miner.rb +2 -5
- data/lib/earth/automobile/automobile_make_model_year.rb +31 -18
- data/lib/earth/automobile/automobile_make_model_year/data_miner.rb +3 -4
- data/lib/earth/automobile/automobile_make_model_year_variant.rb +43 -31
- data/lib/earth/automobile/automobile_make_model_year_variant/data_miner.rb +0 -4
- data/lib/earth/automobile/automobile_make_year.rb +18 -7
- data/lib/earth/automobile/automobile_make_year/data_miner.rb +4 -4
- data/lib/earth/automobile/automobile_make_year_fleet.rb +19 -7
- data/lib/earth/automobile/automobile_make_year_fleet/data_miner.rb +0 -4
- data/lib/earth/automobile/automobile_model.rb +13 -1
- data/lib/earth/automobile/automobile_model/data_miner.rb +2 -4
- data/lib/earth/automobile/automobile_size_class.rb +24 -10
- data/lib/earth/automobile/automobile_size_class/data_miner.rb +0 -4
- data/lib/earth/automobile/automobile_type_fuel.rb +26 -12
- data/lib/earth/automobile/automobile_type_fuel/data_miner.rb +2 -4
- data/lib/earth/automobile/automobile_type_fuel_control.rb +20 -8
- data/lib/earth/automobile/automobile_type_fuel_control/data_miner.rb +0 -1
- data/lib/earth/automobile/automobile_type_fuel_year.rb +25 -11
- data/lib/earth/automobile/automobile_type_fuel_year/data_miner.rb +2 -4
- data/lib/earth/automobile/automobile_type_fuel_year_control.rb +21 -7
- data/lib/earth/automobile/automobile_type_fuel_year_control/data_miner.rb +0 -4
- data/lib/earth/automobile/automobile_year.rb +13 -1
- data/lib/earth/automobile/automobile_year/data_miner.rb +2 -0
- data/lib/earth/bus.rb +1 -1
- data/lib/earth/bus/bus_class.rb +38 -25
- data/lib/earth/bus/bus_class/data_miner.rb +0 -1
- data/lib/earth/bus/bus_fuel.rb +27 -13
- data/lib/earth/bus/bus_fuel/data_miner.rb +5 -1
- data/lib/earth/bus/bus_fuel_control.rb +19 -7
- data/lib/earth/bus/bus_fuel_control/data_miner.rb +0 -1
- data/lib/earth/bus/bus_fuel_year_control.rb +20 -7
- data/lib/earth/bus/bus_fuel_year_control/data_miner.rb +2 -1
- data/lib/earth/computation.rb +1 -1
- data/lib/earth/computation/computation_carrier.rb +16 -3
- data/lib/earth/computation/computation_carrier/data_miner.rb +0 -1
- data/lib/earth/computation/computation_carrier_instance_class.rb +21 -6
- data/lib/earth/computation/computation_carrier_instance_class/data_miner.rb +0 -1
- data/lib/earth/computation/computation_carrier_region.rb +18 -5
- data/lib/earth/computation/computation_carrier_region/data_miner.rb +0 -1
- data/lib/earth/diet.rb +1 -1
- data/lib/earth/diet/diet_class.rb +25 -13
- data/lib/earth/diet/food_group.rb +18 -7
- data/lib/earth/electricity.rb +1 -1
- data/lib/earth/electricity/electric_market.rb +17 -5
- data/lib/earth/electricity/electric_market/data_miner.rb +0 -2
- data/lib/earth/electricity/electric_utility.rb +22 -7
- data/lib/earth/electricity/electric_utility/data_miner.rb +1 -1
- data/lib/earth/electricity/green_button_adoption.rb +15 -3
- data/lib/earth/fuel.rb +1 -1
- data/lib/earth/fuel/fuel.rb +32 -19
- data/lib/earth/fuel/fuel/data_miner.rb +2 -4
- data/lib/earth/fuel/fuel_price.rb +17 -3
- data/lib/earth/fuel/fuel_type.rb +23 -12
- data/lib/earth/fuel/fuel_year.rb +27 -13
- data/lib/earth/fuel/greenhouse_gas.rb +18 -6
- data/lib/earth/hospitality.rb +1 -1
- data/lib/earth/hospitality/commercial_building_energy_consumption_survey_response.rb +68 -56
- data/lib/earth/hospitality/commercial_building_energy_consumption_survey_response/data_miner.rb +0 -4
- data/lib/earth/hospitality/lodging_class.rb +13 -1
- data/lib/earth/industry.rb +1 -1
- data/lib/earth/industry/cbecs_energy_intensity.rb +43 -32
- data/lib/earth/industry/cbecs_energy_intensity/data_miner.rb +0 -1
- data/lib/earth/industry/industry.rb +20 -2
- data/lib/earth/industry/industry/data_miner.rb +0 -4
- data/lib/earth/industry/industry_product.rb +20 -6
- data/lib/earth/industry/industry_product_line.rb +19 -5
- data/lib/earth/industry/industry_sector.rb +19 -6
- data/lib/earth/industry/mecs_energy.rb +34 -22
- data/lib/earth/industry/mecs_ratio.rb +17 -6
- data/lib/earth/industry/mecs_ratio/data_miner.rb +1 -1
- data/lib/earth/industry/merchant_category.rb +17 -2
- data/lib/earth/industry/merchant_category/data_miner.rb +0 -4
- data/lib/earth/industry/merchant_category_industry.rb +19 -4
- data/lib/earth/industry/naics_2002.rb +20 -3
- data/lib/earth/industry/naics_2002_naics_2007_concordance.rb +19 -4
- data/lib/earth/industry/naics_2002_sic_1987_concordance.rb +19 -4
- data/lib/earth/industry/naics_2007.rb +17 -2
- data/lib/earth/industry/product_line.rb +20 -5
- data/lib/earth/industry/product_line_industry_product.rb +19 -5
- data/lib/earth/industry/sector.rb +18 -5
- data/lib/earth/industry/sic_1987.rb +17 -2
- data/lib/earth/loader.rb +47 -0
- data/lib/earth/locality.rb +1 -1
- data/lib/earth/locality/census_division.rb +32 -13
- data/lib/earth/locality/census_division/data_miner.rb +0 -1
- data/lib/earth/locality/census_region.rb +18 -5
- data/lib/earth/locality/climate_division.rb +21 -6
- data/lib/earth/locality/country.rb +68 -56
- data/lib/earth/locality/country/data_miner.rb +8 -8
- data/lib/earth/locality/egrid_country.rb +22 -10
- data/lib/earth/locality/egrid_region.rb +25 -10
- data/lib/earth/locality/egrid_region/data_miner.rb +0 -4
- data/lib/earth/locality/egrid_subregion.rb +31 -16
- data/lib/earth/locality/egrid_subregion/data_miner.rb +7 -6
- data/lib/earth/locality/electricity_mix.rb +27 -13
- data/lib/earth/locality/electricity_mix/data_miner.rb +8 -8
- data/lib/earth/locality/petroleum_administration_for_defense_district.rb +17 -5
- data/lib/earth/locality/state.rb +36 -16
- data/lib/earth/locality/state/data_miner.rb +0 -4
- data/lib/earth/locality/zip_code.rb +34 -15
- data/lib/earth/locality/zip_code/data_miner.rb +0 -4
- data/lib/earth/model.rb +90 -0
- data/lib/earth/pet.rb +1 -1
- data/lib/earth/pet/breed.rb +19 -4
- data/lib/earth/pet/breed_gender.rb +20 -5
- data/lib/earth/pet/gender.rb +15 -1
- data/lib/earth/pet/species.rb +30 -14
- data/lib/earth/rail.rb +1 -1
- data/lib/earth/rail/country_rail_class.rb +26 -15
- data/lib/earth/rail/country_rail_class/data_miner.rb +0 -1
- data/lib/earth/rail/country_rail_traction.rb +21 -11
- data/lib/earth/rail/country_rail_traction/data_miner.rb +2 -2
- data/lib/earth/rail/country_rail_traction_class.rb +22 -12
- data/lib/earth/rail/country_rail_traction_class/data_miner.rb +2 -2
- data/lib/earth/rail/national_transit_database_company.rb +20 -6
- data/lib/earth/rail/national_transit_database_company/data_miner.rb +0 -1
- data/lib/earth/rail/national_transit_database_mode.rb +15 -3
- data/lib/earth/rail/national_transit_database_record.rb +45 -31
- data/lib/earth/rail/national_transit_database_record/data_miner.rb +0 -1
- data/lib/earth/rail/rail_class.rb +13 -1
- data/lib/earth/rail/rail_company.rb +34 -22
- data/lib/earth/rail/rail_company/data_miner.rb +7 -2
- data/lib/earth/rail/rail_company_traction.rb +21 -10
- data/lib/earth/rail/rail_company_traction/data_miner.rb +0 -1
- data/lib/earth/rail/rail_company_traction_class.rb +22 -11
- data/lib/earth/rail/rail_company_traction_class/data_miner.rb +0 -1
- data/lib/earth/rail/rail_fuel.rb +20 -7
- data/lib/earth/rail/rail_fuel/data_miner.rb +3 -1
- data/lib/earth/rail/rail_traction.rb +13 -1
- data/lib/earth/rail/rail_traction/data_miner.rb +5 -0
- data/lib/earth/residence.rb +1 -1
- data/lib/earth/residence/air_conditioner_use.rb +19 -3
- data/lib/earth/residence/clothes_machine_use.rb +17 -3
- data/lib/earth/residence/dishwasher_use.rb +17 -3
- data/lib/earth/residence/residence_appliance.rb +15 -3
- data/lib/earth/residence/residence_appliance/data_miner.rb +2 -0
- data/lib/earth/residence/residence_class.rb +15 -1
- data/lib/earth/residence/residence_fuel_price.rb +25 -12
- data/lib/earth/residence/residence_fuel_price/data_miner.rb +0 -1
- data/lib/earth/residence/residence_fuel_type.rb +17 -6
- data/lib/earth/residence/residential_energy_consumption_survey_response.rb +110 -91
- data/lib/earth/residence/residential_energy_consumption_survey_response/data_miner.rb +0 -1
- data/lib/earth/residence/urbanity.rb +15 -1
- data/lib/earth/shipping.rb +1 -1
- data/lib/earth/shipping/carrier.rb +24 -8
- data/lib/earth/shipping/carrier_mode.rb +22 -7
- data/lib/earth/shipping/shipment_mode.rb +18 -4
- data/lib/earth/tasks.rb +62 -0
- data/lib/earth/version.rb +1 -1
- data/lib/earth/warnings.rb +12 -0
- data/spec/data_mining_spec.rb +31 -0
- data/spec/earth/air/aircraft_spec.rb +0 -10
- data/spec/earth/air/airline_spec.rb +1 -11
- data/spec/earth/air/airport_spec.rb +0 -10
- data/spec/earth/air/bts_aircraft_spec.rb +0 -10
- data/spec/earth/air/flight_distance_class_spec.rb +0 -10
- data/spec/earth/air/flight_segment_spec.rb +27 -10
- data/spec/earth/automobile/automobile_activity_year_spec.rb +0 -7
- data/spec/earth/automobile/automobile_activity_year_type_fuel_spec.rb +0 -7
- data/spec/earth/automobile/automobile_activity_year_type_spec.rb +0 -7
- data/spec/earth/automobile/automobile_fuel_spec.rb +0 -10
- data/spec/earth/automobile/automobile_make_model_spec.rb +0 -8
- data/spec/earth/automobile/automobile_make_model_year_spec.rb +0 -8
- data/spec/earth/automobile/automobile_make_model_year_variant_spec.rb +0 -8
- data/spec/earth/automobile/automobile_make_spec.rb +0 -11
- data/spec/earth/automobile/automobile_make_year_fleet_spec.rb +0 -11
- data/spec/earth/automobile/automobile_make_year_spec.rb +0 -8
- data/spec/earth/automobile/automobile_model_spec.rb +0 -10
- data/spec/earth/automobile/automobile_size_class_spec.rb +0 -10
- data/spec/earth/automobile/automobile_type_fuel_control_spec.rb +0 -7
- data/spec/earth/automobile/automobile_type_fuel_spec.rb +0 -10
- data/spec/earth/automobile/automobile_type_fuel_year_control_spec.rb +0 -7
- data/spec/earth/automobile/automobile_type_fuel_year_spec.rb +1 -8
- data/spec/earth/automobile/automobile_year_spec.rb +2 -11
- data/spec/earth/bus/bus_fuel_control_spec.rb +0 -10
- data/spec/earth/bus/bus_fuel_spec.rb +0 -10
- data/spec/earth/bus/bus_fuel_year_control_spec.rb +0 -10
- data/spec/earth/electricity/electric_market_spec.rb +0 -10
- data/spec/earth/electricity/electric_utility_spec.rb +0 -10
- data/spec/earth/electricity/green_button_adoption_spec.rb +0 -10
- data/spec/earth/fuel/fuel_spec.rb +1 -10
- data/spec/earth/hospitality/commercial_building_energy_consumption_survey_response_spec.rb +0 -10
- data/spec/earth/hospitality/lodging_class_spec.rb +0 -10
- data/spec/earth/industry/cbecs_energy_intensity_spec.rb +3 -11
- data/spec/earth/industry/industry_spec.rb +0 -10
- data/spec/earth/industry/mecs_energy_spec.rb +0 -9
- data/spec/earth/industry/mecs_ratio_spec.rb +0 -9
- data/spec/earth/industry/merchant_category_spec.rb +2 -7
- data/spec/earth/industry/naics_2002_naics_2007_concordance_spec.rb +1 -12
- data/spec/earth/industry/naics_2002_sic_1987_concordance_spec.rb +3 -13
- data/spec/earth/industry/naics_2002_spec.rb +0 -10
- data/spec/earth/industry/naics_2007_spec.rb +0 -10
- data/spec/earth/industry/sic_1987_spec.rb +0 -10
- data/spec/earth/locality/country_spec.rb +0 -10
- data/spec/earth/locality/egrid_country_spec.rb +0 -11
- data/spec/earth/locality/egrid_region_spec.rb +0 -10
- data/spec/earth/locality/egrid_subregion_spec.rb +0 -10
- data/spec/earth/locality/electricity_mix_spec.rb +0 -10
- data/spec/earth/locality/state_spec.rb +0 -10
- data/spec/earth/locality/zip_code_spec.rb +7 -15
- data/spec/earth/model_spec.rb +27 -0
- data/spec/earth/pet/species_spec.rb +1 -1
- data/spec/earth_spec.rb +1 -30
- data/spec/factories/airline.rb +10 -0
- data/spec/factories/airport.rb +12 -0
- data/spec/factories/flight_segment.rb +38 -0
- data/spec/spec_helper.rb +27 -36
- metadata +249 -223
- data/lib/earth/active_record_base_class_methods.rb +0 -25
- data/lib/earth/air/data_miner.rb +0 -3
- data/lib/earth/automobile/data_miner.rb +0 -3
- data/lib/earth/bus/data_miner.rb +0 -3
- data/lib/earth/computation/data_miner.rb +0 -3
- data/lib/earth/data_miner.rb +0 -3
- data/lib/earth/diet/data_miner.rb +0 -3
- data/lib/earth/fuel/data_miner.rb +0 -3
- data/lib/earth/hospitality/data_miner.rb +0 -3
- data/lib/earth/industry/data_miner.rb +0 -3
- data/lib/earth/locality/data_miner.rb +0 -3
- data/lib/earth/pet/data_miner.rb +0 -3
- data/lib/earth/rail/data_miner.rb +0 -3
- data/lib/earth/residence/data_miner.rb +0 -3
- data/lib/earth/shipping/data_miner.rb +0 -3
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/README.markdown
CHANGED
@@ -1,26 +1,28 @@
|
|
1
1
|
# earth
|
2
2
|
|
3
|
-
Earth is a collection of data models that represent various things found here on Earth, such as
|
3
|
+
Earth is a collection of *data models* that represent various things found here on Earth, such as countries, automobiles, aircraft, zip codes, and pet breeds.
|
4
4
|
|
5
|
-
|
5
|
+
By default the data that these models represent is pulled from [Brighter Planet's open reference data site](http://data.brighterplanet.com) using the [taps gem](http://rubygems.org/gems/taps). The data can also be imported directly from preconfigured authoritative sources.
|
6
6
|
|
7
7
|
## Usage
|
8
8
|
|
9
9
|
``` ruby
|
10
10
|
require 'earth'
|
11
|
-
|
11
|
+
require 'earth/automobile/automobile_fuel'
|
12
|
+
|
13
|
+
Earth.init
|
12
14
|
ft = AutomobileFuel.first
|
13
15
|
# ...
|
14
16
|
```
|
15
17
|
|
16
|
-
`Earth.init`
|
18
|
+
`Earth.init` prepares the environment to load and download data for each data model. You can load all data models at once with `Earth.init :all`. There are several other options to `init` that configure data mining sources and database connections. See the [rdocs](http://rdoc.info/github/brighterplanet/earth) for more details on the Earth module.
|
17
19
|
|
18
|
-
###
|
20
|
+
### Data model categories
|
19
21
|
|
20
22
|
<table>
|
21
23
|
<thead>
|
22
24
|
<tr>
|
23
|
-
<th>
|
25
|
+
<th>Category</th>
|
24
26
|
<th>Models</th>
|
25
27
|
</tr>
|
26
28
|
</thead>
|
@@ -83,15 +85,7 @@ ft = AutomobileFuel.first
|
|
83
85
|
|
84
86
|
### Data storage
|
85
87
|
|
86
|
-
You can store Earth data in any relational database. On your very first run, you will need to create the tables for data each model.
|
87
|
-
|
88
|
-
``` ruby
|
89
|
-
require 'activerecord'
|
90
|
-
ActiveRecord::Base.establish_connection :adapter => ... # Not needed if using Rails
|
91
|
-
|
92
|
-
require 'earth'
|
93
|
-
Earth.init :all, :apply_schemas => true
|
94
|
-
```
|
88
|
+
You can store Earth data in any relational database. On your very first run, you will need to create the tables for each data model. You can either use the standard Rails rake tasks (see below) or call `Earth.reset_schemas!`.
|
95
89
|
|
96
90
|
### Pulling data from data.brighterplanet.com
|
97
91
|
|
@@ -99,22 +93,41 @@ By default, Earth will pull data from [data.brighterplanet.com](http://data.brig
|
|
99
93
|
|
100
94
|
``` ruby
|
101
95
|
require 'earth'
|
102
|
-
|
96
|
+
require 'earth/locality/zip_code'
|
97
|
+
|
98
|
+
Earth.init
|
103
99
|
ZipCode.run_data_miner!
|
104
100
|
```
|
105
101
|
|
106
102
|
### Pulling data from the original sources
|
107
103
|
|
108
|
-
If you'd like to bypass the [data.brighterplanet.com](http://data.brighterplanet.com) proxy and pull data directly from authoritative sources (*e.g.,* automobile data from EPA), simply
|
104
|
+
If you'd like to bypass the [data.brighterplanet.com](http://data.brighterplanet.com) proxy and pull data directly from authoritative sources (*e.g.,* automobile data from EPA), simply pass the `:mine_original_sources` option to `Earth.init`:
|
109
105
|
|
110
106
|
``` ruby
|
111
107
|
require 'earth'
|
112
|
-
Earth.init :
|
108
|
+
Earth.init :mine_original_sources => true
|
113
109
|
|
114
|
-
require 'earth/automobile
|
110
|
+
require 'earth/automobile'
|
115
111
|
AutomobileMake.run_data_miner!
|
116
112
|
```
|
117
113
|
|
114
|
+
### Rake tasks
|
115
|
+
|
116
|
+
Earth provides handy Rake tasks for creating, migrating, and data mining models, whether you're using it from a Rails app or a standalone Ruby app.
|
117
|
+
|
118
|
+
In your Rakefile, add:
|
119
|
+
|
120
|
+
require 'earth/tasks'
|
121
|
+
Earth::Tasks.new
|
122
|
+
|
123
|
+
If you're using Earth outside of Rails, all of the default `rake db:*` tasks will now be available. Within Rails, certain tasks are augmented to
|
124
|
+
help manage your Earth models using data_miner and active_record_inline_schema in addition to standard migrations.
|
125
|
+
|
126
|
+
Of note are the following tasks:
|
127
|
+
|
128
|
+
* `rake db:migrate` runs `.create_table!` on each Earth resource model.
|
129
|
+
* `rake db:seed` runs `.run_data_miner!` on each Earth resource model.
|
130
|
+
|
118
131
|
## Collaboration cycle
|
119
132
|
Brighter Planet vigorously encourages collaborative improvement.
|
120
133
|
|
data/Rakefile
CHANGED
data/bin/earth_tester.rb
CHANGED
@@ -16,66 +16,78 @@ if File.exist?(File.join(Dir.pwd, 'earth.gemspec'))
|
|
16
16
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
17
17
|
end
|
18
18
|
|
19
|
-
require '
|
20
|
-
require 'active_record'
|
19
|
+
require 'thor'
|
21
20
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
'password' => 'password'
|
43
|
-
)
|
44
|
-
end
|
21
|
+
class EarthTester < Thor
|
22
|
+
desc :console, "Get a console"
|
23
|
+
method_option :domains, :type => :array, :default => []
|
24
|
+
method_option :database, :type => :string, :default => 'mysql'
|
25
|
+
method_option :reset, :type => :boolean, :desc => 'Fully reset databases'
|
26
|
+
method_option :mine_original_sources, :type => :boolean, :desc => "Load full data_miner blocks"
|
27
|
+
def console
|
28
|
+
earth_options = options.inject({}) do |memo, (k, v)|
|
29
|
+
k = k.to_sym
|
30
|
+
if [:mine_original_sources].include?(k)
|
31
|
+
memo[k] = v
|
32
|
+
end
|
33
|
+
memo
|
34
|
+
end
|
35
|
+
environment
|
36
|
+
Earth.init(*(options.domains.map(&:to_sym)+[earth_options]))
|
37
|
+
require 'pry'
|
38
|
+
Pry.color = false
|
39
|
+
binding.pry
|
40
|
+
end
|
45
41
|
|
46
|
-
|
42
|
+
private
|
43
|
+
|
44
|
+
def environment
|
45
|
+
require 'active_support/all'
|
46
|
+
require 'active_record'
|
47
47
|
|
48
|
-
|
48
|
+
# TODO convert to @dkastner's Earth.database_configurations
|
49
|
+
case options.database
|
50
|
+
when /postgr/i
|
51
|
+
if options.reset
|
52
|
+
system %{dropdb test_earth}
|
53
|
+
system %{createdb test_earth}
|
54
|
+
end
|
55
|
+
ActiveRecord::Base.establish_connection(
|
56
|
+
'adapter' => 'postgresql',
|
57
|
+
'encoding' => 'utf8',
|
58
|
+
'database' => 'test_earth',
|
59
|
+
'username' => `whoami`.chomp
|
60
|
+
)
|
61
|
+
when /sqlite/i
|
62
|
+
if options.reset
|
63
|
+
FileUtils.rm_f 'earth_tester.sqlite3'
|
64
|
+
end
|
65
|
+
ActiveRecord::Base.establish_connection('adapter' => 'sqlite3', 'database' => 'earth_tester.sqlite3')
|
66
|
+
when /mysql/i
|
67
|
+
if options.reset
|
68
|
+
system %{mysql -u root -ppassword -e "DROP DATABASE test_earth"}
|
69
|
+
system %{mysql -u root -ppassword -e "CREATE DATABASE test_earth CHARSET utf8"}
|
70
|
+
end
|
71
|
+
ActiveRecord::Base.establish_connection(
|
72
|
+
'adapter' => (RUBY_PLATFORM == 'java' ? 'mysql' : 'mysql2'),
|
73
|
+
'encoding' => 'utf8',
|
74
|
+
'database' => 'test_earth',
|
75
|
+
'username' => 'root',
|
76
|
+
'password' => 'password'
|
77
|
+
)
|
78
|
+
else
|
79
|
+
raise "not sure what database to connect to - #{options.database.inspect}"
|
80
|
+
end
|
49
81
|
|
50
|
-
|
82
|
+
require 'earth'
|
51
83
|
|
52
|
-
|
53
|
-
ActiveRecord::Base.logger.level = Logger::INFO
|
84
|
+
DataMiner.unit_converter = :conversions
|
54
85
|
|
55
|
-
|
56
|
-
resource_model = resource.constantize
|
57
|
-
if (warnings = resource_model.table_warnings).any?
|
58
|
-
$stderr.puts
|
59
|
-
$stderr.puts '#'*50
|
60
|
-
$stderr.puts "# #{resource}"
|
61
|
-
$stderr.puts '#'*50
|
62
|
-
$stderr.puts
|
63
|
-
warnings.each do |warning|
|
64
|
-
$stderr.puts "* #{warning}"
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
def init(domain)
|
70
|
-
Earth.init domain, :load_data_miner => true, :apply_schemas => true
|
71
|
-
end
|
86
|
+
DataMiner::Run.auto_upgrade!
|
72
87
|
|
73
|
-
|
74
|
-
|
88
|
+
ActiveRecord::Base.logger = Logger.new $stdout
|
89
|
+
ActiveRecord::Base.logger.level = Logger::DEBUG
|
90
|
+
end
|
75
91
|
end
|
76
92
|
|
77
|
-
|
78
|
-
Pry.color = false
|
79
|
-
|
80
|
-
# you prob want to init() something
|
81
|
-
binding.pry
|
93
|
+
EarthTester.start
|
data/earth.gemspec
CHANGED
@@ -20,12 +20,11 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.add_runtime_dependency 'activesupport'
|
21
21
|
s.add_runtime_dependency 'cohort_analysis'
|
22
22
|
s.add_runtime_dependency 'conversions'
|
23
|
-
s.add_runtime_dependency 'data_miner', '>=2.
|
23
|
+
s.add_runtime_dependency 'data_miner', '>=2.4.0'
|
24
24
|
s.add_runtime_dependency 'falls_back_on'
|
25
25
|
s.add_runtime_dependency 'fixed_width-multibyte'
|
26
26
|
s.add_runtime_dependency 'fuzzy_match', '>=1.3.3'
|
27
27
|
s.add_runtime_dependency 'geokit'
|
28
|
-
s.add_runtime_dependency 'active_record_inline_schema'
|
29
28
|
s.add_runtime_dependency 'remote_table', '>=2.0.2'
|
30
29
|
s.add_runtime_dependency 'table_warnings', '>=1.0.1'
|
31
30
|
s.add_runtime_dependency 'to_regexp'
|
@@ -35,10 +34,14 @@ Gem::Specification.new do |s|
|
|
35
34
|
s.add_development_dependency 'bundler'
|
36
35
|
s.add_development_dependency 'charisma'
|
37
36
|
s.add_development_dependency 'cucumber'
|
37
|
+
s.add_development_dependency 'factory_girl'
|
38
38
|
s.add_development_dependency 'mysql2' # for bin/earth_tester.rb; use mysql2 for utf-8 compatibility
|
39
39
|
s.add_development_dependency 'pg'
|
40
40
|
s.add_development_dependency 'rake'
|
41
41
|
s.add_development_dependency 'rdoc'
|
42
42
|
s.add_development_dependency 'rspec'
|
43
|
-
s.add_development_dependency '
|
43
|
+
s.add_development_dependency 'sandbox'
|
44
|
+
s.add_development_dependency 'sqlite3'
|
45
|
+
s.add_development_dependency 'thor'
|
46
|
+
s.add_development_dependency 'pry'
|
44
47
|
end
|
data/lib/earth.rb
CHANGED
@@ -1,147 +1,100 @@
|
|
1
1
|
require 'active_support/core_ext'
|
2
|
+
require 'active_support/string_inquirer'
|
2
3
|
require 'active_record'
|
3
4
|
require 'data_miner'
|
4
|
-
require 'falls_back_on'
|
5
5
|
require 'weighted_average'
|
6
6
|
require 'fixed_width'
|
7
7
|
require 'errata'
|
8
|
-
require 'active_record_inline_schema'
|
9
|
-
require 'table_warnings'
|
10
8
|
require 'fuzzy_match'
|
11
9
|
|
12
|
-
require 'earth/utils'
|
13
10
|
require 'earth/conversions_ext'
|
14
11
|
require 'earth/inflectors'
|
12
|
+
require 'earth/loader'
|
13
|
+
require 'earth/model'
|
14
|
+
require 'earth/utils'
|
15
|
+
require 'earth/warnings'
|
15
16
|
|
16
|
-
|
17
|
-
ActiveRecord::Base.extend Earth::ActiveRecordBaseClassMethods
|
18
|
-
|
19
|
-
# The earth module is an interface for loading data models from various domains.
|
17
|
+
# The earth module is an interface for loading data models
|
20
18
|
module Earth
|
21
|
-
TAPS_SOURCE = 'http://carbon:neutral@data.brighterplanet.com:5000'
|
22
|
-
TAPS_DESCRIPTION = "Brighter Planet's reference data web service"
|
23
19
|
VENDOR_DIR = ::File.expand_path '../../vendor', __FILE__
|
24
20
|
LIB_DIR = ::File.expand_path '../earth', __FILE__
|
25
21
|
DATA_DIR = ::File.expand_path '../../data', __FILE__
|
26
22
|
ERRATA_DIR = ::File.expand_path '../../errata', __FILE__
|
27
23
|
|
28
|
-
|
29
|
-
|
30
|
-
if ::File.directory? path
|
31
|
-
::File.basename path
|
32
|
-
end
|
33
|
-
end.compact.uniq.sort
|
34
|
-
end
|
35
|
-
|
36
|
-
def Earth.resources(*search_domains)
|
37
|
-
search_domains = search_domains.flatten.compact.map(&:to_s)
|
38
|
-
if search_domains.empty?
|
39
|
-
search_domains = domains
|
40
|
-
end
|
41
|
-
search_domains.map do |domain|
|
42
|
-
::Dir[::File.join(LIB_DIR, domain, '**', '*.rb')].map do |possible_resource|
|
43
|
-
unless possible_resource.include?('data_miner')
|
44
|
-
::File.basename(possible_resource, '.rb').camelcase
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end.flatten.compact.sort
|
48
|
-
end
|
24
|
+
mattr_accessor :mine_original_sources
|
25
|
+
mattr_accessor :database_configurations
|
49
26
|
|
50
|
-
# Earth.init
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
27
|
+
# Earth.init is the gateway to using Earth. It can load all models at
|
28
|
+
# once, connect to the database using Rails conventions, and set up
|
29
|
+
# the models to pull data from original sources instead of Brighter
|
30
|
+
# Planet's pre-processed data service.
|
54
31
|
#
|
55
|
-
#
|
56
|
-
#
|
32
|
+
# @param [Symbol] load_directive use `:all` to load all models at once (optional)
|
33
|
+
# @param [Hash] options load options
|
34
|
+
# * :mine_original_sources, if true, will load files necessary to data mine from scratch rather than downloading from data.brighterplanet.com. Note that you must run Earth.init before requiring models in order for this option to work properly.
|
35
|
+
# * :connect will connect to the database for you
|
57
36
|
def Earth.init(*args)
|
58
37
|
options = args.extract_options!
|
59
|
-
domains = args
|
60
38
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
elsif domains.include?(:all) or domains.empty?
|
67
|
-
require_all options
|
68
|
-
else
|
69
|
-
domains.each do |domain|
|
70
|
-
require_domain domain, options
|
71
|
-
end
|
72
|
-
end
|
39
|
+
connect if options[:connect]
|
40
|
+
|
41
|
+
Warnings.check_mysql_ansi_mode
|
42
|
+
|
43
|
+
Earth.mine_original_sources = options[:load_data_miner] || options[:mine_original_sources]
|
73
44
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
resource_model.data_miner_script.append_once :process, :run_data_miner_on_parent_associations!
|
81
|
-
end
|
82
|
-
if options[:load_data_miner]
|
83
|
-
resource_model.data_miner_script.prepend_once :process, :auto_upgrade!
|
84
|
-
else
|
85
|
-
resource_model.data_miner_script.prepend_once :tap, TAPS_DESCRIPTION, TAPS_SOURCE
|
86
|
-
end
|
87
|
-
if options[:apply_schemas]
|
88
|
-
resource_model.auto_upgrade!
|
45
|
+
if args.include? :all
|
46
|
+
require 'earth/all'
|
47
|
+
elsif args.length > 0
|
48
|
+
Kernel.warn "Deprecation Warning: `Earth.init :domain` will be removed. Use `require 'earth/domain'` instead"
|
49
|
+
args.each do |argh|
|
50
|
+
require "earth/#{argh}"
|
89
51
|
end
|
90
52
|
end
|
91
53
|
end
|
92
|
-
|
93
|
-
# internal use
|
94
|
-
def Earth.require_related(path)
|
95
|
-
path = ::File.expand_path path
|
96
|
-
raise ::ArgumentError, %{[earth gem] #{path} is not in #{LIB_DIR}} unless path.start_with?(LIB_DIR)
|
97
|
-
domain = %r{#{LIB_DIR}/([^\./]+)}.match(path).captures.first
|
98
|
-
require_domain domain, :load_data_miner => path.include?('data_miner')
|
99
|
-
end
|
100
54
|
|
101
|
-
#
|
102
|
-
|
103
|
-
|
55
|
+
# List the currently loaded data model class names.
|
56
|
+
#
|
57
|
+
# @return [Array] a list of camelized resource names
|
58
|
+
def Earth.resources
|
59
|
+
@resources ||= Earth.resource_models.map(&:to_s).sort
|
104
60
|
end
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
61
|
+
|
62
|
+
# List the currently loaded data model classes
|
63
|
+
#
|
64
|
+
# @return [Array] a list of resource classes
|
65
|
+
def Earth.resource_models
|
66
|
+
Earth::Model.registry
|
110
67
|
end
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
@require_glob << args
|
117
|
-
data_miner_paths = []
|
118
|
-
::Dir[glob].each do |path|
|
119
|
-
if path.include?('data_miner')
|
120
|
-
data_miner_paths << path
|
121
|
-
else
|
122
|
-
require path
|
123
|
-
end
|
124
|
-
end
|
125
|
-
# load data_miner blocks second to make sure they override
|
126
|
-
data_miner_paths.each do |path|
|
127
|
-
require path
|
128
|
-
end if options[:load_data_miner]
|
129
|
-
nil
|
68
|
+
|
69
|
+
# Connect to the database using ActiveRecord's default behavior
|
70
|
+
def Earth.connect
|
71
|
+
ActiveRecord::Base.establish_connection
|
72
|
+
ActiveRecord::Base.connection
|
130
73
|
end
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
74
|
+
|
75
|
+
# The current environment. Earth detects the following environment variables:
|
76
|
+
#
|
77
|
+
# * EARTH_ENV (for CLI apps and daemons)
|
78
|
+
# * RAILS_ENV
|
79
|
+
# * RACK_ENV
|
80
|
+
#
|
81
|
+
# Default is `development`
|
82
|
+
def Earth.env
|
83
|
+
@env ||= ActiveSupport::StringInquirer.new(ENV['EARTH_ENV'] || ENV['RAILS_ENV'] || ENV['RACK_ENV'] || 'development')
|
139
84
|
end
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
85
|
+
|
86
|
+
# Drop and recreate tables for all currently loaded data models.
|
87
|
+
#
|
88
|
+
def Earth.reset_schemas!
|
89
|
+
Earth.resource_models.each(&:create_table!)
|
90
|
+
end
|
91
|
+
|
92
|
+
# Run data miner on all currently loaded data models.
|
93
|
+
#
|
94
|
+
# @note By default, data is mined from data.brighterplanet.com
|
95
|
+
# via taps. In order to mine from scratch, call Earth.init
|
96
|
+
# with the :mine_original_sources option.
|
97
|
+
def Earth.run_data_miner!
|
98
|
+
DataMiner.run(Earth.resources)
|
146
99
|
end
|
147
100
|
end
|