data_miner 0.3.11 → 0.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/data_miner.gemspec +1 -1
- data/test/data_miner_test.rb +132 -170
- data/test/test_helper.rb +27 -22
- metadata +1 -1
    
        data/VERSION
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            0.3.11
         | 
| 1 | 
            +
            0.3.12
         | 
    
        data/data_miner.gemspec
    CHANGED
    
    | @@ -5,7 +5,7 @@ | |
| 5 5 |  | 
| 6 6 | 
             
            Gem::Specification.new do |s|
         | 
| 7 7 | 
             
              s.name = %q{data_miner}
         | 
| 8 | 
            -
              s.version = "0.3.11"
         | 
| 8 | 
            +
              s.version = "0.3.12"
         | 
| 9 9 |  | 
| 10 10 | 
             
              s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
         | 
| 11 11 | 
             
              s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
         | 
    
        data/test/data_miner_test.rb
    CHANGED
    
    | @@ -551,13 +551,12 @@ end | |
| 551 551 |  | 
| 552 552 | 
             
            class Airport < ActiveRecord::Base
         | 
| 553 553 | 
             
              set_primary_key :iata_code
         | 
| 554 | 
            -
              belongs_to :country
         | 
| 554 | 
            +
              # belongs_to :country
         | 
| 555 555 |  | 
| 556 556 | 
             
              data_miner do
         | 
| 557 557 | 
             
                unique_index 'iata_code'
         | 
| 558 558 |  | 
| 559 | 
            -
                 | 
| 560 | 
            -
                import(:url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? }) do |attr|
         | 
| 559 | 
            +
                import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do |attr|
         | 
| 561 560 | 
             
                  attr.store 'name', :field_number => 1
         | 
| 562 561 | 
             
                  attr.store 'city', :field_number => 2
         | 
| 563 562 | 
             
                  attr.store 'country_name', :field_number => 3
         | 
| @@ -580,6 +579,7 @@ class CensusRegion < ActiveRecord::Base | |
| 580 579 | 
             
                end
         | 
| 581 580 |  | 
| 582 581 | 
             
                # pretend this is a different data source
         | 
| 582 | 
            +
                # fake! just for testing purposes
         | 
| 583 583 | 
             
                import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do |attr|
         | 
| 584 584 | 
             
                  attr.store 'name', :field_name => 'Name'
         | 
| 585 585 | 
             
                  attr.store 'number', :field_name => 'Region'
         | 
| @@ -587,28 +587,58 @@ class CensusRegion < ActiveRecord::Base | |
| 587 587 | 
             
              end
         | 
| 588 588 | 
             
            end
         | 
| 589 589 |  | 
| 590 | 
            +
            # smaller than a region
         | 
| 591 | 
            +
            class CensusDivision < ActiveRecord::Base
         | 
| 592 | 
            +
              set_primary_key :number
         | 
| 593 | 
            +
              # belongs_to :census_region
         | 
| 594 | 
            +
              # has_many :states
         | 
| 595 | 
            +
              # has_many :zip_codes, :through => :states
         | 
| 596 | 
            +
              # has_many :climate_divisions, :through => :states
         | 
| 597 | 
            +
              # has_many :residence_survey_responses
         | 
| 598 | 
            +
              
         | 
| 599 | 
            +
              data_miner do
         | 
| 600 | 
            +
                unique_index 'number'
         | 
| 601 | 
            +
                
         | 
| 602 | 
            +
                import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do |attr|
         | 
| 603 | 
            +
                  attr.store 'name', :field_name => 'Name'
         | 
| 604 | 
            +
                  attr.store 'number', :field_name => 'Division'
         | 
| 605 | 
            +
                  attr.store 'census_region_number', :field_name => 'Region'
         | 
| 606 | 
            +
                  attr.store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
         | 
| 607 | 
            +
                end
         | 
| 608 | 
            +
              end
         | 
| 609 | 
            +
            end
         | 
| 610 | 
            +
             | 
| 590 611 | 
             
            class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
         | 
| 591 612 | 
             
              set_primary_key :department_of_energy_identifier
         | 
| 592 613 |  | 
| 593 614 | 
             
              data_miner do
         | 
| 615 | 
            +
                unique_index 'department_of_energy_identifier'
         | 
| 616 | 
            +
                
         | 
| 617 | 
            +
                process 'Define some unit conversions' do
         | 
| 618 | 
            +
                  Conversions.register :kbtus, :joules, 1_000.0 * 1_055.05585
         | 
| 619 | 
            +
                  Conversions.register :square_feet, :square_metres, 0.09290304
         | 
| 620 | 
            +
                end
         | 
| 621 | 
            +
                
         | 
| 594 622 | 
             
                # conversions are NOT performed here, since we first have to zero out legitimate skips
         | 
| 595 623 | 
             
                # otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
         | 
| 596 | 
            -
                import :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv', :headers => :upcase do |attr|
         | 
| 597 | 
            -
                  unique_index 'department_of_energy_identifier'
         | 
| 598 | 
            -
                  
         | 
| 624 | 
            +
                import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv', :headers => :upcase do |attr|
         | 
| 599 625 | 
             
                  attr.store 'department_of_energy_identifier', :field_name => 'DOEID'
         | 
| 600 626 |  | 
| 601 | 
            -
                  attr.store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/ | 
| 602 | 
            -
                  attr.store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/ | 
| 603 | 
            -
                  attr.store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/ | 
| 604 | 
            -
                  attr.store 'urbanity', :field_name => 'URBRUR', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/ | 
| 605 | 
            -
                  attr.store 'dishwasher_use', :field_name => 'DWASHUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/ | 
| 606 | 
            -
                  attr.store 'central_ac_use', :field_name => 'USECENAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/ | 
| 607 | 
            -
                  attr.store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/ | 
| 608 | 
            -
                  attr.store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/ | 
| 609 | 
            -
                  attr.store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/ | 
| 627 | 
            +
                  attr.store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
         | 
| 628 | 
            +
                  attr.store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
         | 
| 629 | 
            +
                  attr.store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
         | 
| 630 | 
            +
                  attr.store 'urbanity', :field_name => 'URBRUR', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/urbrur/urbrur.csv' }
         | 
| 631 | 
            +
                  attr.store 'dishwasher_use', :field_name => 'DWASHUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dwashuse/dwashuse.csv' }
         | 
| 632 | 
            +
                  attr.store 'central_ac_use', :field_name => 'USECENAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usecenac/usecenac.csv' }
         | 
| 633 | 
            +
                  attr.store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usewwac/usewwac.csv' }
         | 
| 634 | 
            +
                  attr.store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/washload/washload.csv' }
         | 
| 635 | 
            +
                  attr.store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dryruse/dryruse.csv' }
         | 
| 610 636 |  | 
| 611 637 | 
             
                  attr.store 'census_division_number', :field_name => 'DIVISION'
         | 
| 638 | 
            +
                  attr.store 'census_division_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
         | 
| 639 | 
            +
                  attr.store 'census_region_number', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_number', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
         | 
| 640 | 
            +
                  attr.store 'census_region_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
         | 
| 641 | 
            +
                  
         | 
| 612 642 | 
             
                  attr.store 'floorspace', :field_name => 'TOTSQFT'
         | 
| 613 643 | 
             
                  attr.store 'residents', :field_name => 'NHSLDMEM'
         | 
| 614 644 | 
             
                  attr.store 'ownership', :field_name => 'KOWNRENT'
         | 
| @@ -657,169 +687,91 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base | |
| 657 687 | 
             
                  attr.store 'outdoor_all_night_gas_lights', :field_name => 'NGASLIGHT'
         | 
| 658 688 | 
             
                end
         | 
| 659 689 |  | 
| 660 | 
            -
                #  | 
| 661 | 
            -
             | 
| 662 | 
            -
                 | 
| 663 | 
            -
             | 
| 664 | 
            -
             | 
| 665 | 
            -
             | 
| 666 | 
            -
             | 
| 667 | 
            -
             | 
| 668 | 
            -
             | 
| 669 | 
            -
             | 
| 670 | 
            -
             | 
| 671 | 
            -
             | 
| 672 | 
            -
             | 
| 673 | 
            -
             | 
| 674 | 
            -
             | 
| 675 | 
            -
             | 
| 676 | 
            -
             | 
| 677 | 
            -
             | 
| 678 | 
            -
             | 
| 679 | 
            -
             | 
| 680 | 
            -
             | 
| 681 | 
            -
             | 
| 682 | 
            -
             | 
| 683 | 
            -
             | 
| 684 | 
            -
             | 
| 685 | 
            -
             | 
| 686 | 
            -
             | 
| 687 | 
            -
             | 
| 688 | 
            -
             | 
| 689 | 
            -
             | 
| 690 | 
            -
             | 
| 691 | 
            -
             | 
| 692 | 
            -
             | 
| 693 | 
            -
             | 
| 694 | 
            -
             | 
| 695 | 
            -
             | 
| 696 | 
            -
             | 
| 697 | 
            -
             | 
| 698 | 
            -
             | 
| 699 | 
            -
             | 
| 700 | 
            -
             | 
| 701 | 
            -
             | 
| 702 | 
            -
             | 
| 703 | 
            -
             | 
| 704 | 
            -
             | 
| 705 | 
            -
                # attr.affect :efficient_lights_on_1_to_4_hours
         | 
| 706 | 
            -
                # attr.affect :lights_on_4_to_12_hours
         | 
| 707 | 
            -
                # attr.affect :efficient_lights_on_4_to_12_hours
         | 
| 708 | 
            -
                # attr.affect :outdoor_all_night_gas_lights
         | 
| 709 | 
            -
                # attr.affect :outdoor_all_night_lights
         | 
| 710 | 
            -
                # # booleans for which legitimate skip is effectively zero
         | 
| 711 | 
            -
                # attr.affect :thermostat_programmability
         | 
| 712 | 
            -
                # attr.affect :detached_1car_garage
         | 
| 713 | 
            -
                # attr.affect :detached_2car_garage
         | 
| 714 | 
            -
                # attr.affect :detached_3car_garage
         | 
| 715 | 
            -
                # attr.affect :attached_1car_garage
         | 
| 716 | 
            -
                # attr.affect :attached_2car_garage
         | 
| 717 | 
            -
                # attr.affect :attached_3car_garage
         | 
| 718 | 
            -
                # attr.affect :heated_garage
         | 
| 719 | 
            -
                def zero_out_legitimate_skips
         | 
| 720 | 
            -
                  max = maximum(attr_name, :select => "CONVERT(#{attr_name}, UNSIGNED INTEGER)")
         | 
| 721 | 
            -
                  if /^9+$/.match(max.to_i.to_s) # the max is all 999's... it must be a LEGITIMATE SKIP
         | 
| 722 | 
            -
                    logger.info "Zeroing #{attr_name} if it's #{max}"
         | 
| 723 | 
            -
                    update_all("#{attr_name} = 0", "#{attr_name} = #{max}")
         | 
| 690 | 
            +
                # Rather than nullify the continuous variables that EIA identifies as LEGITIMATE SKIPS, we convert them to zero
         | 
| 691 | 
            +
                # This makes it easier to derive useful information like "how many rooms does the house have?"
         | 
| 692 | 
            +
                process 'Zero out what the EIA calls "LEGITIMATE SKIPS"' do
         | 
| 693 | 
            +
                  %w{
         | 
| 694 | 
            +
                    annual_energy_from_electricity_for_air_conditioners
         | 
| 695 | 
            +
                    annual_energy_from_electricity_for_clothes_driers
         | 
| 696 | 
            +
                    annual_energy_from_electricity_for_dishwashers
         | 
| 697 | 
            +
                    annual_energy_from_electricity_for_freezers
         | 
| 698 | 
            +
                    annual_energy_from_electricity_for_heating_space
         | 
| 699 | 
            +
                    annual_energy_from_electricity_for_heating_water
         | 
| 700 | 
            +
                    annual_energy_from_electricity_for_other_appliances
         | 
| 701 | 
            +
                    annual_energy_from_electricity_for_refrigerators
         | 
| 702 | 
            +
                    annual_energy_from_fuel_oil_for_appliances
         | 
| 703 | 
            +
                    annual_energy_from_fuel_oil_for_heating_space
         | 
| 704 | 
            +
                    annual_energy_from_fuel_oil_for_heating_water
         | 
| 705 | 
            +
                    annual_energy_from_kerosene
         | 
| 706 | 
            +
                    annual_energy_from_propane_for_appliances
         | 
| 707 | 
            +
                    annual_energy_from_propane_for_heating_space
         | 
| 708 | 
            +
                    annual_energy_from_propane_for_heating_water
         | 
| 709 | 
            +
                    annual_energy_from_natural_gas_for_appliances
         | 
| 710 | 
            +
                    annual_energy_from_natural_gas_for_heating_space
         | 
| 711 | 
            +
                    annual_energy_from_natural_gas_for_heating_water
         | 
| 712 | 
            +
                    annual_energy_from_wood
         | 
| 713 | 
            +
                    lights_on_1_to_4_hours
         | 
| 714 | 
            +
                    lights_on_over_12_hours
         | 
| 715 | 
            +
                    efficient_lights_on_over_12_hours
         | 
| 716 | 
            +
                    efficient_lights_on_1_to_4_hours
         | 
| 717 | 
            +
                    lights_on_4_to_12_hours
         | 
| 718 | 
            +
                    efficient_lights_on_4_to_12_hours
         | 
| 719 | 
            +
                    outdoor_all_night_gas_lights
         | 
| 720 | 
            +
                    outdoor_all_night_lights
         | 
| 721 | 
            +
                    thermostat_programmability
         | 
| 722 | 
            +
                    detached_1car_garage
         | 
| 723 | 
            +
                    detached_2car_garage
         | 
| 724 | 
            +
                    detached_3car_garage
         | 
| 725 | 
            +
                    attached_1car_garage
         | 
| 726 | 
            +
                    attached_2car_garage
         | 
| 727 | 
            +
                    attached_3car_garage
         | 
| 728 | 
            +
                    heated_garage
         | 
| 729 | 
            +
                  }.each do |attr_name|
         | 
| 730 | 
            +
                    max = maximum attr_name, :select => "CONVERT(#{attr_name}, UNSIGNED INTEGER)"
         | 
| 731 | 
            +
                    # if the maximum value of a row is all 999's, then it's a LEGITIMATE SKIP and we should set it to zero
         | 
| 732 | 
            +
                    if /^9+$/.match(max.to_i.to_s)
         | 
| 733 | 
            +
                      update_all "#{attr_name} = 0", "#{attr_name} = #{max}"
         | 
| 734 | 
            +
                    end
         | 
| 724 735 | 
             
                  end
         | 
| 725 736 | 
             
                end
         | 
| 726 | 
            -
             | 
| 727 | 
            -
                 | 
| 728 | 
            -
             | 
| 729 | 
            -
             | 
| 730 | 
            -
             | 
| 731 | 
            -
             | 
| 732 | 
            -
             | 
| 733 | 
            -
             | 
| 734 | 
            -
             | 
| 735 | 
            -
             | 
| 736 | 
            -
             | 
| 737 | 
            -
             | 
| 738 | 
            -
             | 
| 739 | 
            -
             | 
| 740 | 
            -
             | 
| 741 | 
            -
             | 
| 742 | 
            -
             | 
| 743 | 
            -
             | 
| 744 | 
            -
             | 
| 745 | 
            -
             | 
| 746 | 
            -
             | 
| 747 | 
            -
             | 
| 748 | 
            -
             | 
| 737 | 
            +
             | 
| 738 | 
            +
                process 'Convert units to metric after zeroing out LEGITIMATE SKIPS' do
         | 
| 739 | 
            +
                  [
         | 
| 740 | 
            +
                    [ 'floorspace', :square_feet, :square_metres ],
         | 
| 741 | 
            +
                    [ 'annual_energy_from_fuel_oil_for_heating_space', :kbtus, :joules ],
         | 
| 742 | 
            +
                    [ 'annual_energy_from_fuel_oil_for_heating_water', :kbtus, :joules ],
         | 
| 743 | 
            +
                    [ 'annual_energy_from_fuel_oil_for_appliances', :kbtus, :joules ],
         | 
| 744 | 
            +
                    [ 'annual_energy_from_natural_gas_for_heating_space', :kbtus, :joules ],
         | 
| 745 | 
            +
                    [ 'annual_energy_from_natural_gas_for_heating_water', :kbtus, :joules ],
         | 
| 746 | 
            +
                    [ 'annual_energy_from_natural_gas_for_appliances', :kbtus, :joules ],
         | 
| 747 | 
            +
                    [ 'annual_energy_from_propane_for_heating_space', :kbtus, :joules ],
         | 
| 748 | 
            +
                    [ 'annual_energy_from_propane_for_heating_water', :kbtus, :joules ],
         | 
| 749 | 
            +
                    [ 'annual_energy_from_propane_for_appliances', :kbtus, :joules ],
         | 
| 750 | 
            +
                    [ 'annual_energy_from_wood', :kbtus, :joules ],
         | 
| 751 | 
            +
                    [ 'annual_energy_from_kerosene', :kbtus, :joules ],
         | 
| 752 | 
            +
                    [ 'annual_energy_from_electricity_for_clothes_driers', :kbtus, :joules ],
         | 
| 753 | 
            +
                    [ 'annual_energy_from_electricity_for_dishwashers', :kbtus, :joules ],
         | 
| 754 | 
            +
                    [ 'annual_energy_from_electricity_for_freezers', :kbtus, :joules ],
         | 
| 755 | 
            +
                    [ 'annual_energy_from_electricity_for_refrigerators', :kbtus, :joules ],
         | 
| 756 | 
            +
                    [ 'annual_energy_from_electricity_for_air_conditioners', :kbtus, :joules ],
         | 
| 757 | 
            +
                    [ 'annual_energy_from_electricity_for_heating_space', :kbtus, :joules ],
         | 
| 758 | 
            +
                    [ 'annual_energy_from_electricity_for_heating_water', :kbtus, :joules ],
         | 
| 759 | 
            +
                    [ 'annual_energy_from_electricity_for_other_appliances', :kbtus, :joules ],
         | 
| 760 | 
            +
                  ].each do |attr_name, from_units, to_units|
         | 
| 761 | 
            +
                    update_all "#{attr_name} = #{attr_name} * #{Conversions::Unit.exchange_rate from_units, to_units}"
         | 
| 762 | 
            +
                  end
         | 
| 749 763 | 
             
                end
         | 
| 750 | 
            -
                
         | 
| 751 | 
            -
                def derive_rooms
         | 
| 752 | 
            -
                  'total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
         | 
| 753 764 |  | 
| 765 | 
            +
                process 'Add a new field "rooms" that estimates how many rooms are in the house' do
         | 
| 766 | 
            +
                  update_all 'rooms = total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
         | 
| 754 767 | 
             
                end
         | 
| 755 768 |  | 
| 756 | 
            -
                 | 
| 757 | 
            -
                  '2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
         | 
| 769 | 
            +
                process 'Add a new field "lighting_use" that estimates how many hours light bulbs are turned on in the house' do
         | 
| 770 | 
            +
                  update_all 'lighting_use = 2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
         | 
| 758 771 | 
             
                end
         | 
| 759 772 |  | 
| 760 | 
            -
                 | 
| 761 | 
            -
             | 
| 762 | 
            -
                  '(2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
         | 
| 763 | 
            -
                end
         | 
| 764 | 
            -
                
         | 
| 765 | 
            -
                def derive_residence_air_conditioner_use_id
         | 
| 766 | 
            -
                  find_in_batches do |batch|
         | 
| 767 | 
            -
                    batch.each do |record|
         | 
| 768 | 
            -
                      ce = record.usecenac.to_i
         | 
| 769 | 
            -
                      ww = record.usewwac.to_i
         | 
| 770 | 
            -
                      if ce == 3 or ww == 3
         | 
| 771 | 
            -
                        selector = 3
         | 
| 772 | 
            -
                      elsif ce == 2 or ww == 2
         | 
| 773 | 
            -
                        selector = 2
         | 
| 774 | 
            -
                      elsif ce == 1 or ww == 1
         | 
| 775 | 
            -
                        selector = 1
         | 
| 776 | 
            -
                      elsif ce == 0 or ww == 0
         | 
| 777 | 
            -
                        selector = 0
         | 
| 778 | 
            -
                      elsif ce == 9 or ww == 9
         | 
| 779 | 
            -
                        selector = 9
         | 
| 780 | 
            -
                      else
         | 
| 781 | 
            -
                        raise "something's wrong. usecenac => #{ce}, usewwac => #{ww}"
         | 
| 782 | 
            -
                      end
         | 
| 783 | 
            -
                      record.air_conditioner_use = ResidenceAirConditionerUse.find_by_code(selector)
         | 
| 784 | 
            -
                      record.save if record.changed?
         | 
| 785 | 
            -
                    end
         | 
| 786 | 
            -
                  end
         | 
| 787 | 
            -
                end
         | 
| 788 | 
            -
             | 
| 789 | 
            -
                def derive_residence_clothes_drier_use_id
         | 
| 790 | 
            -
                  find_in_batches do |batch|
         | 
| 791 | 
            -
                    batch.each do |record|
         | 
| 792 | 
            -
                      dr = record.dryruse.to_i
         | 
| 793 | 
            -
                      wa = record.washload.to_i
         | 
| 794 | 
            -
                      selector = case dr
         | 
| 795 | 
            -
                      when 9
         | 
| 796 | 
            -
                        9
         | 
| 797 | 
            -
                      when 1
         | 
| 798 | 
            -
                        wa
         | 
| 799 | 
            -
                      when 2
         | 
| 800 | 
            -
                        if wa == 9
         | 
| 801 | 
            -
                          9
         | 
| 802 | 
            -
                        elsif [ 2, 3, 4, 5 ].include?(wa)
         | 
| 803 | 
            -
                          wa - 1
         | 
| 804 | 
            -
                        else
         | 
| 805 | 
            -
                          1
         | 
| 806 | 
            -
                        end
         | 
| 807 | 
            -
                      when 3
         | 
| 808 | 
            -
                        if wa == 9
         | 
| 809 | 
            -
                          9
         | 
| 810 | 
            -
                        elsif [ 3, 4, 5 ].include?(wa)
         | 
| 811 | 
            -
                          wa - 2
         | 
| 812 | 
            -
                        else
         | 
| 813 | 
            -
                          1
         | 
| 814 | 
            -
                        end
         | 
| 815 | 
            -
                      else
         | 
| 816 | 
            -
                        raise "A something's wrong. dryruse => #{dr}, washload => #{wa}"
         | 
| 817 | 
            -
                      end
         | 
| 818 | 
            -
                      record.clothes_drier_use = ResidenceClothesDrierUse.find_by_code(selector)
         | 
| 819 | 
            -
                      raise "B something's wrong. dryruse => #{dr}, washload => #{wa}" if record.clothes_drier_use.nil?
         | 
| 820 | 
            -
                      record.save if record.changed?
         | 
| 821 | 
            -
                    end
         | 
| 822 | 
            -
                  end
         | 
| 773 | 
            +
                process 'Add a new field "lighting_efficiency" that estimates what percentage of light bulbs in a house are energy-efficient' do
         | 
| 774 | 
            +
                  update_all 'lighting_efficiency = (2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
         | 
| 823 775 | 
             
                end
         | 
| 824 776 | 
             
              end
         | 
| 825 777 | 
             
            end
         | 
| @@ -827,6 +779,16 @@ end | |
| 827 779 | 
             
            # todo: have somebody properly organize these
         | 
| 828 780 | 
             
            class DataMinerTest < Test::Unit::TestCase
         | 
| 829 781 | 
             
              if ENV['FAST'] == 'true'
         | 
| 782 | 
            +
                should "import airports" do
         | 
| 783 | 
            +
                  Airport.run_data_miner!
         | 
| 784 | 
            +
                  assert Airport.count > 0
         | 
| 785 | 
            +
                end
         | 
| 786 | 
            +
                
         | 
| 787 | 
            +
                should "pull in census divisions using a data.brighterplanet.com dictionary" do
         | 
| 788 | 
            +
                  CensusDivision.run_data_miner!
         | 
| 789 | 
            +
                  assert CensusDivision.count > 0
         | 
| 790 | 
            +
                end
         | 
| 791 | 
            +
                
         | 
| 830 792 | 
             
                should "have a way to queue up runs that works with delated_job's send_later" do
         | 
| 831 793 | 
             
                  assert AutomobileVariant.respond_to?(:run_data_miner!)
         | 
| 832 794 | 
             
                end
         | 
    
        data/test/test_helper.rb
    CHANGED
    
    | @@ -18,21 +18,19 @@ class Test::Unit::TestCase | |
| 18 18 | 
             
            end
         | 
| 19 19 |  | 
| 20 20 | 
             
            ActiveRecord::Schema.define(:version => 20090819143429) do
         | 
| 21 | 
            -
              create_table  | 
| 22 | 
            -
                t.string | 
| 23 | 
            -
                
         | 
| 24 | 
            -
                t.string    | 
| 25 | 
            -
                t.string    | 
| 26 | 
            -
                t. | 
| 27 | 
            -
                t. | 
| 28 | 
            -
                t. | 
| 29 | 
            -
                t. | 
| 30 | 
            -
                t.datetime "created_at"
         | 
| 31 | 
            -
                t.datetime "updated_at"
         | 
| 21 | 
            +
              create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
         | 
| 22 | 
            +
                t.string   'iata_code'
         | 
| 23 | 
            +
                t.string   'name'
         | 
| 24 | 
            +
                t.string   'city'
         | 
| 25 | 
            +
                t.string   'country_name'
         | 
| 26 | 
            +
                t.float    'latitude'
         | 
| 27 | 
            +
                t.float    'longitude'
         | 
| 28 | 
            +
                t.datetime 'created_at'
         | 
| 29 | 
            +
                t.datetime 'updated_at'
         | 
| 32 30 | 
             
                t.integer 'data_miner_touch_count'
         | 
| 33 31 | 
             
                t.integer 'data_miner_last_run_id'
         | 
| 34 32 | 
             
              end
         | 
| 35 | 
            -
              execute  | 
| 33 | 
            +
              execute 'ALTER TABLE airports ADD PRIMARY KEY (iata_code);'
         | 
| 36 34 |  | 
| 37 35 | 
             
              create_table "countries", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
         | 
| 38 36 | 
             
                t.string   "iso_3166"
         | 
| @@ -45,15 +43,28 @@ ActiveRecord::Schema.define(:version => 20090819143429) do | |
| 45 43 | 
             
              execute "ALTER TABLE countries ADD PRIMARY KEY (iso_3166);"
         | 
| 46 44 |  | 
| 47 45 | 
             
              create_table "census_regions", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
         | 
| 46 | 
            +
                t.integer  "number"
         | 
| 48 47 | 
             
                t.string   "name"
         | 
| 49 48 | 
             
                t.datetime "updated_at"
         | 
| 50 49 | 
             
                t.datetime "created_at"
         | 
| 51 | 
            -
                t.integer  "number"
         | 
| 52 50 | 
             
                t.integer 'data_miner_touch_count'
         | 
| 53 51 | 
             
                t.integer 'data_miner_last_run_id'
         | 
| 54 52 | 
             
              end
         | 
| 55 53 | 
             
              execute "ALTER TABLE census_regions ADD PRIMARY KEY (number);"
         | 
| 56 54 |  | 
| 55 | 
            +
              create_table 'census_divisions', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
         | 
| 56 | 
            +
                t.integer  'number'
         | 
| 57 | 
            +
                t.string   'name'
         | 
| 58 | 
            +
                t.datetime 'updated_at'
         | 
| 59 | 
            +
                t.datetime 'created_at'
         | 
| 60 | 
            +
                t.string   'census_region_name'
         | 
| 61 | 
            +
                t.integer  'census_region_number'
         | 
| 62 | 
            +
                
         | 
| 63 | 
            +
                t.integer 'data_miner_touch_count'
         | 
| 64 | 
            +
                t.integer 'data_miner_last_run_id'
         | 
| 65 | 
            +
              end
         | 
| 66 | 
            +
              execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
         | 
| 67 | 
            +
              
         | 
| 57 68 | 
             
              create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
         | 
| 58 69 | 
             
                t.string   "automobile_make_id"
         | 
| 59 70 | 
             
                t.string   "automobile_model_id"
         | 
| @@ -178,17 +189,11 @@ ActiveRecord::Schema.define(:version => 20090819143429) do | |
| 178 189 | 
             
                t.string   "clothes_washer_use"
         | 
| 179 190 | 
             
                t.string   "clothes_dryer_use"
         | 
| 180 191 |  | 
| 181 | 
            -
                # not done
         | 
| 182 192 | 
             
                t.integer "census_division_number"
         | 
| 183 | 
            -
                t.string " | 
| 184 | 
            -
                t. | 
| 185 | 
            -
                 | 
| 193 | 
            +
                t.string "census_division_name"
         | 
| 194 | 
            +
                t.integer "census_region_number"
         | 
| 195 | 
            +
                t.string "census_region_name"
         | 
| 186 196 |  | 
| 187 | 
            -
                # not done
         | 
| 188 | 
            -
                t.integer  "residence_air_conditioner_use_id"
         | 
| 189 | 
            -
                t.integer  "residence_clothes_drier_use_id"
         | 
| 190 | 
            -
                # ---
         | 
| 191 | 
            -
             | 
| 192 197 | 
             
                t.float    "rooms"
         | 
| 193 198 | 
             
                t.float    "floorspace"
         | 
| 194 199 | 
             
                t.integer  "residents"
         |