data_miner 0.3.11 → 0.3.12

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.11
1
+ 0.3.12
data/data_miner.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.3.11"
8
+ s.version = "0.3.12"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
@@ -551,13 +551,12 @@ end
551
551
 
552
552
  class Airport < ActiveRecord::Base
553
553
  set_primary_key :iata_code
554
- belongs_to :country
554
+ # belongs_to :country
555
555
 
556
556
  data_miner do
557
557
  unique_index 'iata_code'
558
558
 
559
- # import airport iata_code, name, etc.
560
- import(:url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? }) do |attr|
559
+ import :url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false, :select => lambda { |row| row[4].present? } do |attr|
561
560
  attr.store 'name', :field_number => 1
562
561
  attr.store 'city', :field_number => 2
563
562
  attr.store 'country_name', :field_number => 3
@@ -580,6 +579,7 @@ class CensusRegion < ActiveRecord::Base
580
579
  end
581
580
 
582
581
  # pretend this is a different data source
582
+ # fake! just for testing purposes
583
583
  import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Region'].to_i > 0 and row['Division'].to_s.strip == 'X'} do |attr|
584
584
  attr.store 'name', :field_name => 'Name'
585
585
  attr.store 'number', :field_name => 'Region'
@@ -587,28 +587,58 @@ class CensusRegion < ActiveRecord::Base
587
587
  end
588
588
  end
589
589
 
590
+ # smaller than a region
591
+ class CensusDivision < ActiveRecord::Base
592
+ set_primary_key :number
593
+ # belongs_to :census_region
594
+ # has_many :states
595
+ # has_many :zip_codes, :through => :states
596
+ # has_many :climate_divisions, :through => :states
597
+ # has_many :residence_survey_responses
598
+
599
+ data_miner do
600
+ unique_index 'number'
601
+
602
+ import :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do |attr|
603
+ attr.store 'name', :field_name => 'Name'
604
+ attr.store 'number', :field_name => 'Division'
605
+ attr.store 'census_region_number', :field_name => 'Region'
606
+ attr.store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
607
+ end
608
+ end
609
+ end
610
+
590
611
  class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
591
612
  set_primary_key :department_of_energy_identifier
592
613
 
593
614
  data_miner do
615
+ unique_index 'department_of_energy_identifier'
616
+
617
+ process 'Define some unit conversions' do
618
+ Conversions.register :kbtus, :joules, 1_000.0 * 1_055.05585
619
+ Conversions.register :square_feet, :square_metres, 0.09290304
620
+ end
621
+
594
622
  # conversions are NOT performed here, since we first have to zero out legitimate skips
595
623
  # otherwise you will get values like "999 pounds = 453.138778 kilograms" (where 999 is really a legit skip)
596
- import :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv', :headers => :upcase do |attr|
597
- unique_index 'department_of_energy_identifier'
598
-
624
+ import 'RECs 2005 (but not converting units to metric just yet)', :url => 'http://www.eia.doe.gov/emeu/recs/recspubuse05/datafiles/RECS05alldata.csv', :headers => :upcase do |attr|
599
625
  attr.store 'department_of_energy_identifier', :field_name => 'DOEID'
600
626
 
601
- attr.store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/data_helpers/raw/master/typehuq/typehuq.csv' }
602
- attr.store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/data_helpers/raw/master/yearmade/yearmade.csv' }
603
- attr.store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/data_helpers/raw/master/yearmade/yearmade.csv' }
604
- attr.store 'urbanity', :field_name => 'URBRUR', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/data_helpers/raw/master/urbrur/urbrur.csv' }
605
- attr.store 'dishwasher_use', :field_name => 'DWASHUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/data_helpers/raw/master/dwashuse/dwashuse.csv' }
606
- attr.store 'central_ac_use', :field_name => 'USECENAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/data_helpers/raw/master/usecenac/usecenac.csv' }
607
- attr.store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/data_helpers/raw/master/usewwac/usewwac.csv' }
608
- attr.store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/data_helpers/raw/master/washload/washload.csv' }
609
- attr.store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/data_helpers/raw/master/dryruse/dryruse.csv' }
627
+ attr.store 'residence_class', :field_name => 'TYPEHUQ', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/typehuq/typehuq.csv' }
628
+ attr.store 'construction_year', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Date in the middle (synthetic)', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
629
+ attr.store 'construction_period', :field_name => 'YEARMADE', :dictionary => { :input => 'Code', :sprintf => '%02d', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/yearmade/yearmade.csv' }
630
+ attr.store 'urbanity', :field_name => 'URBRUR', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/urbrur/urbrur.csv' }
631
+ attr.store 'dishwasher_use', :field_name => 'DWASHUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dwashuse/dwashuse.csv' }
632
+ attr.store 'central_ac_use', :field_name => 'USECENAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usecenac/usecenac.csv' }
633
+ attr.store 'window_ac_use', :field_name => 'USEWWAC', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/usewwac/usewwac.csv' }
634
+ attr.store 'clothes_washer_use', :field_name => 'WASHLOAD', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/washload/washload.csv' }
635
+ attr.store 'clothes_dryer_use', :field_name => 'DRYRUSE', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/manually_curated_data/raw/master/dryruse/dryruse.csv' }
610
636
 
611
637
  attr.store 'census_division_number', :field_name => 'DIVISION'
638
+ attr.store 'census_division_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
639
+ attr.store 'census_region_number', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_number', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
640
+ attr.store 'census_region_name', :field_name => 'DIVISION', :dictionary => { :input => 'number', :output => 'census_region_name', :url => 'http://data.brighterplanet.com/census_divisions.csv' }
641
+
612
642
  attr.store 'floorspace', :field_name => 'TOTSQFT'
613
643
  attr.store 'residents', :field_name => 'NHSLDMEM'
614
644
  attr.store 'ownership', :field_name => 'KOWNRENT'
@@ -657,169 +687,91 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
657
687
  attr.store 'outdoor_all_night_gas_lights', :field_name => 'NGASLIGHT'
658
688
  end
659
689
 
660
- # process :zero_out_legitimate_skips
661
-
662
- # process :convert_units_after_zeroing_legitimate_skips
663
-
664
- # process :derive_rooms
665
-
666
- # process :derive_lighting_use
667
-
668
- # process :derive_lighting_efficiency
669
-
670
- # CensusDivision needs its own dataminer
671
- # attr.store 'census_division', :field_name => 'DIVISION', :dictionary => { :input => 'Code', :output => 'Description', :url => 'http://github.com/brighterplanet/data_helpers/raw/master/division/division.csv' }
672
-
673
- # this is basically process :derive_census_region
674
- # step.derive :census_region_id, :set => '(SELECT census_regions.id FROM census_regions INNER JOIN census_divisions ON census_regions.id = census_divisions.census_region_id WHERE census_divisions.id = residence_survey_responses.census_division_id)'
675
-
676
- # process :derive_residence_air_conditioner_use_id
677
-
678
- # process :derive_residence_clothes_drier_use_id
679
- end
680
-
681
- class << self
682
- # # continuous variables for which legitimate skip is effectively zero
683
- # attr.affect :annual_energy_from_electricity_for_air_conditioners
684
- # attr.affect :annual_energy_from_electricity_for_clothes_driers
685
- # attr.affect :annual_energy_from_electricity_for_dishwashers
686
- # attr.affect :annual_energy_from_electricity_for_freezers
687
- # attr.affect :annual_energy_from_electricity_for_heating_space
688
- # attr.affect :annual_energy_from_electricity_for_heating_water
689
- # attr.affect :annual_energy_from_electricity_for_other_appliances
690
- # attr.affect :annual_energy_from_electricity_for_refrigerators
691
- # attr.affect :annual_energy_from_fuel_oil_for_appliances
692
- # attr.affect :annual_energy_from_fuel_oil_for_heating_space
693
- # attr.affect :annual_energy_from_fuel_oil_for_heating_water
694
- # attr.affect :annual_energy_from_kerosene
695
- # attr.affect :annual_energy_from_propane_for_appliances
696
- # attr.affect :annual_energy_from_propane_for_heating_space
697
- # attr.affect :annual_energy_from_propane_for_heating_water
698
- # attr.affect :annual_energy_from_natural_gas_for_appliances
699
- # attr.affect :annual_energy_from_natural_gas_for_heating_space
700
- # attr.affect :annual_energy_from_natural_gas_for_heating_water
701
- # attr.affect :annual_energy_from_wood
702
- # attr.affect :lights_on_1_to_4_hours
703
- # attr.affect :lights_on_over_12_hours
704
- # attr.affect :efficient_lights_on_over_12_hours
705
- # attr.affect :efficient_lights_on_1_to_4_hours
706
- # attr.affect :lights_on_4_to_12_hours
707
- # attr.affect :efficient_lights_on_4_to_12_hours
708
- # attr.affect :outdoor_all_night_gas_lights
709
- # attr.affect :outdoor_all_night_lights
710
- # # booleans for which legitimate skip is effectively zero
711
- # attr.affect :thermostat_programmability
712
- # attr.affect :detached_1car_garage
713
- # attr.affect :detached_2car_garage
714
- # attr.affect :detached_3car_garage
715
- # attr.affect :attached_1car_garage
716
- # attr.affect :attached_2car_garage
717
- # attr.affect :attached_3car_garage
718
- # attr.affect :heated_garage
719
- def zero_out_legitimate_skips
720
- max = maximum(attr_name, :select => "CONVERT(#{attr_name}, UNSIGNED INTEGER)")
721
- if /^9+$/.match(max.to_i.to_s) # the max is all 999's... it must be a LEGITIMATE SKIP
722
- logger.info "Zeroing #{attr_name} if it's #{max}"
723
- update_all("#{attr_name} = 0", "#{attr_name} = #{max}")
690
+ # Rather than nullify the continuous variables that EIA identifies as LEGITIMATE SKIPS, we convert them to zero
691
+ # This makes it easier to derive useful information like "how many rooms does the house have?"
692
+ process 'Zero out what the EIA calls "LEGITIMATE SKIPS"' do
693
+ %w{
694
+ annual_energy_from_electricity_for_air_conditioners
695
+ annual_energy_from_electricity_for_clothes_driers
696
+ annual_energy_from_electricity_for_dishwashers
697
+ annual_energy_from_electricity_for_freezers
698
+ annual_energy_from_electricity_for_heating_space
699
+ annual_energy_from_electricity_for_heating_water
700
+ annual_energy_from_electricity_for_other_appliances
701
+ annual_energy_from_electricity_for_refrigerators
702
+ annual_energy_from_fuel_oil_for_appliances
703
+ annual_energy_from_fuel_oil_for_heating_space
704
+ annual_energy_from_fuel_oil_for_heating_water
705
+ annual_energy_from_kerosene
706
+ annual_energy_from_propane_for_appliances
707
+ annual_energy_from_propane_for_heating_space
708
+ annual_energy_from_propane_for_heating_water
709
+ annual_energy_from_natural_gas_for_appliances
710
+ annual_energy_from_natural_gas_for_heating_space
711
+ annual_energy_from_natural_gas_for_heating_water
712
+ annual_energy_from_wood
713
+ lights_on_1_to_4_hours
714
+ lights_on_over_12_hours
715
+ efficient_lights_on_over_12_hours
716
+ efficient_lights_on_1_to_4_hours
717
+ lights_on_4_to_12_hours
718
+ efficient_lights_on_4_to_12_hours
719
+ outdoor_all_night_gas_lights
720
+ outdoor_all_night_lights
721
+ thermostat_programmability
722
+ detached_1car_garage
723
+ detached_2car_garage
724
+ detached_3car_garage
725
+ attached_1car_garage
726
+ attached_2car_garage
727
+ attached_3car_garage
728
+ heated_garage
729
+ }.each do |attr_name|
730
+ max = maximum attr_name, :select => "CONVERT(#{attr_name}, UNSIGNED INTEGER)"
731
+ # if the maximum value found in this column consists only of 9's (e.g. 999), it is the LEGITIMATE SKIP marker, so every row holding that value is set to zero
732
+ if /^9+$/.match(max.to_i.to_s)
733
+ update_all "#{attr_name} = 0", "#{attr_name} = #{max}"
734
+ end
724
735
  end
725
736
  end
726
-
727
- # attr.affect :annual_energy_from_fuel_oil_for_heating_space, :from => :kbtus, :to => :joules
728
- # attr.affect :annual_energy_from_fuel_oil_for_heating_water, :from => :kbtus, :to => :joules
729
- # attr.affect :annual_energy_from_fuel_oil_for_appliances, :from => :kbtus, :to => :joules
730
- # attr.affect :annual_energy_from_natural_gas_for_heating_space, :from => :kbtus, :to => :joules
731
- # attr.affect :annual_energy_from_natural_gas_for_heating_water, :from => :kbtus, :to => :joules
732
- # attr.affect :annual_energy_from_natural_gas_for_appliances, :from => :kbtus, :to => :joules
733
- # attr.affect :annual_energy_from_propane_for_heating_space, :from => :kbtus, :to => :joules
734
- # attr.affect :annual_energy_from_propane_for_heating_water, :from => :kbtus, :to => :joules
735
- # attr.affect :annual_energy_from_propane_for_appliances, :from => :kbtus, :to => :joules
736
- # attr.affect :annual_energy_from_wood, :from => :kbtus, :to => :joules
737
- # attr.affect :annual_energy_from_kerosene, :from => :kbtus, :to => :joules
738
- # attr.affect :annual_energy_from_electricity_for_clothes_driers, :from => :kbtus, :to => :joules
739
- # attr.affect :annual_energy_from_electricity_for_dishwashers, :from => :kbtus, :to => :joules
740
- # attr.affect :annual_energy_from_electricity_for_freezers, :from => :kbtus, :to => :joules
741
- # attr.affect :annual_energy_from_electricity_for_refrigerators, :from => :kbtus, :to => :joules
742
- # attr.affect :annual_energy_from_electricity_for_air_conditioners, :from => :kbtus, :to => :joules
743
- # attr.affect :annual_energy_from_electricity_for_heating_space, :from => :kbtus, :to => :joules
744
- # attr.affect :annual_energy_from_electricity_for_heating_water, :from => :kbtus, :to => :joules
745
- # attr.affect :annual_energy_from_electricity_for_other_appliances, :from => :kbtus, :to => :joules
746
- # attr.affect :floorspace, :from => :square_feet, :to => :square_metres
747
- def convert_units_after_zeroing_legitimate_skips
748
- update_all("#{attr_name} = #{attr_name} * #{Conversions::Unit.exchange_rate(attr_options[:from], attr_options[:to])}")
737
+
738
+ process 'Convert units to metric after zeroing out LEGITIMATE SKIPS' do
739
+ [
740
+ [ 'floorspace', :square_feet, :square_metres ],
741
+ [ 'annual_energy_from_fuel_oil_for_heating_space', :kbtus, :joules ],
742
+ [ 'annual_energy_from_fuel_oil_for_heating_water', :kbtus, :joules ],
743
+ [ 'annual_energy_from_fuel_oil_for_appliances', :kbtus, :joules ],
744
+ [ 'annual_energy_from_natural_gas_for_heating_space', :kbtus, :joules ],
745
+ [ 'annual_energy_from_natural_gas_for_heating_water', :kbtus, :joules ],
746
+ [ 'annual_energy_from_natural_gas_for_appliances', :kbtus, :joules ],
747
+ [ 'annual_energy_from_propane_for_heating_space', :kbtus, :joules ],
748
+ [ 'annual_energy_from_propane_for_heating_water', :kbtus, :joules ],
749
+ [ 'annual_energy_from_propane_for_appliances', :kbtus, :joules ],
750
+ [ 'annual_energy_from_wood', :kbtus, :joules ],
751
+ [ 'annual_energy_from_kerosene', :kbtus, :joules ],
752
+ [ 'annual_energy_from_electricity_for_clothes_driers', :kbtus, :joules ],
753
+ [ 'annual_energy_from_electricity_for_dishwashers', :kbtus, :joules ],
754
+ [ 'annual_energy_from_electricity_for_freezers', :kbtus, :joules ],
755
+ [ 'annual_energy_from_electricity_for_refrigerators', :kbtus, :joules ],
756
+ [ 'annual_energy_from_electricity_for_air_conditioners', :kbtus, :joules ],
757
+ [ 'annual_energy_from_electricity_for_heating_space', :kbtus, :joules ],
758
+ [ 'annual_energy_from_electricity_for_heating_water', :kbtus, :joules ],
759
+ [ 'annual_energy_from_electricity_for_other_appliances', :kbtus, :joules ],
760
+ ].each do |attr_name, from_units, to_units|
761
+ update_all "#{attr_name} = #{attr_name} * #{Conversions::Unit.exchange_rate from_units, to_units}"
762
+ end
749
763
  end
750
-
751
- def derive_rooms
752
- 'total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
753
764
 
765
+ process 'Add a new field "rooms" that estimates how many rooms are in the house' do
766
+ update_all 'rooms = total_rooms + bathrooms/2 + halfbaths/4 + heated_garage*(attached_1car_garage + detached_1car_garage + 2*(attached_2car_garage + detached_2car_garage) + 3*(attached_3car_garage + detached_3car_garage))'
754
767
  end
755
768
 
756
- def derive_lighting_use
757
- '2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
769
+ process 'Add a new field "lighting_use" that estimates how many hours light bulbs are turned on in the house' do
770
+ update_all 'lighting_use = 2*(lights_on_1_to_4_hours + efficient_lights_on_1_to_4_hours) + 8*(lights_on_4_to_12_hours + efficient_lights_on_4_to_12_hours) + 16*(lights_on_over_12_hours + efficient_lights_on_over_12_hours) + 12*(outdoor_all_night_lights + outdoor_all_night_gas_lights)'
758
771
  end
759
772
 
760
- # will be null if lighting_use is zero
761
- def derive_lighting_efficiency
762
- '(2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
763
- end
764
-
765
- def derive_residence_air_conditioner_use_id
766
- find_in_batches do |batch|
767
- batch.each do |record|
768
- ce = record.usecenac.to_i
769
- ww = record.usewwac.to_i
770
- if ce == 3 or ww == 3
771
- selector = 3
772
- elsif ce == 2 or ww == 2
773
- selector = 2
774
- elsif ce == 1 or ww == 1
775
- selector = 1
776
- elsif ce == 0 or ww == 0
777
- selector = 0
778
- elsif ce == 9 or ww == 9
779
- selector = 9
780
- else
781
- raise "something's wrong. usecenac => #{ce}, usewwac => #{ww}"
782
- end
783
- record.air_conditioner_use = ResidenceAirConditionerUse.find_by_code(selector)
784
- record.save if record.changed?
785
- end
786
- end
787
- end
788
-
789
- def derive_residence_clothes_drier_use_id
790
- find_in_batches do |batch|
791
- batch.each do |record|
792
- dr = record.dryruse.to_i
793
- wa = record.washload.to_i
794
- selector = case dr
795
- when 9
796
- 9
797
- when 1
798
- wa
799
- when 2
800
- if wa == 9
801
- 9
802
- elsif [ 2, 3, 4, 5 ].include?(wa)
803
- wa - 1
804
- else
805
- 1
806
- end
807
- when 3
808
- if wa == 9
809
- 9
810
- elsif [ 3, 4, 5 ].include?(wa)
811
- wa - 2
812
- else
813
- 1
814
- end
815
- else
816
- raise "A something's wrong. dryruse => #{dr}, washload => #{wa}"
817
- end
818
- record.clothes_drier_use = ResidenceClothesDrierUse.find_by_code(selector)
819
- raise "B something's wrong. dryruse => #{dr}, washload => #{wa}" if record.clothes_drier_use.nil?
820
- record.save if record.changed?
821
- end
822
- end
773
+ process 'Add a new field "lighting_efficiency" that estimates what percentage of light bulbs in a house are energy-efficient' do
774
+ update_all 'lighting_efficiency = (2*efficient_lights_on_1_to_4_hours + 8*efficient_lights_on_4_to_12_hours + 16*efficient_lights_on_over_12_hours) / lighting_use'
823
775
  end
824
776
  end
825
777
  end
@@ -827,6 +779,16 @@ end
827
779
  # todo: have somebody properly organize these
828
780
  class DataMinerTest < Test::Unit::TestCase
829
781
  if ENV['FAST'] == 'true'
782
+ should "import airports" do
783
+ Airport.run_data_miner!
784
+ assert Airport.count > 0
785
+ end
786
+
787
+ should "pull in census divisions using a data.brighterplanet.com dictionary" do
788
+ CensusDivision.run_data_miner!
789
+ assert CensusDivision.count > 0
790
+ end
791
+
830
792
  should "have a way to queue up runs that works with delated_job's send_later" do
831
793
  assert AutomobileVariant.respond_to?(:run_data_miner!)
832
794
  end
data/test/test_helper.rb CHANGED
@@ -18,21 +18,19 @@ class Test::Unit::TestCase
18
18
  end
19
19
 
20
20
  ActiveRecord::Schema.define(:version => 20090819143429) do
21
- create_table "airports", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
22
- t.string "country_id"
23
-
24
- t.string "iata_code"
25
- t.string "name"
26
- t.string "city"
27
- t.string "country_name"
28
- t.float "latitude"
29
- t.float "longitude"
30
- t.datetime "created_at"
31
- t.datetime "updated_at"
21
+ create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
22
+ t.string 'iata_code'
23
+ t.string 'name'
24
+ t.string 'city'
25
+ t.string 'country_name'
26
+ t.float 'latitude'
27
+ t.float 'longitude'
28
+ t.datetime 'created_at'
29
+ t.datetime 'updated_at'
32
30
  t.integer 'data_miner_touch_count'
33
31
  t.integer 'data_miner_last_run_id'
34
32
  end
35
- execute "ALTER TABLE airports ADD PRIMARY KEY (iata_code);"
33
+ execute 'ALTER TABLE airports ADD PRIMARY KEY (iata_code);'
36
34
 
37
35
  create_table "countries", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
38
36
  t.string "iso_3166"
@@ -45,15 +43,28 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
45
43
  execute "ALTER TABLE countries ADD PRIMARY KEY (iso_3166);"
46
44
 
47
45
  create_table "census_regions", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
46
+ t.integer "number"
48
47
  t.string "name"
49
48
  t.datetime "updated_at"
50
49
  t.datetime "created_at"
51
- t.integer "number"
52
50
  t.integer 'data_miner_touch_count'
53
51
  t.integer 'data_miner_last_run_id'
54
52
  end
55
53
  execute "ALTER TABLE census_regions ADD PRIMARY KEY (number);"
56
54
 
55
+ create_table 'census_divisions', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
56
+ t.integer 'number'
57
+ t.string 'name'
58
+ t.datetime 'updated_at'
59
+ t.datetime 'created_at'
60
+ t.string 'census_region_name'
61
+ t.integer 'census_region_number'
62
+
63
+ t.integer 'data_miner_touch_count'
64
+ t.integer 'data_miner_last_run_id'
65
+ end
66
+ execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
67
+
57
68
  create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
58
69
  t.string "automobile_make_id"
59
70
  t.string "automobile_model_id"
@@ -178,17 +189,11 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
178
189
  t.string "clothes_washer_use"
179
190
  t.string "clothes_dryer_use"
180
191
 
181
- # not done
182
192
  t.integer "census_division_number"
183
- t.string "census_division"
184
- t.string "census_region"
185
- # ------
193
+ t.string "census_division_name"
194
+ t.integer "census_region_number"
195
+ t.string "census_region_name"
186
196
 
187
- # not done
188
- t.integer "residence_air_conditioner_use_id"
189
- t.integer "residence_clothes_drier_use_id"
190
- # ---
191
-
192
197
  t.float "rooms"
193
198
  t.float "floorspace"
194
199
  t.integer "residents"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.11
4
+ version: 0.3.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere