data_miner 0.3.10 → 0.3.11
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/data_miner.gemspec +1 -1
- data/lib/data_miner.rb +3 -0
- data/test/data_miner_test.rb +89 -82
- metadata +1 -1
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.10
|
1
|
+
0.3.11
|
data/data_miner.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.3.10"
|
8
|
+
s.version = "0.3.11"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
data/lib/data_miner.rb
CHANGED
@@ -64,6 +64,9 @@ ActiveRecord::Base.class_eval do
|
|
64
64
|
def self.data_miner_runs
|
65
65
|
DataMiner::Run.scoped :conditions => { :resource_name => name }
|
66
66
|
end
|
67
|
+
def self.run_data_miner!(options = {})
|
68
|
+
data_miner_config.run options
|
69
|
+
end
|
67
70
|
end
|
68
71
|
self.data_miner_config = DataMiner::Configuration.new self
|
69
72
|
|
data/test/data_miner_test.rb
CHANGED
@@ -824,105 +824,112 @@ class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
824
824
|
end
|
825
825
|
end
|
826
826
|
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
a
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
827
|
+
# todo: have somebody properly organize these
|
828
|
+
class DataMinerTest < Test::Unit::TestCase
|
829
|
+
if ENV['FAST'] == 'true'
|
830
|
+
should "have a way to queue up runs that works with delated_job's send_later" do
|
831
|
+
assert AutomobileVariant.respond_to?(:run_data_miner!)
|
832
|
+
end
|
833
|
+
|
834
|
+
should "be idempotent" do
|
835
|
+
Country.data_miner_config.run
|
836
|
+
a = Country.count
|
837
|
+
Country.data_miner_config.run
|
838
|
+
b = Country.count
|
839
|
+
assert_equal a, b
|
840
|
+
|
841
|
+
CensusRegion.data_miner_config.run
|
842
|
+
a = CensusRegion.count
|
843
|
+
CensusRegion.data_miner_config.run
|
844
|
+
b = CensusRegion.count
|
845
|
+
assert_equal a, b
|
846
|
+
end
|
847
|
+
|
848
|
+
should "assume that no unique indices means it wants a big hash" do
|
849
|
+
assert_raises DataMiner::MissingHashColumn do
|
850
|
+
class IncompleteCountry < ActiveRecord::Base
|
851
|
+
set_table_name 'countries'
|
846
852
|
|
847
|
-
|
848
|
-
|
853
|
+
data_miner do
|
854
|
+
# no unique index
|
849
855
|
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
856
|
+
# get a complete list
|
857
|
+
import :url => 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';' do |attr|
|
858
|
+
attr.store 'iso_3166', :field_number => 1
|
859
|
+
attr.store 'name', :field_number => 0
|
860
|
+
end
|
855
861
|
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
862
|
+
# get nicer names
|
863
|
+
import :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do |attr|
|
864
|
+
attr.store 'iso_3166', :field_name => 'country code'
|
865
|
+
attr.store 'name', :field_name => 'country'
|
866
|
+
end
|
860
867
|
end
|
861
868
|
end
|
862
869
|
end
|
863
870
|
end
|
864
|
-
end
|
865
871
|
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
872
|
+
should "hash things if no unique index is listed" do
|
873
|
+
AutomobileVariant.data_miner_config.runnables[0].run(nil)
|
874
|
+
assert AutomobileVariant.first.row_hash.present?
|
875
|
+
end
|
870
876
|
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
877
|
+
should "process a callback block instead of a method" do
|
878
|
+
AutomobileVariant.delete_all
|
879
|
+
AutomobileVariant.data_miner_config.runnables[0].run(nil)
|
880
|
+
assert !AutomobileVariant.first.fuel_efficiency_city.present?
|
881
|
+
AutomobileVariant.data_miner_config.runnables.last.run(nil)
|
882
|
+
assert AutomobileVariant.first.fuel_efficiency_city.present?
|
883
|
+
end
|
878
884
|
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
885
|
+
should "keep a log when it does a run" do
|
886
|
+
approx_started_at = Time.now
|
887
|
+
DataMiner.run :resource_names => %w{ Country }
|
888
|
+
approx_ended_at = Time.now
|
889
|
+
last_run = DataMiner::Run.first(:conditions => { :resource_name => 'Country' }, :order => 'id DESC')
|
890
|
+
assert (last_run.started_at - approx_started_at).abs < 5 # seconds
|
891
|
+
assert (last_run.ended_at - approx_ended_at).abs < 5 # seconds
|
892
|
+
end
|
887
893
|
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
894
|
+
should "request a re-import from scratch" do
|
895
|
+
c = Country.new
|
896
|
+
c.iso_3166 = 'JUNK'
|
897
|
+
c.save!
|
898
|
+
assert Country.exists?(:iso_3166 => 'JUNK')
|
899
|
+
DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
|
900
|
+
assert !Country.exists?(:iso_3166 => 'JUNK')
|
901
|
+
end
|
896
902
|
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
+
should "track how many times a row was touched" do
|
904
|
+
DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
|
905
|
+
assert_equal 1, Country.first.data_miner_touch_count
|
906
|
+
DataMiner.run :resource_names => %w{ Country }
|
907
|
+
assert_equal 1, Country.first.data_miner_touch_count
|
908
|
+
end
|
903
909
|
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
910
|
+
should "keep track of what the last import run that touched a row was" do
|
911
|
+
DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
|
912
|
+
a = DataMiner::Run.last
|
913
|
+
assert_equal a, Country.first.data_miner_last_run
|
914
|
+
DataMiner.run :resource_names => %w{ Country }
|
915
|
+
b = DataMiner::Run.last
|
916
|
+
assert a != b
|
917
|
+
assert_equal a, Country.first.data_miner_last_run
|
918
|
+
end
|
913
919
|
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
920
|
+
should "be able to get how many rows affected by a run" do
|
921
|
+
DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
|
922
|
+
assert_equal Country.first.data_miner_last_run.resource_records_last_touched_by_me.count, Country.count
|
923
|
+
end
|
918
924
|
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
925
|
+
should "know what runs were on a resource" do
|
926
|
+
DataMiner.run :resource_names => %w{ Country }
|
927
|
+
DataMiner.run :resource_names => %w{ Country }
|
928
|
+
assert Country.data_miner_runs.count > 0
|
929
|
+
end
|
923
930
|
end
|
924
931
|
|
925
|
-
|
932
|
+
if ENV['SLOW'] == 'true'
|
926
933
|
should "import using a dictionary" do
|
927
934
|
DataMiner.run :resource_names => %w{ ResidentialEnergyConsumptionSurveyResponse }
|
928
935
|
assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
|