data_miner 0.4.17 → 0.4.18
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/data_miner.gemspec +5 -5
- data/lib/data_miner/attribute.rb +4 -4
- data/lib/data_miner/configuration.rb +26 -18
- data/lib/data_miner.rb +1 -4
- data/test/data_miner_test.rb +56 -3
- data/test/test_helper.rb +13 -0
- metadata +5 -5
data/Rakefile
CHANGED
@@ -10,7 +10,7 @@ begin
|
|
10
10
|
gem.email = "seamus@abshere.net"
|
11
11
|
gem.homepage = "http://github.com/seamusabshere/data_miner"
|
12
12
|
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
13
|
-
gem.add_dependency 'remote_table', '>=0.2.
|
13
|
+
gem.add_dependency 'remote_table', '>=0.2.10'
|
14
14
|
gem.add_dependency 'activerecord', '>=2.3.4'
|
15
15
|
gem.add_dependency 'activesupport', '>=2.3.4'
|
16
16
|
gem.add_dependency 'andand', '>=1.3.1'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.17
|
1
|
+
0.4.18
|
data/data_miner.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.17"
|
8
|
+
s.version = "0.4.18"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-04-
|
12
|
+
s.date = %q{2010-04-16}
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -52,7 +52,7 @@ Gem::Specification.new do |s|
|
|
52
52
|
s.specification_version = 3
|
53
53
|
|
54
54
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
55
|
-
s.add_runtime_dependency(%q<remote_table>, [">= 0.2.
|
55
|
+
s.add_runtime_dependency(%q<remote_table>, [">= 0.2.10"])
|
56
56
|
s.add_runtime_dependency(%q<activerecord>, [">= 2.3.4"])
|
57
57
|
s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
|
58
58
|
s.add_runtime_dependency(%q<andand>, [">= 1.3.1"])
|
@@ -61,7 +61,7 @@ Gem::Specification.new do |s|
|
|
61
61
|
s.add_runtime_dependency(%q<blockenspiel>, [">= 0.3.2"])
|
62
62
|
s.add_runtime_dependency(%q<log4r>, [">= 1.1.7"])
|
63
63
|
else
|
64
|
-
s.add_dependency(%q<remote_table>, [">= 0.2.
|
64
|
+
s.add_dependency(%q<remote_table>, [">= 0.2.10"])
|
65
65
|
s.add_dependency(%q<activerecord>, [">= 2.3.4"])
|
66
66
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
67
67
|
s.add_dependency(%q<andand>, [">= 1.3.1"])
|
@@ -71,7 +71,7 @@ Gem::Specification.new do |s|
|
|
71
71
|
s.add_dependency(%q<log4r>, [">= 1.1.7"])
|
72
72
|
end
|
73
73
|
else
|
74
|
-
s.add_dependency(%q<remote_table>, [">= 0.2.
|
74
|
+
s.add_dependency(%q<remote_table>, [">= 0.2.10"])
|
75
75
|
s.add_dependency(%q<activerecord>, [">= 2.3.4"])
|
76
76
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
77
77
|
s.add_dependency(%q<andand>, [">= 1.3.1"])
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
module DataMiner
|
2
2
|
class Attribute
|
3
|
-
attr_accessor :
|
3
|
+
attr_accessor :step
|
4
4
|
attr_accessor :name
|
5
5
|
attr_accessor :options
|
6
6
|
|
7
|
-
delegate :resource, :to => :
|
7
|
+
delegate :resource, :to => :step
|
8
8
|
|
9
|
-
def initialize(
|
9
|
+
def initialize(step, name, options = {})
|
10
10
|
options.symbolize_keys!
|
11
11
|
@options = options
|
12
12
|
|
13
|
-
@
|
13
|
+
@step = step
|
14
14
|
@name = name
|
15
15
|
end
|
16
16
|
|
@@ -2,23 +2,23 @@ module DataMiner
|
|
2
2
|
class Configuration
|
3
3
|
include Blockenspiel::DSL
|
4
4
|
|
5
|
-
attr_accessor :resource, :
|
5
|
+
attr_accessor :resource, :steps, :step_counter, :attributes
|
6
6
|
|
7
7
|
def initialize(resource)
|
8
|
-
@
|
8
|
+
@steps = Array.new
|
9
9
|
@resource = resource
|
10
|
-
@
|
10
|
+
@step_counter = 0
|
11
11
|
@attributes = HashWithIndifferentAccess.new
|
12
12
|
end
|
13
13
|
|
14
14
|
def process(method_name_or_block_description, &block)
|
15
|
-
|
16
|
-
self.
|
15
|
+
steps << DataMiner::Process.new(self, step_counter, method_name_or_block_description, &block)
|
16
|
+
self.step_counter += 1
|
17
17
|
end
|
18
18
|
|
19
19
|
def clone(description, options = {})
|
20
|
-
|
21
|
-
self.
|
20
|
+
steps << DataMiner::Clone.new(self, step_counter, description, options)
|
21
|
+
self.step_counter += 1
|
22
22
|
end
|
23
23
|
|
24
24
|
def import(*args, &block)
|
@@ -29,16 +29,19 @@ module DataMiner
|
|
29
29
|
end
|
30
30
|
options = args.last
|
31
31
|
|
32
|
-
|
33
|
-
Blockenspiel.invoke block,
|
34
|
-
|
35
|
-
self.
|
32
|
+
step = DataMiner::Import.new self, step_counter, description, options
|
33
|
+
Blockenspiel.invoke block, step
|
34
|
+
steps << step
|
35
|
+
self.step_counter += 1
|
36
36
|
end
|
37
37
|
|
38
38
|
# Mine data for this class.
|
39
39
|
def run(options = {})
|
40
40
|
options.symbolize_keys!
|
41
41
|
|
42
|
+
return if DataMiner::Configuration.call_stack.include? resource.name
|
43
|
+
DataMiner::Configuration.call_stack.push resource.name
|
44
|
+
|
42
45
|
finished = false
|
43
46
|
if DataMiner::Run.table_exists?
|
44
47
|
run = DataMiner::Run.create! :started_at => Time.now, :resource_name => resource.name if DataMiner::Run.table_exists?
|
@@ -48,16 +51,17 @@ module DataMiner
|
|
48
51
|
end
|
49
52
|
resource.delete_all if options[:from_scratch]
|
50
53
|
begin
|
51
|
-
|
54
|
+
steps.each { |step| step.run run }
|
52
55
|
finished = true
|
53
56
|
ensure
|
54
57
|
run.update_attributes! :ended_at => Time.now, :finished => finished if DataMiner::Run.table_exists?
|
58
|
+
DataMiner::Configuration.call_stack.clear if DataMiner::Configuration.call_stack.first == resource.name
|
55
59
|
end
|
56
60
|
nil
|
57
61
|
end
|
58
62
|
|
59
|
-
def
|
60
|
-
|
63
|
+
def import_steps
|
64
|
+
steps.select { |step| step.is_a? Import }
|
61
65
|
end
|
62
66
|
|
63
67
|
def before_invoke
|
@@ -65,6 +69,7 @@ module DataMiner
|
|
65
69
|
end
|
66
70
|
|
67
71
|
def after_invoke
|
72
|
+
return unless resource.table_exists?
|
68
73
|
make_sure_unit_definitions_make_sense
|
69
74
|
suggest_missing_column_migrations
|
70
75
|
end
|
@@ -79,8 +84,8 @@ module DataMiner
|
|
79
84
|
]
|
80
85
|
|
81
86
|
def make_sure_unit_definitions_make_sense
|
82
|
-
|
83
|
-
|
87
|
+
import_steps.each do |step|
|
88
|
+
step.attributes.each do |_, attribute|
|
84
89
|
if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
|
85
90
|
DataMiner.log_or_raise %{
|
86
91
|
|
@@ -111,8 +116,8 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
111
116
|
DataMiner.log_info "Not recording which run touched a row."
|
112
117
|
end
|
113
118
|
|
114
|
-
|
115
|
-
|
119
|
+
import_steps.each do |step|
|
120
|
+
step.attributes.each do |_, attribute|
|
116
121
|
DataMiner.log_or_raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
|
117
122
|
unless resource.column_names.include? attribute.name
|
118
123
|
missing_columns << attribute.name
|
@@ -160,6 +165,9 @@ On the other hand, if you're working directly with create_table, this might be h
|
|
160
165
|
|
161
166
|
cattr_accessor :resource_names
|
162
167
|
self.resource_names = Array.new
|
168
|
+
|
169
|
+
cattr_accessor :call_stack
|
170
|
+
self.call_stack = Array.new
|
163
171
|
class << self
|
164
172
|
# Mine data. Defaults to all resource_names touched by DataMiner.
|
165
173
|
#
|
data/lib/data_miner.rb
CHANGED
@@ -72,10 +72,7 @@ ActiveRecord::Base.class_eval do
|
|
72
72
|
def self.data_miner(&block)
|
73
73
|
DataMiner.start_logging
|
74
74
|
|
75
|
-
unless table_exists?
|
76
|
-
DataMiner.log_or_raise "Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
|
77
|
-
return
|
78
|
-
end
|
75
|
+
DataMiner.log_info "Database table `#{table_name}` doesn't exist. It might be created in the data_miner block, but if it's not, DataMiner probably won't work properly until you run a migration or otherwise fix the schema." unless table_exists?
|
79
76
|
|
80
77
|
DataMiner.resource_names.push self.name unless DataMiner.resource_names.include? self.name
|
81
78
|
|
data/test/data_miner_test.rb
CHANGED
@@ -488,6 +488,47 @@ class CensusDivision < ActiveRecord::Base
|
|
488
488
|
end
|
489
489
|
end
|
490
490
|
|
491
|
+
class CrosscallingCensusRegion < ActiveRecord::Base
|
492
|
+
set_primary_key :number
|
493
|
+
|
494
|
+
has_many :crosscalling_census_divisions
|
495
|
+
|
496
|
+
data_miner do
|
497
|
+
process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
|
498
|
+
CrosscallingCensusDivision.run_data_miner!
|
499
|
+
connection.drop_table :crosscalling_census_regions rescue nil
|
500
|
+
connection.create_table :crosscalling_census_regions, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
501
|
+
t.column :number, :integer
|
502
|
+
t.column :name, :string
|
503
|
+
end
|
504
|
+
connection.execute 'ALTER TABLE crosscalling_census_regions ADD PRIMARY KEY (number);'
|
505
|
+
connection.execute %{
|
506
|
+
INSERT IGNORE INTO crosscalling_census_regions(number, name)
|
507
|
+
SELECT crosscalling_census_divisions.census_region_number, crosscalling_census_divisions.census_region_name FROM crosscalling_census_divisions
|
508
|
+
}
|
509
|
+
end
|
510
|
+
end
|
511
|
+
end
|
512
|
+
|
513
|
+
class CrosscallingCensusDivision < ActiveRecord::Base
|
514
|
+
set_primary_key :number
|
515
|
+
|
516
|
+
belongs_to :crosscalling_census_regions, :foreign_key => 'census_region_number'
|
517
|
+
|
518
|
+
data_miner do
|
519
|
+
import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
520
|
+
key 'number', :field_name => 'Division'
|
521
|
+
store 'name', :field_name => 'Name'
|
522
|
+
store 'census_region_number', :field_name => 'Region'
|
523
|
+
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
524
|
+
end
|
525
|
+
|
526
|
+
process "make sure my parent object is set up (i.e., cross-call it)" do
|
527
|
+
CrosscallingCensusRegion.run_data_miner!
|
528
|
+
end
|
529
|
+
end
|
530
|
+
end
|
531
|
+
|
491
532
|
class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
492
533
|
set_primary_key :department_of_energy_identifier
|
493
534
|
|
@@ -906,6 +947,18 @@ class DataMinerTest < Test::Unit::TestCase
|
|
906
947
|
end
|
907
948
|
|
908
949
|
if ENV['FAST'] == 'true'
|
950
|
+
should "keep a call stack so that you can call run_data_miner! on a child" do
|
951
|
+
CrosscallingCensusDivision.run_data_miner!
|
952
|
+
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
953
|
+
assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
|
954
|
+
end
|
955
|
+
|
956
|
+
should "keep a call stack so that you can call run_data_miner! on a parent" do
|
957
|
+
CrosscallingCensusRegion.run_data_miner!
|
958
|
+
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
959
|
+
assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
|
960
|
+
end
|
961
|
+
|
909
962
|
should "clone airports" do
|
910
963
|
ClonedAirport.run_data_miner!
|
911
964
|
assert ClonedAirport.count > 0
|
@@ -946,15 +999,15 @@ class DataMinerTest < Test::Unit::TestCase
|
|
946
999
|
end
|
947
1000
|
|
948
1001
|
should "hash things" do
|
949
|
-
AutomobileVariant.data_miner_config.
|
1002
|
+
AutomobileVariant.data_miner_config.steps[0].run(nil)
|
950
1003
|
assert AutomobileVariant.first.row_hash.present?
|
951
1004
|
end
|
952
1005
|
|
953
1006
|
should "process a callback block instead of a method" do
|
954
1007
|
AutomobileVariant.delete_all
|
955
|
-
AutomobileVariant.data_miner_config.
|
1008
|
+
AutomobileVariant.data_miner_config.steps[0].run(nil)
|
956
1009
|
assert !AutomobileVariant.first.fuel_efficiency_city.present?
|
957
|
-
AutomobileVariant.data_miner_config.
|
1010
|
+
AutomobileVariant.data_miner_config.steps.last.run(nil)
|
958
1011
|
assert AutomobileVariant.first.fuel_efficiency_city.present?
|
959
1012
|
end
|
960
1013
|
|
data/test/test_helper.rb
CHANGED
@@ -134,6 +134,19 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
|
|
134
134
|
end
|
135
135
|
execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
|
136
136
|
|
137
|
+
create_table 'crosscalling_census_divisions', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
138
|
+
t.integer 'number'
|
139
|
+
t.string 'name'
|
140
|
+
t.datetime 'updated_at'
|
141
|
+
t.datetime 'created_at'
|
142
|
+
t.string 'census_region_name'
|
143
|
+
t.integer 'census_region_number'
|
144
|
+
|
145
|
+
t.integer 'data_miner_touch_count'
|
146
|
+
t.integer 'data_miner_last_run_id'
|
147
|
+
end
|
148
|
+
execute 'ALTER TABLE crosscalling_census_divisions ADD PRIMARY KEY (number);'
|
149
|
+
|
137
150
|
create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
138
151
|
t.float "fuel_efficiency_city"
|
139
152
|
t.float "fuel_efficiency_highway"
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 17
|
9
|
-
version: 0.4.17
|
8
|
+
- 18
|
9
|
+
version: 0.4.18
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-04-
|
18
|
+
date: 2010-04-16 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -28,8 +28,8 @@ dependencies:
|
|
28
28
|
segments:
|
29
29
|
- 0
|
30
30
|
- 2
|
31
|
-
-
|
32
|
-
version: 0.2.
|
31
|
+
- 10
|
32
|
+
version: 0.2.10
|
33
33
|
type: :runtime
|
34
34
|
version_requirements: *id001
|
35
35
|
- !ruby/object:Gem::Dependency
|