data_miner 0.4.17 → 0.4.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/data_miner.gemspec +5 -5
- data/lib/data_miner/attribute.rb +4 -4
- data/lib/data_miner/configuration.rb +26 -18
- data/lib/data_miner.rb +1 -4
- data/test/data_miner_test.rb +56 -3
- data/test/test_helper.rb +13 -0
- metadata +5 -5
data/Rakefile
CHANGED
|
@@ -10,7 +10,7 @@ begin
|
|
|
10
10
|
gem.email = "seamus@abshere.net"
|
|
11
11
|
gem.homepage = "http://github.com/seamusabshere/data_miner"
|
|
12
12
|
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
|
13
|
-
gem.add_dependency 'remote_table', '>=0.2.
|
|
13
|
+
gem.add_dependency 'remote_table', '>=0.2.10'
|
|
14
14
|
gem.add_dependency 'activerecord', '>=2.3.4'
|
|
15
15
|
gem.add_dependency 'activesupport', '>=2.3.4'
|
|
16
16
|
gem.add_dependency 'andand', '>=1.3.1'
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.4.17
|
|
1
|
+
0.4.18
|
data/data_miner.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{data_miner}
|
|
8
|
-
s.version = "0.4.17"
|
|
8
|
+
s.version = "0.4.18"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
|
12
|
-
s.date = %q{2010-04-
|
|
12
|
+
s.date = %q{2010-04-16}
|
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
|
14
14
|
s.email = %q{seamus@abshere.net}
|
|
15
15
|
s.extra_rdoc_files = [
|
|
@@ -52,7 +52,7 @@ Gem::Specification.new do |s|
|
|
|
52
52
|
s.specification_version = 3
|
|
53
53
|
|
|
54
54
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
|
55
|
-
s.add_runtime_dependency(%q<remote_table>, [">= 0.2.
|
|
55
|
+
s.add_runtime_dependency(%q<remote_table>, [">= 0.2.10"])
|
|
56
56
|
s.add_runtime_dependency(%q<activerecord>, [">= 2.3.4"])
|
|
57
57
|
s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
|
|
58
58
|
s.add_runtime_dependency(%q<andand>, [">= 1.3.1"])
|
|
@@ -61,7 +61,7 @@ Gem::Specification.new do |s|
|
|
|
61
61
|
s.add_runtime_dependency(%q<blockenspiel>, [">= 0.3.2"])
|
|
62
62
|
s.add_runtime_dependency(%q<log4r>, [">= 1.1.7"])
|
|
63
63
|
else
|
|
64
|
-
s.add_dependency(%q<remote_table>, [">= 0.2.
|
|
64
|
+
s.add_dependency(%q<remote_table>, [">= 0.2.10"])
|
|
65
65
|
s.add_dependency(%q<activerecord>, [">= 2.3.4"])
|
|
66
66
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
|
67
67
|
s.add_dependency(%q<andand>, [">= 1.3.1"])
|
|
@@ -71,7 +71,7 @@ Gem::Specification.new do |s|
|
|
|
71
71
|
s.add_dependency(%q<log4r>, [">= 1.1.7"])
|
|
72
72
|
end
|
|
73
73
|
else
|
|
74
|
-
s.add_dependency(%q<remote_table>, [">= 0.2.
|
|
74
|
+
s.add_dependency(%q<remote_table>, [">= 0.2.10"])
|
|
75
75
|
s.add_dependency(%q<activerecord>, [">= 2.3.4"])
|
|
76
76
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
|
77
77
|
s.add_dependency(%q<andand>, [">= 1.3.1"])
|
data/lib/data_miner/attribute.rb
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
module DataMiner
|
|
2
2
|
class Attribute
|
|
3
|
-
attr_accessor :
|
|
3
|
+
attr_accessor :step
|
|
4
4
|
attr_accessor :name
|
|
5
5
|
attr_accessor :options
|
|
6
6
|
|
|
7
|
-
delegate :resource, :to => :
|
|
7
|
+
delegate :resource, :to => :step
|
|
8
8
|
|
|
9
|
-
def initialize(
|
|
9
|
+
def initialize(step, name, options = {})
|
|
10
10
|
options.symbolize_keys!
|
|
11
11
|
@options = options
|
|
12
12
|
|
|
13
|
-
@
|
|
13
|
+
@step = step
|
|
14
14
|
@name = name
|
|
15
15
|
end
|
|
16
16
|
|
|
@@ -2,23 +2,23 @@ module DataMiner
|
|
|
2
2
|
class Configuration
|
|
3
3
|
include Blockenspiel::DSL
|
|
4
4
|
|
|
5
|
-
attr_accessor :resource, :
|
|
5
|
+
attr_accessor :resource, :steps, :step_counter, :attributes
|
|
6
6
|
|
|
7
7
|
def initialize(resource)
|
|
8
|
-
@
|
|
8
|
+
@steps = Array.new
|
|
9
9
|
@resource = resource
|
|
10
|
-
@
|
|
10
|
+
@step_counter = 0
|
|
11
11
|
@attributes = HashWithIndifferentAccess.new
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
def process(method_name_or_block_description, &block)
|
|
15
|
-
|
|
16
|
-
self.
|
|
15
|
+
steps << DataMiner::Process.new(self, step_counter, method_name_or_block_description, &block)
|
|
16
|
+
self.step_counter += 1
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
def clone(description, options = {})
|
|
20
|
-
|
|
21
|
-
self.
|
|
20
|
+
steps << DataMiner::Clone.new(self, step_counter, description, options)
|
|
21
|
+
self.step_counter += 1
|
|
22
22
|
end
|
|
23
23
|
|
|
24
24
|
def import(*args, &block)
|
|
@@ -29,16 +29,19 @@ module DataMiner
|
|
|
29
29
|
end
|
|
30
30
|
options = args.last
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
Blockenspiel.invoke block,
|
|
34
|
-
|
|
35
|
-
self.
|
|
32
|
+
step = DataMiner::Import.new self, step_counter, description, options
|
|
33
|
+
Blockenspiel.invoke block, step
|
|
34
|
+
steps << step
|
|
35
|
+
self.step_counter += 1
|
|
36
36
|
end
|
|
37
37
|
|
|
38
38
|
# Mine data for this class.
|
|
39
39
|
def run(options = {})
|
|
40
40
|
options.symbolize_keys!
|
|
41
41
|
|
|
42
|
+
return if DataMiner::Configuration.call_stack.include? resource.name
|
|
43
|
+
DataMiner::Configuration.call_stack.push resource.name
|
|
44
|
+
|
|
42
45
|
finished = false
|
|
43
46
|
if DataMiner::Run.table_exists?
|
|
44
47
|
run = DataMiner::Run.create! :started_at => Time.now, :resource_name => resource.name if DataMiner::Run.table_exists?
|
|
@@ -48,16 +51,17 @@ module DataMiner
|
|
|
48
51
|
end
|
|
49
52
|
resource.delete_all if options[:from_scratch]
|
|
50
53
|
begin
|
|
51
|
-
|
|
54
|
+
steps.each { |step| step.run run }
|
|
52
55
|
finished = true
|
|
53
56
|
ensure
|
|
54
57
|
run.update_attributes! :ended_at => Time.now, :finished => finished if DataMiner::Run.table_exists?
|
|
58
|
+
DataMiner::Configuration.call_stack.clear if DataMiner::Configuration.call_stack.first == resource.name
|
|
55
59
|
end
|
|
56
60
|
nil
|
|
57
61
|
end
|
|
58
62
|
|
|
59
|
-
def
|
|
60
|
-
|
|
63
|
+
def import_steps
|
|
64
|
+
steps.select { |step| step.is_a? Import }
|
|
61
65
|
end
|
|
62
66
|
|
|
63
67
|
def before_invoke
|
|
@@ -65,6 +69,7 @@ module DataMiner
|
|
|
65
69
|
end
|
|
66
70
|
|
|
67
71
|
def after_invoke
|
|
72
|
+
return unless resource.table_exists?
|
|
68
73
|
make_sure_unit_definitions_make_sense
|
|
69
74
|
suggest_missing_column_migrations
|
|
70
75
|
end
|
|
@@ -79,8 +84,8 @@ module DataMiner
|
|
|
79
84
|
]
|
|
80
85
|
|
|
81
86
|
def make_sure_unit_definitions_make_sense
|
|
82
|
-
|
|
83
|
-
|
|
87
|
+
import_steps.each do |step|
|
|
88
|
+
step.attributes.each do |_, attribute|
|
|
84
89
|
if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
|
|
85
90
|
DataMiner.log_or_raise %{
|
|
86
91
|
|
|
@@ -111,8 +116,8 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
|
|
|
111
116
|
DataMiner.log_info "Not recording which run touched a row."
|
|
112
117
|
end
|
|
113
118
|
|
|
114
|
-
|
|
115
|
-
|
|
119
|
+
import_steps.each do |step|
|
|
120
|
+
step.attributes.each do |_, attribute|
|
|
116
121
|
DataMiner.log_or_raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
|
|
117
122
|
unless resource.column_names.include? attribute.name
|
|
118
123
|
missing_columns << attribute.name
|
|
@@ -160,6 +165,9 @@ On the other hand, if you're working directly with create_table, this might be h
|
|
|
160
165
|
|
|
161
166
|
cattr_accessor :resource_names
|
|
162
167
|
self.resource_names = Array.new
|
|
168
|
+
|
|
169
|
+
cattr_accessor :call_stack
|
|
170
|
+
self.call_stack = Array.new
|
|
163
171
|
class << self
|
|
164
172
|
# Mine data. Defaults to all resource_names touched by DataMiner.
|
|
165
173
|
#
|
data/lib/data_miner.rb
CHANGED
|
@@ -72,10 +72,7 @@ ActiveRecord::Base.class_eval do
|
|
|
72
72
|
def self.data_miner(&block)
|
|
73
73
|
DataMiner.start_logging
|
|
74
74
|
|
|
75
|
-
unless table_exists?
|
|
76
|
-
DataMiner.log_or_raise "Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
|
|
77
|
-
return
|
|
78
|
-
end
|
|
75
|
+
DataMiner.log_info "Database table `#{table_name}` doesn't exist. It might be created in the data_miner block, but if it's not, DataMiner probably won't work properly until you run a migration or otherwise fix the schema." unless table_exists?
|
|
79
76
|
|
|
80
77
|
DataMiner.resource_names.push self.name unless DataMiner.resource_names.include? self.name
|
|
81
78
|
|
data/test/data_miner_test.rb
CHANGED
|
@@ -488,6 +488,47 @@ class CensusDivision < ActiveRecord::Base
|
|
|
488
488
|
end
|
|
489
489
|
end
|
|
490
490
|
|
|
491
|
+
class CrosscallingCensusRegion < ActiveRecord::Base
|
|
492
|
+
set_primary_key :number
|
|
493
|
+
|
|
494
|
+
has_many :crosscalling_census_divisions
|
|
495
|
+
|
|
496
|
+
data_miner do
|
|
497
|
+
process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
|
|
498
|
+
CrosscallingCensusDivision.run_data_miner!
|
|
499
|
+
connection.drop_table :crosscalling_census_regions rescue nil
|
|
500
|
+
connection.create_table :crosscalling_census_regions, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
|
501
|
+
t.column :number, :integer
|
|
502
|
+
t.column :name, :string
|
|
503
|
+
end
|
|
504
|
+
connection.execute 'ALTER TABLE crosscalling_census_regions ADD PRIMARY KEY (number);'
|
|
505
|
+
connection.execute %{
|
|
506
|
+
INSERT IGNORE INTO crosscalling_census_regions(number, name)
|
|
507
|
+
SELECT crosscalling_census_divisions.census_region_number, crosscalling_census_divisions.census_region_name FROM crosscalling_census_divisions
|
|
508
|
+
}
|
|
509
|
+
end
|
|
510
|
+
end
|
|
511
|
+
end
|
|
512
|
+
|
|
513
|
+
class CrosscallingCensusDivision < ActiveRecord::Base
|
|
514
|
+
set_primary_key :number
|
|
515
|
+
|
|
516
|
+
belongs_to :crosscalling_census_regions, :foreign_key => 'census_region_number'
|
|
517
|
+
|
|
518
|
+
data_miner do
|
|
519
|
+
import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
|
|
520
|
+
key 'number', :field_name => 'Division'
|
|
521
|
+
store 'name', :field_name => 'Name'
|
|
522
|
+
store 'census_region_number', :field_name => 'Region'
|
|
523
|
+
store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
|
|
524
|
+
end
|
|
525
|
+
|
|
526
|
+
process "make sure my parent object is set up (i.e., cross-call it)" do
|
|
527
|
+
CrosscallingCensusRegion.run_data_miner!
|
|
528
|
+
end
|
|
529
|
+
end
|
|
530
|
+
end
|
|
531
|
+
|
|
491
532
|
class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
|
|
492
533
|
set_primary_key :department_of_energy_identifier
|
|
493
534
|
|
|
@@ -906,6 +947,18 @@ class DataMinerTest < Test::Unit::TestCase
|
|
|
906
947
|
end
|
|
907
948
|
|
|
908
949
|
if ENV['FAST'] == 'true'
|
|
950
|
+
should "keep a call stack so that you can call run_data_miner! on a child" do
|
|
951
|
+
CrosscallingCensusDivision.run_data_miner!
|
|
952
|
+
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
|
953
|
+
assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
|
|
954
|
+
end
|
|
955
|
+
|
|
956
|
+
should "keep a call stack so that you can call run_data_miner! on a parent" do
|
|
957
|
+
CrosscallingCensusRegion.run_data_miner!
|
|
958
|
+
assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
|
|
959
|
+
assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
|
|
960
|
+
end
|
|
961
|
+
|
|
909
962
|
should "clone airports" do
|
|
910
963
|
ClonedAirport.run_data_miner!
|
|
911
964
|
assert ClonedAirport.count > 0
|
|
@@ -946,15 +999,15 @@ class DataMinerTest < Test::Unit::TestCase
|
|
|
946
999
|
end
|
|
947
1000
|
|
|
948
1001
|
should "hash things" do
|
|
949
|
-
AutomobileVariant.data_miner_config.
|
|
1002
|
+
AutomobileVariant.data_miner_config.steps[0].run(nil)
|
|
950
1003
|
assert AutomobileVariant.first.row_hash.present?
|
|
951
1004
|
end
|
|
952
1005
|
|
|
953
1006
|
should "process a callback block instead of a method" do
|
|
954
1007
|
AutomobileVariant.delete_all
|
|
955
|
-
AutomobileVariant.data_miner_config.
|
|
1008
|
+
AutomobileVariant.data_miner_config.steps[0].run(nil)
|
|
956
1009
|
assert !AutomobileVariant.first.fuel_efficiency_city.present?
|
|
957
|
-
AutomobileVariant.data_miner_config.
|
|
1010
|
+
AutomobileVariant.data_miner_config.steps.last.run(nil)
|
|
958
1011
|
assert AutomobileVariant.first.fuel_efficiency_city.present?
|
|
959
1012
|
end
|
|
960
1013
|
|
data/test/test_helper.rb
CHANGED
|
@@ -134,6 +134,19 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
|
|
|
134
134
|
end
|
|
135
135
|
execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
|
|
136
136
|
|
|
137
|
+
create_table 'crosscalling_census_divisions', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
|
138
|
+
t.integer 'number'
|
|
139
|
+
t.string 'name'
|
|
140
|
+
t.datetime 'updated_at'
|
|
141
|
+
t.datetime 'created_at'
|
|
142
|
+
t.string 'census_region_name'
|
|
143
|
+
t.integer 'census_region_number'
|
|
144
|
+
|
|
145
|
+
t.integer 'data_miner_touch_count'
|
|
146
|
+
t.integer 'data_miner_last_run_id'
|
|
147
|
+
end
|
|
148
|
+
execute 'ALTER TABLE crosscalling_census_divisions ADD PRIMARY KEY (number);'
|
|
149
|
+
|
|
137
150
|
create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
|
138
151
|
t.float "fuel_efficiency_city"
|
|
139
152
|
t.float "fuel_efficiency_highway"
|
metadata
CHANGED
|
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
|
5
5
|
segments:
|
|
6
6
|
- 0
|
|
7
7
|
- 4
|
|
8
|
-
- 17
|
|
9
|
-
version: 0.4.17
|
|
8
|
+
- 18
|
|
9
|
+
version: 0.4.18
|
|
10
10
|
platform: ruby
|
|
11
11
|
authors:
|
|
12
12
|
- Seamus Abshere
|
|
@@ -15,7 +15,7 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2010-04-
|
|
18
|
+
date: 2010-04-16 00:00:00 -04:00
|
|
19
19
|
default_executable:
|
|
20
20
|
dependencies:
|
|
21
21
|
- !ruby/object:Gem::Dependency
|
|
@@ -28,8 +28,8 @@ dependencies:
|
|
|
28
28
|
segments:
|
|
29
29
|
- 0
|
|
30
30
|
- 2
|
|
31
|
-
-
|
|
32
|
-
version: 0.2.
|
|
31
|
+
- 10
|
|
32
|
+
version: 0.2.10
|
|
33
33
|
type: :runtime
|
|
34
34
|
version_requirements: *id001
|
|
35
35
|
- !ruby/object:Gem::Dependency
|