data_miner 0.4.17 → 0.4.18

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ begin
10
10
  gem.email = "seamus@abshere.net"
11
11
  gem.homepage = "http://github.com/seamusabshere/data_miner"
12
12
  gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
- gem.add_dependency 'remote_table', '>=0.2.9'
13
+ gem.add_dependency 'remote_table', '>=0.2.10'
14
14
  gem.add_dependency 'activerecord', '>=2.3.4'
15
15
  gem.add_dependency 'activesupport', '>=2.3.4'
16
16
  gem.add_dependency 'andand', '>=1.3.1'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.17
1
+ 0.4.18
data/data_miner.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.17"
8
+ s.version = "0.4.18"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-04-15}
12
+ s.date = %q{2010-04-16}
13
13
  s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -52,7 +52,7 @@ Gem::Specification.new do |s|
52
52
  s.specification_version = 3
53
53
 
54
54
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
55
- s.add_runtime_dependency(%q<remote_table>, [">= 0.2.9"])
55
+ s.add_runtime_dependency(%q<remote_table>, [">= 0.2.10"])
56
56
  s.add_runtime_dependency(%q<activerecord>, [">= 2.3.4"])
57
57
  s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
58
58
  s.add_runtime_dependency(%q<andand>, [">= 1.3.1"])
@@ -61,7 +61,7 @@ Gem::Specification.new do |s|
61
61
  s.add_runtime_dependency(%q<blockenspiel>, [">= 0.3.2"])
62
62
  s.add_runtime_dependency(%q<log4r>, [">= 1.1.7"])
63
63
  else
64
- s.add_dependency(%q<remote_table>, [">= 0.2.9"])
64
+ s.add_dependency(%q<remote_table>, [">= 0.2.10"])
65
65
  s.add_dependency(%q<activerecord>, [">= 2.3.4"])
66
66
  s.add_dependency(%q<activesupport>, [">= 2.3.4"])
67
67
  s.add_dependency(%q<andand>, [">= 1.3.1"])
@@ -71,7 +71,7 @@ Gem::Specification.new do |s|
71
71
  s.add_dependency(%q<log4r>, [">= 1.1.7"])
72
72
  end
73
73
  else
74
- s.add_dependency(%q<remote_table>, [">= 0.2.9"])
74
+ s.add_dependency(%q<remote_table>, [">= 0.2.10"])
75
75
  s.add_dependency(%q<activerecord>, [">= 2.3.4"])
76
76
  s.add_dependency(%q<activesupport>, [">= 2.3.4"])
77
77
  s.add_dependency(%q<andand>, [">= 1.3.1"])
@@ -1,16 +1,16 @@
1
1
  module DataMiner
2
2
  class Attribute
3
- attr_accessor :runnable
3
+ attr_accessor :step
4
4
  attr_accessor :name
5
5
  attr_accessor :options
6
6
 
7
- delegate :resource, :to => :runnable
7
+ delegate :resource, :to => :step
8
8
 
9
- def initialize(runnable, name, options = {})
9
+ def initialize(step, name, options = {})
10
10
  options.symbolize_keys!
11
11
  @options = options
12
12
 
13
- @runnable = runnable
13
+ @step = step
14
14
  @name = name
15
15
  end
16
16
 
@@ -2,23 +2,23 @@ module DataMiner
2
2
  class Configuration
3
3
  include Blockenspiel::DSL
4
4
 
5
- attr_accessor :resource, :runnables, :runnable_counter, :attributes
5
+ attr_accessor :resource, :steps, :step_counter, :attributes
6
6
 
7
7
  def initialize(resource)
8
- @runnables = Array.new
8
+ @steps = Array.new
9
9
  @resource = resource
10
- @runnable_counter = 0
10
+ @step_counter = 0
11
11
  @attributes = HashWithIndifferentAccess.new
12
12
  end
13
13
 
14
14
  def process(method_name_or_block_description, &block)
15
- runnables << DataMiner::Process.new(self, runnable_counter, method_name_or_block_description, &block)
16
- self.runnable_counter += 1
15
+ steps << DataMiner::Process.new(self, step_counter, method_name_or_block_description, &block)
16
+ self.step_counter += 1
17
17
  end
18
18
 
19
19
  def clone(description, options = {})
20
- runnables << DataMiner::Clone.new(self, runnable_counter, description, options)
21
- self.runnable_counter += 1
20
+ steps << DataMiner::Clone.new(self, step_counter, description, options)
21
+ self.step_counter += 1
22
22
  end
23
23
 
24
24
  def import(*args, &block)
@@ -29,16 +29,19 @@ module DataMiner
29
29
  end
30
30
  options = args.last
31
31
 
32
- runnable = DataMiner::Import.new self, runnable_counter, description, options
33
- Blockenspiel.invoke block, runnable
34
- runnables << runnable
35
- self.runnable_counter += 1
32
+ step = DataMiner::Import.new self, step_counter, description, options
33
+ Blockenspiel.invoke block, step
34
+ steps << step
35
+ self.step_counter += 1
36
36
  end
37
37
 
38
38
  # Mine data for this class.
39
39
  def run(options = {})
40
40
  options.symbolize_keys!
41
41
 
42
+ return if DataMiner::Configuration.call_stack.include? resource.name
43
+ DataMiner::Configuration.call_stack.push resource.name
44
+
42
45
  finished = false
43
46
  if DataMiner::Run.table_exists?
44
47
  run = DataMiner::Run.create! :started_at => Time.now, :resource_name => resource.name if DataMiner::Run.table_exists?
@@ -48,16 +51,17 @@ module DataMiner
48
51
  end
49
52
  resource.delete_all if options[:from_scratch]
50
53
  begin
51
- runnables.each { |runnable| runnable.run run }
54
+ steps.each { |step| step.run run }
52
55
  finished = true
53
56
  ensure
54
57
  run.update_attributes! :ended_at => Time.now, :finished => finished if DataMiner::Run.table_exists?
58
+ DataMiner::Configuration.call_stack.clear if DataMiner::Configuration.call_stack.first == resource.name
55
59
  end
56
60
  nil
57
61
  end
58
62
 
59
- def import_runnables
60
- runnables.select { |runnable| runnable.is_a? Import }
63
+ def import_steps
64
+ steps.select { |step| step.is_a? Import }
61
65
  end
62
66
 
63
67
  def before_invoke
@@ -65,6 +69,7 @@ module DataMiner
65
69
  end
66
70
 
67
71
  def after_invoke
72
+ return unless resource.table_exists?
68
73
  make_sure_unit_definitions_make_sense
69
74
  suggest_missing_column_migrations
70
75
  end
@@ -79,8 +84,8 @@ module DataMiner
79
84
  ]
80
85
 
81
86
  def make_sure_unit_definitions_make_sense
82
- import_runnables.each do |runnable|
83
- runnable.attributes.each do |_, attribute|
87
+ import_steps.each do |step|
88
+ step.attributes.each do |_, attribute|
84
89
  if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
85
90
  DataMiner.log_or_raise %{
86
91
 
@@ -111,8 +116,8 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
111
116
  DataMiner.log_info "Not recording which run touched a row."
112
117
  end
113
118
 
114
- import_runnables.each do |runnable|
115
- runnable.attributes.each do |_, attribute|
119
+ import_steps.each do |step|
120
+ step.attributes.each do |_, attribute|
116
121
  DataMiner.log_or_raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
117
122
  unless resource.column_names.include? attribute.name
118
123
  missing_columns << attribute.name
@@ -160,6 +165,9 @@ On the other hand, if you're working directly with create_table, this might be h
160
165
 
161
166
  cattr_accessor :resource_names
162
167
  self.resource_names = Array.new
168
+
169
+ cattr_accessor :call_stack
170
+ self.call_stack = Array.new
163
171
  class << self
164
172
  # Mine data. Defaults to all resource_names touched by DataMiner.
165
173
  #
data/lib/data_miner.rb CHANGED
@@ -72,10 +72,7 @@ ActiveRecord::Base.class_eval do
72
72
  def self.data_miner(&block)
73
73
  DataMiner.start_logging
74
74
 
75
- unless table_exists?
76
- DataMiner.log_or_raise "Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
77
- return
78
- end
75
+ DataMiner.log_info "Database table `#{table_name}` doesn't exist. It might be created in the data_miner block, but if it's not, DataMiner probably won't work properly until you run a migration or otherwise fix the schema." unless table_exists?
79
76
 
80
77
  DataMiner.resource_names.push self.name unless DataMiner.resource_names.include? self.name
81
78
 
@@ -488,6 +488,47 @@ class CensusDivision < ActiveRecord::Base
488
488
  end
489
489
  end
490
490
 
491
+ class CrosscallingCensusRegion < ActiveRecord::Base
492
+ set_primary_key :number
493
+
494
+ has_many :crosscalling_census_divisions
495
+
496
+ data_miner do
497
+ process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
498
+ CrosscallingCensusDivision.run_data_miner!
499
+ connection.drop_table :crosscalling_census_regions rescue nil
500
+ connection.create_table :crosscalling_census_regions, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
501
+ t.column :number, :integer
502
+ t.column :name, :string
503
+ end
504
+ connection.execute 'ALTER TABLE crosscalling_census_regions ADD PRIMARY KEY (number);'
505
+ connection.execute %{
506
+ INSERT IGNORE INTO crosscalling_census_regions(number, name)
507
+ SELECT crosscalling_census_divisions.census_region_number, crosscalling_census_divisions.census_region_name FROM crosscalling_census_divisions
508
+ }
509
+ end
510
+ end
511
+ end
512
+
513
+ class CrosscallingCensusDivision < ActiveRecord::Base
514
+ set_primary_key :number
515
+
516
+ belongs_to :crosscalling_census_regions, :foreign_key => 'census_region_number'
517
+
518
+ data_miner do
519
+ import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
520
+ key 'number', :field_name => 'Division'
521
+ store 'name', :field_name => 'Name'
522
+ store 'census_region_number', :field_name => 'Region'
523
+ store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
524
+ end
525
+
526
+ process "make sure my parent object is set up (i.e., cross-call it)" do
527
+ CrosscallingCensusRegion.run_data_miner!
528
+ end
529
+ end
530
+ end
531
+
491
532
  class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
492
533
  set_primary_key :department_of_energy_identifier
493
534
 
@@ -906,6 +947,18 @@ class DataMinerTest < Test::Unit::TestCase
906
947
  end
907
948
 
908
949
  if ENV['FAST'] == 'true'
950
+ should "keep a call stack so that you can call run_data_miner! on a child" do
951
+ CrosscallingCensusDivision.run_data_miner!
952
+ assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
953
+ assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
954
+ end
955
+
956
+ should "keep a call stack so that you can call run_data_miner! on a parent" do
957
+ CrosscallingCensusRegion.run_data_miner!
958
+ assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
959
+ assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
960
+ end
961
+
909
962
  should "clone airports" do
910
963
  ClonedAirport.run_data_miner!
911
964
  assert ClonedAirport.count > 0
@@ -946,15 +999,15 @@ class DataMinerTest < Test::Unit::TestCase
946
999
  end
947
1000
 
948
1001
  should "hash things" do
949
- AutomobileVariant.data_miner_config.runnables[0].run(nil)
1002
+ AutomobileVariant.data_miner_config.steps[0].run(nil)
950
1003
  assert AutomobileVariant.first.row_hash.present?
951
1004
  end
952
1005
 
953
1006
  should "process a callback block instead of a method" do
954
1007
  AutomobileVariant.delete_all
955
- AutomobileVariant.data_miner_config.runnables[0].run(nil)
1008
+ AutomobileVariant.data_miner_config.steps[0].run(nil)
956
1009
  assert !AutomobileVariant.first.fuel_efficiency_city.present?
957
- AutomobileVariant.data_miner_config.runnables.last.run(nil)
1010
+ AutomobileVariant.data_miner_config.steps.last.run(nil)
958
1011
  assert AutomobileVariant.first.fuel_efficiency_city.present?
959
1012
  end
960
1013
 
data/test/test_helper.rb CHANGED
@@ -134,6 +134,19 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
134
134
  end
135
135
  execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
136
136
 
137
+ create_table 'crosscalling_census_divisions', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
138
+ t.integer 'number'
139
+ t.string 'name'
140
+ t.datetime 'updated_at'
141
+ t.datetime 'created_at'
142
+ t.string 'census_region_name'
143
+ t.integer 'census_region_number'
144
+
145
+ t.integer 'data_miner_touch_count'
146
+ t.integer 'data_miner_last_run_id'
147
+ end
148
+ execute 'ALTER TABLE crosscalling_census_divisions ADD PRIMARY KEY (number);'
149
+
137
150
  create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
138
151
  t.float "fuel_efficiency_city"
139
152
  t.float "fuel_efficiency_highway"
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 17
9
- version: 0.4.17
8
+ - 18
9
+ version: 0.4.18
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-04-15 00:00:00 -04:00
18
+ date: 2010-04-16 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -28,8 +28,8 @@ dependencies:
28
28
  segments:
29
29
  - 0
30
30
  - 2
31
- - 9
32
- version: 0.2.9
31
+ - 10
32
+ version: 0.2.10
33
33
  type: :runtime
34
34
  version_requirements: *id001
35
35
  - !ruby/object:Gem::Dependency