data_miner 0.4.17 → 0.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -10,7 +10,7 @@ begin
10
10
  gem.email = "seamus@abshere.net"
11
11
  gem.homepage = "http://github.com/seamusabshere/data_miner"
12
12
  gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
- gem.add_dependency 'remote_table', '>=0.2.9'
13
+ gem.add_dependency 'remote_table', '>=0.2.10'
14
14
  gem.add_dependency 'activerecord', '>=2.3.4'
15
15
  gem.add_dependency 'activesupport', '>=2.3.4'
16
16
  gem.add_dependency 'andand', '>=1.3.1'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.17
1
+ 0.4.18
data/data_miner.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.17"
8
+ s.version = "0.4.18"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-04-15}
12
+ s.date = %q{2010-04-16}
13
13
  s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -52,7 +52,7 @@ Gem::Specification.new do |s|
52
52
  s.specification_version = 3
53
53
 
54
54
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
55
- s.add_runtime_dependency(%q<remote_table>, [">= 0.2.9"])
55
+ s.add_runtime_dependency(%q<remote_table>, [">= 0.2.10"])
56
56
  s.add_runtime_dependency(%q<activerecord>, [">= 2.3.4"])
57
57
  s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
58
58
  s.add_runtime_dependency(%q<andand>, [">= 1.3.1"])
@@ -61,7 +61,7 @@ Gem::Specification.new do |s|
61
61
  s.add_runtime_dependency(%q<blockenspiel>, [">= 0.3.2"])
62
62
  s.add_runtime_dependency(%q<log4r>, [">= 1.1.7"])
63
63
  else
64
- s.add_dependency(%q<remote_table>, [">= 0.2.9"])
64
+ s.add_dependency(%q<remote_table>, [">= 0.2.10"])
65
65
  s.add_dependency(%q<activerecord>, [">= 2.3.4"])
66
66
  s.add_dependency(%q<activesupport>, [">= 2.3.4"])
67
67
  s.add_dependency(%q<andand>, [">= 1.3.1"])
@@ -71,7 +71,7 @@ Gem::Specification.new do |s|
71
71
  s.add_dependency(%q<log4r>, [">= 1.1.7"])
72
72
  end
73
73
  else
74
- s.add_dependency(%q<remote_table>, [">= 0.2.9"])
74
+ s.add_dependency(%q<remote_table>, [">= 0.2.10"])
75
75
  s.add_dependency(%q<activerecord>, [">= 2.3.4"])
76
76
  s.add_dependency(%q<activesupport>, [">= 2.3.4"])
77
77
  s.add_dependency(%q<andand>, [">= 1.3.1"])
@@ -1,16 +1,16 @@
1
1
  module DataMiner
2
2
  class Attribute
3
- attr_accessor :runnable
3
+ attr_accessor :step
4
4
  attr_accessor :name
5
5
  attr_accessor :options
6
6
 
7
- delegate :resource, :to => :runnable
7
+ delegate :resource, :to => :step
8
8
 
9
- def initialize(runnable, name, options = {})
9
+ def initialize(step, name, options = {})
10
10
  options.symbolize_keys!
11
11
  @options = options
12
12
 
13
- @runnable = runnable
13
+ @step = step
14
14
  @name = name
15
15
  end
16
16
 
@@ -2,23 +2,23 @@ module DataMiner
2
2
  class Configuration
3
3
  include Blockenspiel::DSL
4
4
 
5
- attr_accessor :resource, :runnables, :runnable_counter, :attributes
5
+ attr_accessor :resource, :steps, :step_counter, :attributes
6
6
 
7
7
  def initialize(resource)
8
- @runnables = Array.new
8
+ @steps = Array.new
9
9
  @resource = resource
10
- @runnable_counter = 0
10
+ @step_counter = 0
11
11
  @attributes = HashWithIndifferentAccess.new
12
12
  end
13
13
 
14
14
  def process(method_name_or_block_description, &block)
15
- runnables << DataMiner::Process.new(self, runnable_counter, method_name_or_block_description, &block)
16
- self.runnable_counter += 1
15
+ steps << DataMiner::Process.new(self, step_counter, method_name_or_block_description, &block)
16
+ self.step_counter += 1
17
17
  end
18
18
 
19
19
  def clone(description, options = {})
20
- runnables << DataMiner::Clone.new(self, runnable_counter, description, options)
21
- self.runnable_counter += 1
20
+ steps << DataMiner::Clone.new(self, step_counter, description, options)
21
+ self.step_counter += 1
22
22
  end
23
23
 
24
24
  def import(*args, &block)
@@ -29,16 +29,19 @@ module DataMiner
29
29
  end
30
30
  options = args.last
31
31
 
32
- runnable = DataMiner::Import.new self, runnable_counter, description, options
33
- Blockenspiel.invoke block, runnable
34
- runnables << runnable
35
- self.runnable_counter += 1
32
+ step = DataMiner::Import.new self, step_counter, description, options
33
+ Blockenspiel.invoke block, step
34
+ steps << step
35
+ self.step_counter += 1
36
36
  end
37
37
 
38
38
  # Mine data for this class.
39
39
  def run(options = {})
40
40
  options.symbolize_keys!
41
41
 
42
+ return if DataMiner::Configuration.call_stack.include? resource.name
43
+ DataMiner::Configuration.call_stack.push resource.name
44
+
42
45
  finished = false
43
46
  if DataMiner::Run.table_exists?
44
47
  run = DataMiner::Run.create! :started_at => Time.now, :resource_name => resource.name if DataMiner::Run.table_exists?
@@ -48,16 +51,17 @@ module DataMiner
48
51
  end
49
52
  resource.delete_all if options[:from_scratch]
50
53
  begin
51
- runnables.each { |runnable| runnable.run run }
54
+ steps.each { |step| step.run run }
52
55
  finished = true
53
56
  ensure
54
57
  run.update_attributes! :ended_at => Time.now, :finished => finished if DataMiner::Run.table_exists?
58
+ DataMiner::Configuration.call_stack.clear if DataMiner::Configuration.call_stack.first == resource.name
55
59
  end
56
60
  nil
57
61
  end
58
62
 
59
- def import_runnables
60
- runnables.select { |runnable| runnable.is_a? Import }
63
+ def import_steps
64
+ steps.select { |step| step.is_a? Import }
61
65
  end
62
66
 
63
67
  def before_invoke
@@ -65,6 +69,7 @@ module DataMiner
65
69
  end
66
70
 
67
71
  def after_invoke
72
+ return unless resource.table_exists?
68
73
  make_sure_unit_definitions_make_sense
69
74
  suggest_missing_column_migrations
70
75
  end
@@ -79,8 +84,8 @@ module DataMiner
79
84
  ]
80
85
 
81
86
  def make_sure_unit_definitions_make_sense
82
- import_runnables.each do |runnable|
83
- runnable.attributes.each do |_, attribute|
87
+ import_steps.each do |step|
88
+ step.attributes.each do |_, attribute|
84
89
  if attribute.options.any? { |k, _| k.to_s =~ /unit/ } and COMPLETE_UNIT_DEFINITIONS.none? { |complete_definition| complete_definition.all? { |required_option| attribute.options[required_option].present? } }
85
90
  DataMiner.log_or_raise %{
86
91
 
@@ -111,8 +116,8 @@ You need to supply one of #{COMPLETE_UNIT_DEFINITIONS.map(&:inspect).to_sentence
111
116
  DataMiner.log_info "Not recording which run touched a row."
112
117
  end
113
118
 
114
- import_runnables.each do |runnable|
115
- runnable.attributes.each do |_, attribute|
119
+ import_steps.each do |step|
120
+ step.attributes.each do |_, attribute|
116
121
  DataMiner.log_or_raise "You can't have an attribute column that ends in _units (reserved): #{resource.table_name}.#{attribute.name}" if attribute.name.ends_with? '_units'
117
122
  unless resource.column_names.include? attribute.name
118
123
  missing_columns << attribute.name
@@ -160,6 +165,9 @@ On the other hand, if you're working directly with create_table, this might be h
160
165
 
161
166
  cattr_accessor :resource_names
162
167
  self.resource_names = Array.new
168
+
169
+ cattr_accessor :call_stack
170
+ self.call_stack = Array.new
163
171
  class << self
164
172
  # Mine data. Defaults to all resource_names touched by DataMiner.
165
173
  #
data/lib/data_miner.rb CHANGED
@@ -72,10 +72,7 @@ ActiveRecord::Base.class_eval do
72
72
  def self.data_miner(&block)
73
73
  DataMiner.start_logging
74
74
 
75
- unless table_exists?
76
- DataMiner.log_or_raise "Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
77
- return
78
- end
75
+ DataMiner.log_info "Database table `#{table_name}` doesn't exist. It might be created in the data_miner block, but if it's not, DataMiner probably won't work properly until you run a migration or otherwise fix the schema." unless table_exists?
79
76
 
80
77
  DataMiner.resource_names.push self.name unless DataMiner.resource_names.include? self.name
81
78
 
@@ -488,6 +488,47 @@ class CensusDivision < ActiveRecord::Base
488
488
  end
489
489
  end
490
490
 
491
+ class CrosscallingCensusRegion < ActiveRecord::Base
492
+ set_primary_key :number
493
+
494
+ has_many :crosscalling_census_divisions
495
+
496
+ data_miner do
497
+ process "derive ourselves from the census divisions table (i.e., cross call census divisions)" do
498
+ CrosscallingCensusDivision.run_data_miner!
499
+ connection.drop_table :crosscalling_census_regions rescue nil
500
+ connection.create_table :crosscalling_census_regions, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
501
+ t.column :number, :integer
502
+ t.column :name, :string
503
+ end
504
+ connection.execute 'ALTER TABLE crosscalling_census_regions ADD PRIMARY KEY (number);'
505
+ connection.execute %{
506
+ INSERT IGNORE INTO crosscalling_census_regions(number, name)
507
+ SELECT crosscalling_census_divisions.census_region_number, crosscalling_census_divisions.census_region_name FROM crosscalling_census_divisions
508
+ }
509
+ end
510
+ end
511
+ end
512
+
513
+ class CrosscallingCensusDivision < ActiveRecord::Base
514
+ set_primary_key :number
515
+
516
+ belongs_to :crosscalling_census_regions, :foreign_key => 'census_region_number'
517
+
518
+ data_miner do
519
+ import "get a list of census divisions and their regions", :url => 'http://www.census.gov/popest/geographic/codes02.csv', :skip => 9, :select => lambda { |row| row['Division'].to_s.strip != 'X' and row['FIPS CODE STATE'].to_s.strip == 'X'} do
520
+ key 'number', :field_name => 'Division'
521
+ store 'name', :field_name => 'Name'
522
+ store 'census_region_number', :field_name => 'Region'
523
+ store 'census_region_name', :field_name => 'Region', :dictionary => { :input => 'number', :output => 'name', :url => 'http://data.brighterplanet.com/census_regions.csv' }
524
+ end
525
+
526
+ process "make sure my parent object is set up (i.e., cross-call it)" do
527
+ CrosscallingCensusRegion.run_data_miner!
528
+ end
529
+ end
530
+ end
531
+
491
532
  class ResidentialEnergyConsumptionSurveyResponse < ActiveRecord::Base
492
533
  set_primary_key :department_of_energy_identifier
493
534
 
@@ -906,6 +947,18 @@ class DataMinerTest < Test::Unit::TestCase
906
947
  end
907
948
 
908
949
  if ENV['FAST'] == 'true'
950
+ should "keep a call stack so that you can call run_data_miner! on a child" do
951
+ CrosscallingCensusDivision.run_data_miner!
952
+ assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
953
+ assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
954
+ end
955
+
956
+ should "keep a call stack so that you can call run_data_miner! on a parent" do
957
+ CrosscallingCensusRegion.run_data_miner!
958
+ assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
959
+ assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
960
+ end
961
+
909
962
  should "clone airports" do
910
963
  ClonedAirport.run_data_miner!
911
964
  assert ClonedAirport.count > 0
@@ -946,15 +999,15 @@ class DataMinerTest < Test::Unit::TestCase
946
999
  end
947
1000
 
948
1001
  should "hash things" do
949
- AutomobileVariant.data_miner_config.runnables[0].run(nil)
1002
+ AutomobileVariant.data_miner_config.steps[0].run(nil)
950
1003
  assert AutomobileVariant.first.row_hash.present?
951
1004
  end
952
1005
 
953
1006
  should "process a callback block instead of a method" do
954
1007
  AutomobileVariant.delete_all
955
- AutomobileVariant.data_miner_config.runnables[0].run(nil)
1008
+ AutomobileVariant.data_miner_config.steps[0].run(nil)
956
1009
  assert !AutomobileVariant.first.fuel_efficiency_city.present?
957
- AutomobileVariant.data_miner_config.runnables.last.run(nil)
1010
+ AutomobileVariant.data_miner_config.steps.last.run(nil)
958
1011
  assert AutomobileVariant.first.fuel_efficiency_city.present?
959
1012
  end
960
1013
 
data/test/test_helper.rb CHANGED
@@ -134,6 +134,19 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
134
134
  end
135
135
  execute 'ALTER TABLE census_divisions ADD PRIMARY KEY (number);'
136
136
 
137
+ create_table 'crosscalling_census_divisions', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
138
+ t.integer 'number'
139
+ t.string 'name'
140
+ t.datetime 'updated_at'
141
+ t.datetime 'created_at'
142
+ t.string 'census_region_name'
143
+ t.integer 'census_region_number'
144
+
145
+ t.integer 'data_miner_touch_count'
146
+ t.integer 'data_miner_last_run_id'
147
+ end
148
+ execute 'ALTER TABLE crosscalling_census_divisions ADD PRIMARY KEY (number);'
149
+
137
150
  create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
138
151
  t.float "fuel_efficiency_city"
139
152
  t.float "fuel_efficiency_highway"
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 17
9
- version: 0.4.17
8
+ - 18
9
+ version: 0.4.18
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-04-15 00:00:00 -04:00
18
+ date: 2010-04-16 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -28,8 +28,8 @@ dependencies:
28
28
  segments:
29
29
  - 0
30
30
  - 2
31
- - 9
32
- version: 0.2.9
31
+ - 10
32
+ version: 0.2.10
33
33
  type: :runtime
34
34
  version_requirements: *id001
35
35
  - !ruby/object:Gem::Dependency