data_miner 0.3.7 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -44,7 +44,7 @@ Put this in <tt>lib/tasks/data_miner_tasks.rake</tt>: (unfortunately I don't kno
44
44
 
45
45
  namespace :data_miner do
46
46
  task :run => :environment do
47
- DataMiner.run :class_names => ENV['CLASSES'].to_s.split(/\s*,\s*/).flatten.compact
47
+ DataMiner.run :resource_names => ENV['RESOURCES'].to_s.split(/\s*,\s*/).flatten.compact
48
48
  end
49
49
  end
50
50
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.7
1
+ 0.3.8
data/data_miner.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.3.7"
8
+ s.version = "0.3.8"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
@@ -32,7 +32,6 @@ Gem::Specification.new do |s|
32
32
  "lib/data_miner/import.rb",
33
33
  "lib/data_miner/process.rb",
34
34
  "lib/data_miner/run.rb",
35
- "lib/data_miner/target.rb",
36
35
  "lib/data_miner/william_james_cartesian_product.rb",
37
36
  "test/data_miner_test.rb",
38
37
  "test/test_helper.rb"
data/lib/data_miner.rb CHANGED
@@ -12,7 +12,6 @@ require 'data_miner/configuration'
12
12
  require 'data_miner/dictionary'
13
13
  require 'data_miner/import'
14
14
  require 'data_miner/process'
15
- require 'data_miner/target'
16
15
  require 'data_miner/run'
17
16
 
18
17
  # TODO: move to gem
@@ -38,8 +37,8 @@ module DataMiner
38
37
  DataMiner::Configuration.run options
39
38
  end
40
39
 
41
- def self.classes
42
- DataMiner::Configuration.classes
40
+ def self.resource_names
41
+ DataMiner::Configuration.resource_names
43
42
  end
44
43
 
45
44
  def self.create_tables
@@ -53,10 +52,9 @@ ActiveRecord::Base.class_eval do
53
52
  logger.error "[DataMiner gem] Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
54
53
  return
55
54
  end
56
-
57
- DataMiner.classes.add self
55
+
56
+ DataMiner.resource_names.add self.name
58
57
  DataMiner.create_tables
59
- DataMiner::Target.find_or_create_by_name name
60
58
 
61
59
  belongs_to :data_miner_last_run, :class_name => 'DataMiner::Run'
62
60
 
@@ -1,15 +1,15 @@
1
1
  module DataMiner
2
2
  class Attribute
3
- attr_accessor :klass, :name, :options_for_import
3
+ attr_accessor :resource, :name, :options_for_import
4
4
 
5
- def initialize(klass, name)
6
- @klass = klass
5
+ def initialize(resource, name)
6
+ @resource = resource
7
7
  @name = name
8
8
  @options_for_import = {}
9
9
  end
10
10
 
11
11
  def inspect
12
- "Attribute(#{klass}##{name})"
12
+ "Attribute(#{resource}##{name})"
13
13
  end
14
14
 
15
15
  def stored_by?(import)
@@ -56,11 +56,21 @@ module DataMiner
56
56
  end
57
57
 
58
58
  # this will overwrite nils, even if wants_overwriting?(import) is false
59
+ # returns true if an attr was changed, otherwise false
59
60
  def set_record_from_row(import, record, row)
60
- return if !wants_overwriting?(import) and !record.send(name).nil?
61
- value = value_from_row(import, row)
62
- record.send "#{name}=", value
63
- DataMiner.logger.info("ActiveRecord didn't like trying to set #{klass}.#{name} = #{value}") if !value.nil? and record.send(name).nil?
61
+ return false if !wants_overwriting?(import) and !record.send(name).nil?
62
+ what_it_was = record.send name
63
+ what_it_should_be = value_from_row import, row
64
+ record.send "#{name}=", what_it_should_be
65
+ what_it_is = record.send name
66
+ if what_it_is.nil? and !what_it_should_be.nil?
67
+ DataMiner.logger.info "ActiveRecord didn't like trying to set #{resource}.#{name} = #{what_it_should_be} (it came out as nil)"
68
+ nil
69
+ elsif what_it_is == what_it_was
70
+ false
71
+ else
72
+ true
73
+ end
64
74
  end
65
75
 
66
76
  def unit_from_source(import, row)
@@ -87,7 +97,7 @@ module DataMiner
87
97
  end
88
98
 
89
99
  def column_type
90
- klass.columns_hash[name.to_s].type
100
+ resource.columns_hash[name.to_s].type
91
101
  end
92
102
 
93
103
  def dictionary(import)
@@ -2,12 +2,12 @@ module DataMiner
2
2
  class Configuration
3
3
  include Blockenspiel::DSL
4
4
 
5
- attr_accessor :klass, :runnables, :runnable_counter, :attributes, :unique_indices
5
+ attr_accessor :resource, :runnables, :runnable_counter, :attributes, :unique_indices
6
6
 
7
- def initialize(klass)
7
+ def initialize(resource)
8
8
  @runnables = Array.new
9
9
  @unique_indices = Set.new
10
- @klass = klass
10
+ @resource = resource
11
11
  @runnable_counter = 0
12
12
  @attributes = HashWithIndifferentAccess.new
13
13
  end
@@ -35,7 +35,7 @@ module DataMiner
35
35
 
36
36
  def after_invoke
37
37
  if unique_indices.empty?
38
- raise(MissingHashColumn, "No unique_index defined for #{klass.name}, so you need a row_hash:string column.") unless klass.column_names.include?('row_hash')
38
+ raise(MissingHashColumn, "No unique_index defined for #{resource.name}, so you need a row_hash:string column.") unless resource.column_names.include?('row_hash')
39
39
  unique_indices.add 'row_hash'
40
40
  end
41
41
  runnables.select { |runnable| runnable.is_a?(Import) }.each { |runnable| unique_indices.each { |unique_index| runnable.store(unique_index) unless runnable.stores?(unique_index) } }
@@ -43,10 +43,9 @@ module DataMiner
43
43
 
44
44
  # Mine data for this class.
45
45
  def run(options = {})
46
- target = DataMiner::Target.find(klass.name)
47
46
  finished = false
48
- run = target.runs.create! :started_at => Time.now
49
- klass.delete_all if options[:from_scratch]
47
+ run = DataMiner::Run.create! :started_at => Time.now, :resource_name => resource.name
48
+ resource.delete_all if options[:from_scratch]
50
49
  begin
51
50
  runnables.each { |runnable| runnable.run(run) }
52
51
  finished = true
@@ -56,34 +55,26 @@ module DataMiner
56
55
  nil
57
56
  end
58
57
 
59
- cattr_accessor :classes
60
- self.classes = Set.new
58
+ cattr_accessor :resource_names
59
+ self.resource_names = Set.new
61
60
  class << self
62
- # Mine data. Defaults to all classes touched by DataMiner.
61
+ # Mine data. Defaults to all resource_names touched by DataMiner.
63
62
  #
64
63
  # Options
65
- # * <tt>:class_names</tt>: provide an array class names to mine
64
+ # * <tt>:resource_names</tt>: array of resource (class) names to mine
66
65
  def run(options = {})
67
- classes.each do |klass|
68
- if options[:class_names].blank? or options[:class_names].include?(klass.name)
69
- klass.data_miner_config.run options
66
+ resource_names.each do |resource_name|
67
+ if options[:resource_names].blank? or options[:resource_names].include?(resource_name)
68
+ resource_name.constantize.data_miner_config.run options
70
69
  end
71
70
  end
72
71
  end
73
72
 
74
73
  def create_tables
75
74
  c = ActiveRecord::Base.connection
76
- unless c.table_exists?('data_miner_targets')
77
- c.create_table 'data_miner_targets', :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
78
- t.string 'name'
79
- t.datetime 'created_at'
80
- t.datetime 'updated_at'
81
- end
82
- c.execute 'ALTER TABLE data_miner_targets ADD PRIMARY KEY (name);'
83
- end
84
75
  unless c.table_exists?('data_miner_runs')
85
76
  c.create_table 'data_miner_runs', :options => 'ENGINE=InnoDB default charset=utf8' do |t|
86
- t.string 'data_miner_target_id'
77
+ t.string 'resource_name'
87
78
  t.boolean 'finished'
88
79
  t.datetime 'started_at'
89
80
  t.datetime 'ended_at'
@@ -2,7 +2,7 @@ module DataMiner
2
2
  class Import
3
3
  attr_accessor :configuration, :position_in_run, :options, :table, :errata
4
4
  attr_accessor :description
5
- delegate :klass, :to => :configuration
5
+ delegate :resource, :to => :configuration
6
6
  delegate :unique_indices, :to => :configuration
7
7
 
8
8
  def initialize(configuration, position_in_run, description, options = {}, &block)
@@ -11,12 +11,12 @@ module DataMiner
11
11
  @description = description
12
12
  @options = options
13
13
  yield self if block_given? # pull in attributes
14
- @errata = Errata.new(:url => options[:errata], :klass => klass) if options[:errata]
14
+ @errata = Errata.new(:url => options[:errata], :klass => resource) if options[:errata]
15
15
  @table = RemoteTable.new(options.slice(:url, :filename, :post_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop))
16
16
  end
17
17
 
18
18
  def inspect
19
- "Import(#{klass}) position #{position_in_run} (#{description})"
19
+ "Import(#{resource}) position #{position_in_run} (#{description})"
20
20
  end
21
21
 
22
22
  def attributes
@@ -28,7 +28,7 @@ module DataMiner
28
28
  end
29
29
 
30
30
  def store(attr_name, attr_options = {})
31
- configuration.attributes[attr_name] ||= Attribute.new(klass, attr_name)
31
+ configuration.attributes[attr_name] ||= Attribute.new(resource, attr_name)
32
32
  configuration.attributes[attr_name].options_for_import[self] = attr_options
33
33
  end
34
34
 
@@ -45,14 +45,16 @@ module DataMiner
45
45
 
46
46
  record_set = WilliamJamesCartesianProduct.cart_prod(*unifying_values).map do |combination|
47
47
  next if combination.include?(nil)
48
- klass.send "find_or_initialize_by_#{unique_indices.to_a.join('_and_')}", *combination
48
+ resource.send "find_or_initialize_by_#{unique_indices.to_a.join('_and_')}", *combination
49
49
  end.flatten
50
50
 
51
51
  Array.wrap(record_set).each do |record|
52
- attributes.values.each { |attr| attr.set_record_from_row(self, record, row) }
52
+ hits = attributes.values.map { |attr| attr.set_record_from_row self, record, row }
53
53
  record.data_miner_touch_count ||= 0
54
- record.data_miner_touch_count += 1
55
- record.data_miner_last_run = run
54
+ if hits.any?
55
+ record.data_miner_touch_count += 1
56
+ record.data_miner_last_run = run
57
+ end
56
58
  record.save!
57
59
  end
58
60
  end
@@ -3,7 +3,7 @@ module DataMiner
3
3
  attr_accessor :configuration, :position_in_run
4
4
  attr_accessor :method_name
5
5
  attr_accessor :block_description, :block
6
- delegate :klass, :to => :configuration
6
+ delegate :resource, :to => :configuration
7
7
 
8
8
  def initialize(configuration, position_in_run, method_name_or_block_description, &block)
9
9
  @configuration = configuration
@@ -12,16 +12,16 @@ module DataMiner
12
12
  @block_description = method_name_or_block_description
13
13
  @block = block
14
14
  else
15
- @method_name = method_name
15
+ @method_name = method_name_or_block_description
16
16
  end
17
17
  end
18
18
 
19
19
  def inspect
20
- str = "Process(#{klass}) position #{position_in_run}"
20
+ str = "Process(#{resource}) position #{position_in_run}"
21
21
  if block
22
- str << " called :#{method_name}"
23
- else
24
22
  str << " ran block (#{block_description})"
23
+ else
24
+ str << " called :#{method_name}"
25
25
  end
26
26
  end
27
27
 
@@ -29,7 +29,7 @@ module DataMiner
29
29
  if block
30
30
  block.call
31
31
  else
32
- klass.send method_name
32
+ resource.send method_name
33
33
  end
34
34
  DataMiner.logger.info "ran #{inspect}"
35
35
  end
@@ -2,6 +2,5 @@ module DataMiner
2
2
  class Run < ActiveRecord::Base
3
3
  set_table_name 'data_miner_runs'
4
4
  default_scope :order => 'id ASC'
5
- belongs_to :target, :class_name => '::DataMiner::Target', :foreign_key => 'data_miner_target_id'
6
5
  end
7
6
  end
@@ -876,25 +876,13 @@ class DataMinerTest < Test::Unit::TestCase
876
876
  assert AutomobileVariant.first.fuel_efficiency_city.present?
877
877
  end
878
878
 
879
- # should "mine multiple classes in the correct order" do
880
- # DataMiner.run
881
- # uy = Country.find_by_iso_3166('UY')
882
- # assert_equal 'Uruguay', uy.name
883
- # end
884
-
885
- should "have a target record for every class that is mined" do
886
- DataMiner.run :class_names => %w{ Country }
887
- assert DataMiner::Target.exists?(:name => 'Country')
888
- assert_equal 1, DataMiner::Target.count(:conditions => {:name => 'country'})
889
- end
890
-
891
879
  should "keep a log when it does a run" do
892
880
  approx_started_at = Time.now
893
- DataMiner.run :class_names => %w{ Country }
881
+ DataMiner.run :resource_names => %w{ Country }
894
882
  approx_ended_at = Time.now
895
- target = DataMiner::Target.find_by_name('Country')
896
- assert (target.runs.last.started_at - approx_started_at).abs < 5 # seconds
897
- assert (target.runs.last.ended_at - approx_ended_at).abs < 5 # seconds
883
+ last_run = DataMiner::Run.first(:conditions => { :resource_name => 'Country' }, :order => 'id DESC')
884
+ assert (last_run.started_at - approx_started_at).abs < 5 # seconds
885
+ assert (last_run.ended_at - approx_ended_at).abs < 5 # seconds
898
886
  end
899
887
 
900
888
  should "request a re-import from scratch" do
@@ -902,31 +890,37 @@ class DataMinerTest < Test::Unit::TestCase
902
890
  c.iso_3166 = 'JUNK'
903
891
  c.save!
904
892
  assert Country.exists?(:iso_3166 => 'JUNK')
905
- DataMiner.run :class_names => %w{ Country }, :from_scratch => true
893
+ DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
906
894
  assert !Country.exists?(:iso_3166 => 'JUNK')
907
895
  end
908
896
 
909
897
  should "track how many times a row was touched" do
910
- DataMiner.run :class_names => %w{ Country }, :from_scratch => true
898
+ DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
899
+ assert_equal 1, Country.first.data_miner_touch_count
900
+ DataMiner.run :resource_names => %w{ Country }
911
901
  assert_equal 1, Country.first.data_miner_touch_count
912
- DataMiner.run :class_names => %w{ Country }
913
- assert_equal 2, Country.first.data_miner_touch_count
914
902
  end
915
903
 
916
904
  should "keep track of what the last import run that touched a row was" do
917
- DataMiner.run :class_names => %w{ Country }, :from_scratch => true
905
+ DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
918
906
  a = DataMiner::Run.last
919
907
  assert_equal a, Country.first.data_miner_last_run
920
- DataMiner.run :class_names => %w{ Country }
908
+ DataMiner.run :resource_names => %w{ Country }
921
909
  b = DataMiner::Run.last
922
910
  assert a != b
923
- assert_equal b, Country.first.data_miner_last_run
911
+ assert_equal a, Country.first.data_miner_last_run
924
912
  end
925
913
 
926
914
  unless ENV['FAST'] == 'true'
927
915
  should "import using a dictionary" do
928
- DataMiner.run :class_names => %w{ ResidentialEnergyConsumptionSurveyResponse }
916
+ DataMiner.run :resource_names => %w{ ResidentialEnergyConsumptionSurveyResponse }
929
917
  assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
930
918
  end
919
+
920
+ should "mine multiple classes in the correct order" do
921
+ DataMiner.run
922
+ uy = Country.find_by_iso_3166('UY')
923
+ assert_equal 'Uruguay', uy.name
924
+ end
931
925
  end
932
926
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
@@ -108,7 +108,6 @@ files:
108
108
  - lib/data_miner/import.rb
109
109
  - lib/data_miner/process.rb
110
110
  - lib/data_miner/run.rb
111
- - lib/data_miner/target.rb
112
111
  - lib/data_miner/william_james_cartesian_product.rb
113
112
  - test/data_miner_test.rb
114
113
  - test/test_helper.rb
@@ -1,26 +0,0 @@
1
- module DataMiner
2
- class Target < ActiveRecord::Base
3
- set_table_name 'data_miner_targets'
4
- set_primary_key :name
5
- has_many :runs, :class_name => '::DataMiner::Run', :foreign_key => 'data_miner_target_id'
6
-
7
- def klass
8
- name.constantize
9
- end
10
-
11
- def run(options = {})
12
- klass.data_miner_config.run options
13
- end
14
-
15
- def included_in_list_of_targets
16
- msg = "must have a data_miner block"
17
- unless DataMiner.classes.include?(name.constantize)
18
- errors.add :name, msg
19
- end
20
- rescue NameError
21
- errors.add :name, msg
22
- end
23
-
24
- validate :included_in_list_of_targets
25
- end
26
- end