data_miner 0.3.7 → 0.3.8

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -44,7 +44,7 @@ Put this in <tt>lib/tasks/data_miner_tasks.rake</tt>: (unfortunately I don't kno
44
44
 
45
45
  namespace :data_miner do
46
46
  task :run => :environment do
47
- DataMiner.run :class_names => ENV['CLASSES'].to_s.split(/\s*,\s*/).flatten.compact
47
+ DataMiner.run :resource_names => ENV['RESOURCES'].to_s.split(/\s*,\s*/).flatten.compact
48
48
  end
49
49
  end
50
50
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.7
1
+ 0.3.8
data/data_miner.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.3.7"
8
+ s.version = "0.3.8"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
@@ -32,7 +32,6 @@ Gem::Specification.new do |s|
32
32
  "lib/data_miner/import.rb",
33
33
  "lib/data_miner/process.rb",
34
34
  "lib/data_miner/run.rb",
35
- "lib/data_miner/target.rb",
36
35
  "lib/data_miner/william_james_cartesian_product.rb",
37
36
  "test/data_miner_test.rb",
38
37
  "test/test_helper.rb"
data/lib/data_miner.rb CHANGED
@@ -12,7 +12,6 @@ require 'data_miner/configuration'
12
12
  require 'data_miner/dictionary'
13
13
  require 'data_miner/import'
14
14
  require 'data_miner/process'
15
- require 'data_miner/target'
16
15
  require 'data_miner/run'
17
16
 
18
17
  # TODO: move to gem
@@ -38,8 +37,8 @@ module DataMiner
38
37
  DataMiner::Configuration.run options
39
38
  end
40
39
 
41
- def self.classes
42
- DataMiner::Configuration.classes
40
+ def self.resource_names
41
+ DataMiner::Configuration.resource_names
43
42
  end
44
43
 
45
44
  def self.create_tables
@@ -53,10 +52,9 @@ ActiveRecord::Base.class_eval do
53
52
  logger.error "[DataMiner gem] Database table `#{table_name}` doesn't exist. DataMiner probably won't work properly until you run a migration or otherwise fix the schema."
54
53
  return
55
54
  end
56
-
57
- DataMiner.classes.add self
55
+
56
+ DataMiner.resource_names.add self.name
58
57
  DataMiner.create_tables
59
- DataMiner::Target.find_or_create_by_name name
60
58
 
61
59
  belongs_to :data_miner_last_run, :class_name => 'DataMiner::Run'
62
60
 
@@ -1,15 +1,15 @@
1
1
  module DataMiner
2
2
  class Attribute
3
- attr_accessor :klass, :name, :options_for_import
3
+ attr_accessor :resource, :name, :options_for_import
4
4
 
5
- def initialize(klass, name)
6
- @klass = klass
5
+ def initialize(resource, name)
6
+ @resource = resource
7
7
  @name = name
8
8
  @options_for_import = {}
9
9
  end
10
10
 
11
11
  def inspect
12
- "Attribute(#{klass}##{name})"
12
+ "Attribute(#{resource}##{name})"
13
13
  end
14
14
 
15
15
  def stored_by?(import)
@@ -56,11 +56,21 @@ module DataMiner
56
56
  end
57
57
 
58
58
  # this will overwrite nils, even if wants_overwriting?(import) is false
59
+ # returns true if an attr was changed, otherwise false
59
60
  def set_record_from_row(import, record, row)
60
- return if !wants_overwriting?(import) and !record.send(name).nil?
61
- value = value_from_row(import, row)
62
- record.send "#{name}=", value
63
- DataMiner.logger.info("ActiveRecord didn't like trying to set #{klass}.#{name} = #{value}") if !value.nil? and record.send(name).nil?
61
+ return false if !wants_overwriting?(import) and !record.send(name).nil?
62
+ what_it_was = record.send name
63
+ what_it_should_be = value_from_row import, row
64
+ record.send "#{name}=", what_it_should_be
65
+ what_it_is = record.send name
66
+ if what_it_is.nil? and !what_it_should_be.nil?
67
+ DataMiner.logger.info "ActiveRecord didn't like trying to set #{resource}.#{name} = #{what_it_should_be} (it came out as nil)"
68
+ nil
69
+ elsif what_it_is == what_it_was
70
+ false
71
+ else
72
+ true
73
+ end
64
74
  end
65
75
 
66
76
  def unit_from_source(import, row)
@@ -87,7 +97,7 @@ module DataMiner
87
97
  end
88
98
 
89
99
  def column_type
90
- klass.columns_hash[name.to_s].type
100
+ resource.columns_hash[name.to_s].type
91
101
  end
92
102
 
93
103
  def dictionary(import)
@@ -2,12 +2,12 @@ module DataMiner
2
2
  class Configuration
3
3
  include Blockenspiel::DSL
4
4
 
5
- attr_accessor :klass, :runnables, :runnable_counter, :attributes, :unique_indices
5
+ attr_accessor :resource, :runnables, :runnable_counter, :attributes, :unique_indices
6
6
 
7
- def initialize(klass)
7
+ def initialize(resource)
8
8
  @runnables = Array.new
9
9
  @unique_indices = Set.new
10
- @klass = klass
10
+ @resource = resource
11
11
  @runnable_counter = 0
12
12
  @attributes = HashWithIndifferentAccess.new
13
13
  end
@@ -35,7 +35,7 @@ module DataMiner
35
35
 
36
36
  def after_invoke
37
37
  if unique_indices.empty?
38
- raise(MissingHashColumn, "No unique_index defined for #{klass.name}, so you need a row_hash:string column.") unless klass.column_names.include?('row_hash')
38
+ raise(MissingHashColumn, "No unique_index defined for #{resource.name}, so you need a row_hash:string column.") unless resource.column_names.include?('row_hash')
39
39
  unique_indices.add 'row_hash'
40
40
  end
41
41
  runnables.select { |runnable| runnable.is_a?(Import) }.each { |runnable| unique_indices.each { |unique_index| runnable.store(unique_index) unless runnable.stores?(unique_index) } }
@@ -43,10 +43,9 @@ module DataMiner
43
43
 
44
44
  # Mine data for this class.
45
45
  def run(options = {})
46
- target = DataMiner::Target.find(klass.name)
47
46
  finished = false
48
- run = target.runs.create! :started_at => Time.now
49
- klass.delete_all if options[:from_scratch]
47
+ run = DataMiner::Run.create! :started_at => Time.now, :resource_name => resource.name
48
+ resource.delete_all if options[:from_scratch]
50
49
  begin
51
50
  runnables.each { |runnable| runnable.run(run) }
52
51
  finished = true
@@ -56,34 +55,26 @@ module DataMiner
56
55
  nil
57
56
  end
58
57
 
59
- cattr_accessor :classes
60
- self.classes = Set.new
58
+ cattr_accessor :resource_names
59
+ self.resource_names = Set.new
61
60
  class << self
62
- # Mine data. Defaults to all classes touched by DataMiner.
61
+ # Mine data. Defaults to all resource_names touched by DataMiner.
63
62
  #
64
63
  # Options
65
- # * <tt>:class_names</tt>: provide an array class names to mine
64
+ # * <tt>:resource_names</tt>: array of resource (class) names to mine
66
65
  def run(options = {})
67
- classes.each do |klass|
68
- if options[:class_names].blank? or options[:class_names].include?(klass.name)
69
- klass.data_miner_config.run options
66
+ resource_names.each do |resource_name|
67
+ if options[:resource_names].blank? or options[:resource_names].include?(resource_name)
68
+ resource_name.constantize.data_miner_config.run options
70
69
  end
71
70
  end
72
71
  end
73
72
 
74
73
  def create_tables
75
74
  c = ActiveRecord::Base.connection
76
- unless c.table_exists?('data_miner_targets')
77
- c.create_table 'data_miner_targets', :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
78
- t.string 'name'
79
- t.datetime 'created_at'
80
- t.datetime 'updated_at'
81
- end
82
- c.execute 'ALTER TABLE data_miner_targets ADD PRIMARY KEY (name);'
83
- end
84
75
  unless c.table_exists?('data_miner_runs')
85
76
  c.create_table 'data_miner_runs', :options => 'ENGINE=InnoDB default charset=utf8' do |t|
86
- t.string 'data_miner_target_id'
77
+ t.string 'resource_name'
87
78
  t.boolean 'finished'
88
79
  t.datetime 'started_at'
89
80
  t.datetime 'ended_at'
@@ -2,7 +2,7 @@ module DataMiner
2
2
  class Import
3
3
  attr_accessor :configuration, :position_in_run, :options, :table, :errata
4
4
  attr_accessor :description
5
- delegate :klass, :to => :configuration
5
+ delegate :resource, :to => :configuration
6
6
  delegate :unique_indices, :to => :configuration
7
7
 
8
8
  def initialize(configuration, position_in_run, description, options = {}, &block)
@@ -11,12 +11,12 @@ module DataMiner
11
11
  @description = description
12
12
  @options = options
13
13
  yield self if block_given? # pull in attributes
14
- @errata = Errata.new(:url => options[:errata], :klass => klass) if options[:errata]
14
+ @errata = Errata.new(:url => options[:errata], :klass => resource) if options[:errata]
15
15
  @table = RemoteTable.new(options.slice(:url, :filename, :post_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop))
16
16
  end
17
17
 
18
18
  def inspect
19
- "Import(#{klass}) position #{position_in_run} (#{description})"
19
+ "Import(#{resource}) position #{position_in_run} (#{description})"
20
20
  end
21
21
 
22
22
  def attributes
@@ -28,7 +28,7 @@ module DataMiner
28
28
  end
29
29
 
30
30
  def store(attr_name, attr_options = {})
31
- configuration.attributes[attr_name] ||= Attribute.new(klass, attr_name)
31
+ configuration.attributes[attr_name] ||= Attribute.new(resource, attr_name)
32
32
  configuration.attributes[attr_name].options_for_import[self] = attr_options
33
33
  end
34
34
 
@@ -45,14 +45,16 @@ module DataMiner
45
45
 
46
46
  record_set = WilliamJamesCartesianProduct.cart_prod(*unifying_values).map do |combination|
47
47
  next if combination.include?(nil)
48
- klass.send "find_or_initialize_by_#{unique_indices.to_a.join('_and_')}", *combination
48
+ resource.send "find_or_initialize_by_#{unique_indices.to_a.join('_and_')}", *combination
49
49
  end.flatten
50
50
 
51
51
  Array.wrap(record_set).each do |record|
52
- attributes.values.each { |attr| attr.set_record_from_row(self, record, row) }
52
+ hits = attributes.values.map { |attr| attr.set_record_from_row self, record, row }
53
53
  record.data_miner_touch_count ||= 0
54
- record.data_miner_touch_count += 1
55
- record.data_miner_last_run = run
54
+ if hits.any?
55
+ record.data_miner_touch_count += 1
56
+ record.data_miner_last_run = run
57
+ end
56
58
  record.save!
57
59
  end
58
60
  end
@@ -3,7 +3,7 @@ module DataMiner
3
3
  attr_accessor :configuration, :position_in_run
4
4
  attr_accessor :method_name
5
5
  attr_accessor :block_description, :block
6
- delegate :klass, :to => :configuration
6
+ delegate :resource, :to => :configuration
7
7
 
8
8
  def initialize(configuration, position_in_run, method_name_or_block_description, &block)
9
9
  @configuration = configuration
@@ -12,16 +12,16 @@ module DataMiner
12
12
  @block_description = method_name_or_block_description
13
13
  @block = block
14
14
  else
15
- @method_name = method_name
15
+ @method_name = method_name_or_block_description
16
16
  end
17
17
  end
18
18
 
19
19
  def inspect
20
- str = "Process(#{klass}) position #{position_in_run}"
20
+ str = "Process(#{resource}) position #{position_in_run}"
21
21
  if block
22
- str << " called :#{method_name}"
23
- else
24
22
  str << " ran block (#{block_description})"
23
+ else
24
+ str << " called :#{method_name}"
25
25
  end
26
26
  end
27
27
 
@@ -29,7 +29,7 @@ module DataMiner
29
29
  if block
30
30
  block.call
31
31
  else
32
- klass.send method_name
32
+ resource.send method_name
33
33
  end
34
34
  DataMiner.logger.info "ran #{inspect}"
35
35
  end
@@ -2,6 +2,5 @@ module DataMiner
2
2
  class Run < ActiveRecord::Base
3
3
  set_table_name 'data_miner_runs'
4
4
  default_scope :order => 'id ASC'
5
- belongs_to :target, :class_name => '::DataMiner::Target', :foreign_key => 'data_miner_target_id'
6
5
  end
7
6
  end
@@ -876,25 +876,13 @@ class DataMinerTest < Test::Unit::TestCase
876
876
  assert AutomobileVariant.first.fuel_efficiency_city.present?
877
877
  end
878
878
 
879
- # should "mine multiple classes in the correct order" do
880
- # DataMiner.run
881
- # uy = Country.find_by_iso_3166('UY')
882
- # assert_equal 'Uruguay', uy.name
883
- # end
884
-
885
- should "have a target record for every class that is mined" do
886
- DataMiner.run :class_names => %w{ Country }
887
- assert DataMiner::Target.exists?(:name => 'Country')
888
- assert_equal 1, DataMiner::Target.count(:conditions => {:name => 'country'})
889
- end
890
-
891
879
  should "keep a log when it does a run" do
892
880
  approx_started_at = Time.now
893
- DataMiner.run :class_names => %w{ Country }
881
+ DataMiner.run :resource_names => %w{ Country }
894
882
  approx_ended_at = Time.now
895
- target = DataMiner::Target.find_by_name('Country')
896
- assert (target.runs.last.started_at - approx_started_at).abs < 5 # seconds
897
- assert (target.runs.last.ended_at - approx_ended_at).abs < 5 # seconds
883
+ last_run = DataMiner::Run.first(:conditions => { :resource_name => 'Country' }, :order => 'id DESC')
884
+ assert (last_run.started_at - approx_started_at).abs < 5 # seconds
885
+ assert (last_run.ended_at - approx_ended_at).abs < 5 # seconds
898
886
  end
899
887
 
900
888
  should "request a re-import from scratch" do
@@ -902,31 +890,37 @@ class DataMinerTest < Test::Unit::TestCase
902
890
  c.iso_3166 = 'JUNK'
903
891
  c.save!
904
892
  assert Country.exists?(:iso_3166 => 'JUNK')
905
- DataMiner.run :class_names => %w{ Country }, :from_scratch => true
893
+ DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
906
894
  assert !Country.exists?(:iso_3166 => 'JUNK')
907
895
  end
908
896
 
909
897
  should "track how many times a row was touched" do
910
- DataMiner.run :class_names => %w{ Country }, :from_scratch => true
898
+ DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
899
+ assert_equal 1, Country.first.data_miner_touch_count
900
+ DataMiner.run :resource_names => %w{ Country }
911
901
  assert_equal 1, Country.first.data_miner_touch_count
912
- DataMiner.run :class_names => %w{ Country }
913
- assert_equal 2, Country.first.data_miner_touch_count
914
902
  end
915
903
 
916
904
  should "keep track of what the last import run that touched a row was" do
917
- DataMiner.run :class_names => %w{ Country }, :from_scratch => true
905
+ DataMiner.run :resource_names => %w{ Country }, :from_scratch => true
918
906
  a = DataMiner::Run.last
919
907
  assert_equal a, Country.first.data_miner_last_run
920
- DataMiner.run :class_names => %w{ Country }
908
+ DataMiner.run :resource_names => %w{ Country }
921
909
  b = DataMiner::Run.last
922
910
  assert a != b
923
- assert_equal b, Country.first.data_miner_last_run
911
+ assert_equal a, Country.first.data_miner_last_run
924
912
  end
925
913
 
926
914
  unless ENV['FAST'] == 'true'
927
915
  should "import using a dictionary" do
928
- DataMiner.run :class_names => %w{ ResidentialEnergyConsumptionSurveyResponse }
916
+ DataMiner.run :resource_names => %w{ ResidentialEnergyConsumptionSurveyResponse }
929
917
  assert ResidentialEnergyConsumptionSurveyResponse.find(6).residence_class.starts_with?('Single-family detached house')
930
918
  end
919
+
920
+ should "mine multiple classes in the correct order" do
921
+ DataMiner.run
922
+ uy = Country.find_by_iso_3166('UY')
923
+ assert_equal 'Uruguay', uy.name
924
+ end
931
925
  end
932
926
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_miner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
@@ -108,7 +108,6 @@ files:
108
108
  - lib/data_miner/import.rb
109
109
  - lib/data_miner/process.rb
110
110
  - lib/data_miner/run.rb
111
- - lib/data_miner/target.rb
112
111
  - lib/data_miner/william_james_cartesian_product.rb
113
112
  - test/data_miner_test.rb
114
113
  - test/test_helper.rb
@@ -1,26 +0,0 @@
1
- module DataMiner
2
- class Target < ActiveRecord::Base
3
- set_table_name 'data_miner_targets'
4
- set_primary_key :name
5
- has_many :runs, :class_name => '::DataMiner::Run', :foreign_key => 'data_miner_target_id'
6
-
7
- def klass
8
- name.constantize
9
- end
10
-
11
- def run(options = {})
12
- klass.data_miner_config.run options
13
- end
14
-
15
- def included_in_list_of_targets
16
- msg = "must have a data_miner block"
17
- unless DataMiner.classes.include?(name.constantize)
18
- errors.add :name, msg
19
- end
20
- rescue NameError
21
- errors.add :name, msg
22
- end
23
-
24
- validate :included_in_list_of_targets
25
- end
26
- end