data_miner 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/CHANGELOG +5 -0
- data/README.rdoc +11 -15
- data/Rakefile +7 -2
- data/VERSION +1 -1
- data/data_miner.gemspec +27 -28
- data/lib/data_miner.rb +50 -27
- data/lib/data_miner/attribute.rb +157 -240
- data/lib/data_miner/configuration.rb +58 -55
- data/lib/data_miner/import.rb +57 -0
- data/lib/data_miner/process.rb +21 -0
- data/lib/data_miner/run.rb +7 -0
- data/lib/data_miner/target.rb +7 -0
- data/test/data_miner_test.rb +644 -48
- data/test/test_helper.rb +134 -3
- metadata +29 -23
- data/lib/data_miner/active_record_ext.rb +0 -25
- data/lib/data_miner/attribute_collection.rb +0 -51
- data/lib/data_miner/step.rb +0 -64
- data/lib/data_miner/step/associate.rb +0 -9
- data/lib/data_miner/step/await.rb +0 -35
- data/lib/data_miner/step/callback.rb +0 -22
- data/lib/data_miner/step/derive.rb +0 -9
- data/lib/data_miner/step/import.rb +0 -57
data/.gitignore
CHANGED
data/CHANGELOG
CHANGED
@@ -1,2 +1,7 @@
|
|
1
1
|
0.2.6
|
2
2
|
* Upgrade to remote_table 0.1.6 to handle UTF-8 CSVs and long urls.
|
3
|
+
0.3.0
|
4
|
+
* Removed association code... now data_miner focuses on just importing.
|
5
|
+
* New, simpler DSL
|
6
|
+
* Upgrade to remote_table 0.2.1 for row_hashes and better blank row handling
|
7
|
+
* Remove all association-related code
|
data/README.rdoc
CHANGED
@@ -8,15 +8,15 @@ Put this in <tt>config/environment.rb</tt>:
|
|
8
8
|
|
9
9
|
config.gem 'data_miner'
|
10
10
|
|
11
|
-
You need to define <tt>mine_data</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
|
11
|
+
You need to define <tt>data_miner</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
|
12
12
|
|
13
13
|
class Country < ActiveRecord::Base
|
14
|
-
|
14
|
+
data_miner do |step|
|
15
15
|
# import country names and country codes
|
16
16
|
step.import :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do |attr|
|
17
|
-
attr.key :iso_3166, :
|
18
|
-
attr.store :iso_3166, :
|
19
|
-
attr.store :name, :
|
17
|
+
attr.key :iso_3166, :field_name => 'country code'
|
18
|
+
attr.store :iso_3166, :field_name => 'country code'
|
19
|
+
attr.store :name, :field_name => 'country'
|
20
20
|
end
|
21
21
|
end
|
22
22
|
end
|
@@ -26,7 +26,7 @@ You need to define <tt>mine_data</tt> blocks in your ActiveRecord models. For ex
|
|
26
26
|
class Airport < ActiveRecord::Base
|
27
27
|
belongs_to :country
|
28
28
|
|
29
|
-
|
29
|
+
data_miner do |step|
|
30
30
|
# import airport iata_code, name, etc.
|
31
31
|
step.import(:url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false) do |attr|
|
32
32
|
attr.key :iata_code, :field_number => 3
|
@@ -43,12 +43,8 @@ You need to define <tt>mine_data</tt> blocks in your ActiveRecord models. For ex
|
|
43
43
|
Put this in <tt>lib/tasks/data_miner_tasks.rake</tt>: (unfortunately I don't know a way to automatically include gem tasks, so you have to do this manually for now)
|
44
44
|
|
45
45
|
namespace :data_miner do
|
46
|
-
task :
|
47
|
-
DataMiner.
|
48
|
-
end
|
49
|
-
|
50
|
-
task :map_to_attrs => :environment do
|
51
|
-
DataMiner.map_to_attrs ENV['METHOD'], :class_names => ENV['CLASSES'].to_s.split(/\s*,\s*/).flatten.compact
|
46
|
+
task :run => :environment do
|
47
|
+
DataMiner.run :class_names => ENV['CLASSES'].to_s.split(/\s*,\s*/).flatten.compact
|
52
48
|
end
|
53
49
|
end
|
54
50
|
|
@@ -60,9 +56,9 @@ You need to specify what order to mine data. For example, in <tt>config/initiali
|
|
60
56
|
# etc
|
61
57
|
end
|
62
58
|
|
63
|
-
Once you have (1) set up the order of data mining and (2) defined <tt>mine_data</tt> blocks in your classes, you can:
|
59
|
+
Once you have (1) set up the order of data mining and (2) defined <tt>data_miner</tt> blocks in your classes, you can:
|
64
60
|
|
65
|
-
$ rake data_miner:
|
61
|
+
$ rake data_miner:run
|
66
62
|
|
67
63
|
==Complete example
|
68
64
|
|
@@ -75,7 +71,7 @@ Once you have (1) set up the order of data mining and (2) defined <tt>mine_data<
|
|
75
71
|
[...edit per quick start...]
|
76
72
|
~/testapp $ touch config/initializers/data_miner_config.rake
|
77
73
|
[...edit per quick start...]
|
78
|
-
~/testapp $ rake data_miner:
|
74
|
+
~/testapp $ rake data_miner:run
|
79
75
|
|
80
76
|
Now you should have
|
81
77
|
|
data/Rakefile
CHANGED
@@ -10,8 +10,13 @@ begin
|
|
10
10
|
gem.email = "seamus@abshere.net"
|
11
11
|
gem.homepage = "http://github.com/seamusabshere/data_miner"
|
12
12
|
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
13
|
-
|
14
|
-
gem.add_dependency '
|
13
|
+
gem.add_dependency 'remote_table', '~>0.2.1'
|
14
|
+
gem.add_dependency 'activerecord', '~>2.3.4'
|
15
|
+
gem.add_dependency 'activesupport', '~>2.3.4'
|
16
|
+
gem.add_dependency 'andand', '~>1.3.1'
|
17
|
+
gem.add_dependency 'errata', '~>0.1.4'
|
18
|
+
gem.add_dependency 'conversions', '~>1.4.3'
|
19
|
+
gem.add_dependency 'blockenspiel', '~>0.3.2'
|
15
20
|
gem.require_path = "lib"
|
16
21
|
gem.files.include %w(lib/data_miner) unless gem.files.empty? # seems to fail once it's in the wild
|
17
22
|
gem.rdoc_options << '--line-numbers' << '--inline-source'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.6
|
1
|
+
0.3.0
|
data/data_miner.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.2.6"
|
8
|
+
s.version = "0.3.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-02-25}
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -26,17 +26,13 @@ Gem::Specification.new do |s|
|
|
26
26
|
"VERSION",
|
27
27
|
"data_miner.gemspec",
|
28
28
|
"lib/data_miner.rb",
|
29
|
-
"lib/data_miner/active_record_ext.rb",
|
30
29
|
"lib/data_miner/attribute.rb",
|
31
|
-
"lib/data_miner/attribute_collection.rb",
|
32
30
|
"lib/data_miner/configuration.rb",
|
33
31
|
"lib/data_miner/dictionary.rb",
|
34
|
-
"lib/data_miner/
|
35
|
-
"lib/data_miner/
|
36
|
-
"lib/data_miner/
|
37
|
-
"lib/data_miner/
|
38
|
-
"lib/data_miner/step/derive.rb",
|
39
|
-
"lib/data_miner/step/import.rb",
|
32
|
+
"lib/data_miner/import.rb",
|
33
|
+
"lib/data_miner/process.rb",
|
34
|
+
"lib/data_miner/run.rb",
|
35
|
+
"lib/data_miner/target.rb",
|
40
36
|
"lib/data_miner/william_james_cartesian_product.rb",
|
41
37
|
"test/data_miner_test.rb",
|
42
38
|
"test/test_helper.rb"
|
@@ -57,27 +53,30 @@ Gem::Specification.new do |s|
|
|
57
53
|
s.specification_version = 3
|
58
54
|
|
59
55
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
60
|
-
s.add_runtime_dependency(%q<
|
61
|
-
s.add_runtime_dependency(%q<
|
62
|
-
s.add_runtime_dependency(%q<
|
63
|
-
s.add_runtime_dependency(%q<
|
64
|
-
s.add_runtime_dependency(%q<
|
65
|
-
s.add_runtime_dependency(%q<
|
56
|
+
s.add_runtime_dependency(%q<remote_table>, ["~> 0.2.1"])
|
57
|
+
s.add_runtime_dependency(%q<activerecord>, ["~> 2.3.4"])
|
58
|
+
s.add_runtime_dependency(%q<activesupport>, ["~> 2.3.4"])
|
59
|
+
s.add_runtime_dependency(%q<andand>, ["~> 1.3.1"])
|
60
|
+
s.add_runtime_dependency(%q<errata>, ["~> 0.1.4"])
|
61
|
+
s.add_runtime_dependency(%q<conversions>, ["~> 1.4.3"])
|
62
|
+
s.add_runtime_dependency(%q<blockenspiel>, ["~> 0.3.2"])
|
66
63
|
else
|
67
|
-
s.add_dependency(%q<
|
68
|
-
s.add_dependency(%q<
|
69
|
-
s.add_dependency(%q<
|
70
|
-
s.add_dependency(%q<
|
71
|
-
s.add_dependency(%q<
|
72
|
-
s.add_dependency(%q<
|
64
|
+
s.add_dependency(%q<remote_table>, ["~> 0.2.1"])
|
65
|
+
s.add_dependency(%q<activerecord>, ["~> 2.3.4"])
|
66
|
+
s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
|
67
|
+
s.add_dependency(%q<andand>, ["~> 1.3.1"])
|
68
|
+
s.add_dependency(%q<errata>, ["~> 0.1.4"])
|
69
|
+
s.add_dependency(%q<conversions>, ["~> 1.4.3"])
|
70
|
+
s.add_dependency(%q<blockenspiel>, ["~> 0.3.2"])
|
73
71
|
end
|
74
72
|
else
|
75
|
-
s.add_dependency(%q<
|
76
|
-
s.add_dependency(%q<
|
77
|
-
s.add_dependency(%q<
|
78
|
-
s.add_dependency(%q<
|
79
|
-
s.add_dependency(%q<
|
80
|
-
s.add_dependency(%q<
|
73
|
+
s.add_dependency(%q<remote_table>, ["~> 0.2.1"])
|
74
|
+
s.add_dependency(%q<activerecord>, ["~> 2.3.4"])
|
75
|
+
s.add_dependency(%q<activesupport>, ["~> 2.3.4"])
|
76
|
+
s.add_dependency(%q<andand>, ["~> 1.3.1"])
|
77
|
+
s.add_dependency(%q<errata>, ["~> 0.1.4"])
|
78
|
+
s.add_dependency(%q<conversions>, ["~> 1.4.3"])
|
79
|
+
s.add_dependency(%q<blockenspiel>, ["~> 0.3.2"])
|
81
80
|
end
|
82
81
|
end
|
83
82
|
|
data/lib/data_miner.rb
CHANGED
@@ -1,43 +1,66 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
require '
|
1
|
+
require 'active_support'
|
2
|
+
require 'active_record'
|
3
|
+
require 'blockenspiel'
|
4
4
|
require 'conversions'
|
5
5
|
require 'remote_table'
|
6
6
|
require 'errata'
|
7
|
+
require 'andand'
|
8
|
+
require 'log4r'
|
7
9
|
|
8
|
-
require 'data_miner/active_record_ext'
|
9
10
|
require 'data_miner/attribute'
|
10
|
-
require 'data_miner/attribute_collection'
|
11
11
|
require 'data_miner/configuration'
|
12
12
|
require 'data_miner/dictionary'
|
13
|
-
require 'data_miner/
|
14
|
-
require 'data_miner/
|
15
|
-
require 'data_miner/
|
16
|
-
require 'data_miner/
|
17
|
-
|
18
|
-
|
19
|
-
require 'data_miner/william_james_cartesian_product'
|
13
|
+
require 'data_miner/import'
|
14
|
+
require 'data_miner/process'
|
15
|
+
require 'data_miner/target'
|
16
|
+
require 'data_miner/run'
|
17
|
+
|
18
|
+
# TODO: move to gem
|
19
|
+
require 'data_miner/william_james_cartesian_product'
|
20
20
|
|
21
21
|
module DataMiner
|
22
|
-
class
|
23
|
-
|
24
|
-
|
25
|
-
end
|
26
|
-
|
27
|
-
def map_to_attrs(method, options = {})
|
28
|
-
puts DataMiner::Configuration.map_to_attrs(method, options)
|
29
|
-
end
|
22
|
+
class MissingHashColumn < RuntimeError; end
|
23
|
+
|
24
|
+
include Log4r
|
30
25
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
26
|
+
mattr_accessor :logger
|
27
|
+
|
28
|
+
def self.start_logging
|
29
|
+
if defined?(Rails)
|
30
|
+
self.logger = Rails.logger
|
31
|
+
else
|
32
|
+
self.logger = Logger.new 'data_miner'
|
33
|
+
logger.outputters = FileOutputter.new 'f1', :filename => 'data_miner.log'
|
37
34
|
end
|
38
35
|
end
|
36
|
+
|
37
|
+
def self.run(options = {})
|
38
|
+
DataMiner::Configuration.run options
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.enqueue(&block)
|
42
|
+
DataMiner::Configuration.enqueue &block
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.classes
|
46
|
+
DataMiner::Configuration.classes
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.create_tables
|
50
|
+
DataMiner::Configuration.create_tables
|
51
|
+
end
|
39
52
|
end
|
40
53
|
|
41
54
|
ActiveRecord::Base.class_eval do
|
42
|
-
|
55
|
+
def self.data_miner(&block)
|
56
|
+
# this is class_eval'ed here so that each ActiveRecord descendant has its own copy, or none at all
|
57
|
+
class_eval { cattr_accessor :data_miner_config }
|
58
|
+
self.data_miner_config = DataMiner::Configuration.new self
|
59
|
+
|
60
|
+
data_miner_config.before_invoke
|
61
|
+
Blockenspiel.invoke block, data_miner_config
|
62
|
+
data_miner_config.after_invoke
|
63
|
+
end
|
43
64
|
end
|
65
|
+
|
66
|
+
DataMiner.start_logging
|
data/lib/data_miner/attribute.rb
CHANGED
@@ -1,299 +1,216 @@
|
|
1
1
|
module DataMiner
|
2
2
|
class Attribute
|
3
|
-
attr_accessor :klass, :name, :
|
3
|
+
attr_accessor :klass, :name, :options_for_import
|
4
4
|
|
5
5
|
def initialize(klass, name)
|
6
6
|
@klass = klass
|
7
|
-
@name = name
|
8
|
-
@
|
9
|
-
@affected_by_steps = []
|
10
|
-
@key_for_steps = []
|
7
|
+
@name = name
|
8
|
+
@options_for_import = {}
|
11
9
|
end
|
12
|
-
|
13
|
-
# polling questions
|
14
|
-
def report_find_or_create(step)
|
15
|
-
"Creates parents: #{klass}##{name} is set with #{reflection_klass(step)}.find_or_create_by_#{foreign_key(step)}" if wants_create?(step)
|
16
|
-
end
|
17
|
-
|
18
|
-
def report_unnatural_order(step)
|
19
|
-
if (
|
20
|
-
(rk = klass.reflect_on_association(weighting_association(step)).andand.klass) or
|
21
|
-
(wants_inline_association? and rk = reflection_klass(step))
|
22
|
-
) and
|
23
|
-
step.configuration.classes.index(rk) > step.configuration.classes.index(klass) and
|
24
|
-
step.options[:awaiting].andand.klass != klass
|
25
|
-
"Unnatural order: #{klass} comes before #{rk}"
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
10
|
+
|
29
11
|
def inspect
|
30
|
-
"Attribute(#{klass}
|
12
|
+
"Attribute(#{klass}##{name})"
|
31
13
|
end
|
32
14
|
|
33
|
-
def
|
34
|
-
|
35
|
-
self.affected_by_steps << step
|
36
|
-
end
|
37
|
-
|
38
|
-
def affected_by?(step)
|
39
|
-
affected_by_steps.include?(step)
|
40
|
-
end
|
41
|
-
|
42
|
-
def key_for!(step, options = {})
|
43
|
-
self.options_for_step[step] = options
|
44
|
-
self.key_for_steps << step
|
45
|
-
end
|
46
|
-
|
47
|
-
def key_for?(step)
|
48
|
-
key_for_steps.include?(step)
|
15
|
+
def stored_by?(import)
|
16
|
+
options_for_import.has_key?(import)
|
49
17
|
end
|
50
18
|
|
51
|
-
def value_in_dictionary(
|
52
|
-
return *dictionary(
|
19
|
+
def value_in_dictionary(import, key)
|
20
|
+
return *dictionary(import).lookup(key) # strip the array wrapper if there's only one element
|
53
21
|
end
|
54
22
|
|
55
|
-
def value_in_source(
|
56
|
-
if wants_static?(
|
57
|
-
value = static(
|
58
|
-
elsif field_number(
|
59
|
-
if field_number(
|
60
|
-
value = field_number(
|
23
|
+
def value_in_source(import, row)
|
24
|
+
if wants_static?(import)
|
25
|
+
value = static(import)
|
26
|
+
elsif field_number(import)
|
27
|
+
if field_number(import).is_a?(Range)
|
28
|
+
value = field_number(import).map { |n| row[n] }.join(delimiter(import))
|
61
29
|
else
|
62
|
-
value = row[field_number(
|
30
|
+
value = row[field_number(import)]
|
63
31
|
end
|
64
32
|
else
|
65
|
-
value = row[
|
33
|
+
value = row[field_name(import)]
|
66
34
|
end
|
67
35
|
return nil if value.nil?
|
68
36
|
return value if value.is_a?(ActiveRecord::Base) # escape valve for parsers that look up associations directly
|
69
37
|
value = value.to_s
|
70
|
-
value = value[
|
71
|
-
value = do_split(
|
38
|
+
value = value[chars(import)] if wants_chars?(import)
|
39
|
+
value = do_split(import, value) if wants_split?(import)
|
72
40
|
# taken from old errata... maybe we want to do this here
|
73
41
|
value.gsub!(/[ ]+/, ' ')
|
74
42
|
# text.gsub!('- ', '-')
|
75
43
|
value.gsub!(/([^\\])~/, '\1 ')
|
76
44
|
value.strip!
|
77
|
-
value.upcase! if wants_upcase?(
|
78
|
-
value = do_convert(
|
79
|
-
value = do_sprintf(
|
45
|
+
value.upcase! if wants_upcase?(import)
|
46
|
+
value = do_convert(import, row, value) if wants_conversion?(import)
|
47
|
+
value = do_sprintf(import, value) if wants_sprintf?(import)
|
80
48
|
value
|
81
49
|
end
|
82
50
|
|
83
|
-
def value_from_row(
|
84
|
-
value = value_in_source(
|
51
|
+
def value_from_row(import, row)
|
52
|
+
value = value_in_source(import, row)
|
85
53
|
return value if value.is_a?(ActiveRecord::Base) # carry through trapdoor
|
86
|
-
value = value_in_dictionary(
|
87
|
-
value = value_as_association(step, value) if wants_inline_association?
|
54
|
+
value = value_in_dictionary(import, value) if wants_dictionary?(import)
|
88
55
|
value
|
89
56
|
end
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
dynamic_matcher = wants_create?(step) ? "find_or_create_by_#{foreign_key(step)}" : "find_by_#{foreign_key(step)}"
|
96
|
-
@_value_as_association[step][value] = reflection_klass(step).send(dynamic_matcher, value)
|
97
|
-
end
|
98
|
-
@_value_as_association[step][value]
|
99
|
-
end
|
100
|
-
|
101
|
-
# this will overwrite nils, even if wants_overwriting?(step) is false
|
102
|
-
def set_record_from_row(step, record, row)
|
103
|
-
return if !wants_overwriting?(step) and !record.send(name).nil?
|
104
|
-
value = value_from_row(step, row)
|
57
|
+
|
58
|
+
# this will overwrite nils, even if wants_overwriting?(import) is false
|
59
|
+
def set_record_from_row(import, record, row)
|
60
|
+
return if !wants_overwriting?(import) and !record.send(name).nil?
|
61
|
+
value = value_from_row(import, row)
|
105
62
|
record.send "#{name}=", value
|
106
|
-
|
107
|
-
end
|
108
|
-
|
109
|
-
def perform(step)
|
110
|
-
case step.variant
|
111
|
-
when :associate
|
112
|
-
perform_association(step)
|
113
|
-
when :derive
|
114
|
-
if wants_update_all?(step)
|
115
|
-
perform_update_all(step)
|
116
|
-
elsif wants_weighted_average?(step)
|
117
|
-
perform_weighted_average(step)
|
118
|
-
else
|
119
|
-
perform_callback(step)
|
120
|
-
end
|
121
|
-
when :import
|
122
|
-
raise "This shouldn't be called, the import step is special"
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
def perform_association(step)
|
127
|
-
raise "dictionary and prefix don't mix" if wants_dictionary?(step) and wants_prefix?(step)
|
128
|
-
klass.update_all("#{reflection.primary_key_name} = NULL") if wants_nullification?(step)
|
129
|
-
if wants_create?(step)
|
130
|
-
klass.find_in_batches do |batch|
|
131
|
-
batch.each do |record|
|
132
|
-
if wants_prefix?(step)
|
133
|
-
sql = "SELECT reflection_table.id FROM #{reflection_klass(step).quoted_table_name} AS reflection_table INNER JOIN #{klass.quoted_table_name} AS klass_table ON LEFT(klass_table.#{key(step)}, LENGTH(reflection_table.#{foreign_key(step)})) = reflection_table.#{foreign_key(step)} WHERE klass_table.id = #{record.id} ORDER BY LENGTH(reflection_table.#{foreign_key(step)}) DESC"
|
134
|
-
associated_id = ActiveRecord::Base.connection.select_value(sql)
|
135
|
-
next if associated_id.blank?
|
136
|
-
record.send("#{reflection.primary_key_name}=", associated_id)
|
137
|
-
else
|
138
|
-
dynamic_finder_value = record.send(key(step))
|
139
|
-
dynamic_finder_value = value_in_dictionary(step, dynamic_finder_value) if wants_dictionary?(step)
|
140
|
-
next if dynamic_finder_value.blank?
|
141
|
-
associated = reflection_klass(step).send("find_or_create_by_#{foreign_key(step)}", dynamic_finder_value) # TODO cache results
|
142
|
-
record.send("#{name}=", associated)
|
143
|
-
end
|
144
|
-
record.save
|
145
|
-
end
|
146
|
-
end
|
147
|
-
else
|
148
|
-
reflection_klass(step).find_in_batches do |batch|
|
149
|
-
batch.each do |reflection_record|
|
150
|
-
klass.update_all ["#{reflection.primary_key_name} = ?", reflection_record.id], ["#{key(step)} = ?", reflection_record.send(foreign_key(step))]
|
151
|
-
end
|
152
|
-
end
|
153
|
-
end
|
63
|
+
DataMiner.logger.info("ActiveRecord didn't like trying to set #{klass}.#{name} = #{value}") if !value.nil? and record.send(name).nil?
|
154
64
|
end
|
155
65
|
|
156
|
-
def
|
157
|
-
|
66
|
+
def unit_from_source(import, row)
|
67
|
+
row[units_field_name(import)].to_s.strip.underscore.to_sym
|
158
68
|
end
|
159
69
|
|
160
|
-
def
|
161
|
-
|
162
|
-
if weighting_association(step) and !klass.reflect_on_association(weighting_association(step))
|
163
|
-
klass.find_in_batches do |batch|
|
164
|
-
batch.each do |record|
|
165
|
-
record.send "#{name}=", record.send(weighting_association(step)).weighted_average(name, :by => weighting_column(step), :disaggregator => weighting_disaggregator(step))
|
166
|
-
record.save
|
167
|
-
end
|
168
|
-
end
|
169
|
-
else # there's no weighting association OR there is one and it's a valid association
|
170
|
-
klass.update_all_weighted_averages name, :by => weighting_column(step), :disaggregator => weighting_disaggregator(step), :association => weighting_association(step)
|
171
|
-
end
|
172
|
-
end
|
173
|
-
|
174
|
-
def perform_callback(step)
|
175
|
-
case klass.method(callback(step)).arity
|
176
|
-
when 0:
|
177
|
-
klass.send(callback(step))
|
178
|
-
when 1:
|
179
|
-
klass.send(callback(step), name)
|
180
|
-
when 2:
|
181
|
-
klass.send(callback(step), name, options_for_step[step])
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
def unit_from_source(step, row)
|
186
|
-
row[unit_in_source(step)].to_s.strip.underscore.to_sym
|
70
|
+
def do_convert(import, row, value)
|
71
|
+
value.to_f.convert((from_units(import) || unit_from_source(import, row)), to_units(import))
|
187
72
|
end
|
188
73
|
|
189
|
-
def
|
190
|
-
|
191
|
-
value.to_f.convert(from_unit, to(step))
|
192
|
-
end
|
193
|
-
|
194
|
-
def do_sprintf(step, value)
|
195
|
-
if /\%[0-9\.]*f/.match(sprintf(step))
|
74
|
+
def do_sprintf(import, value)
|
75
|
+
if /\%[0-9\.]*f/.match(sprintf(import))
|
196
76
|
value = value.to_f
|
197
|
-
elsif /\%[0-9\.]*d/.match(sprintf(
|
77
|
+
elsif /\%[0-9\.]*d/.match(sprintf(import))
|
198
78
|
value = value.to_i
|
199
79
|
end
|
200
|
-
sprintf(
|
80
|
+
sprintf(import) % value
|
201
81
|
end
|
202
82
|
|
203
|
-
def do_split(
|
204
|
-
pattern = split_options(
|
205
|
-
keep = split_options(
|
83
|
+
def do_split(import, value)
|
84
|
+
pattern = split_options(import)[:pattern] || /\s+/ # default is split on whitespace
|
85
|
+
keep = split_options(import)[:keep] || 0 # default is keep first element
|
206
86
|
value.to_s.split(pattern)[keep].to_s
|
207
87
|
end
|
208
88
|
|
209
89
|
def column_type
|
210
|
-
|
90
|
+
klass.columns_hash[name.to_s].type
|
91
|
+
end
|
92
|
+
|
93
|
+
def dictionary(import)
|
94
|
+
raise "shouldn't ask for this" unless wants_dictionary?(import) # don't try to initialize if there are no dictionary options
|
95
|
+
Dictionary.new dictionary_options(import)
|
96
|
+
end
|
97
|
+
|
98
|
+
# {
|
99
|
+
# :static => 'options_for_import[import].has_key?(:static)',
|
100
|
+
# :chars => :chars,
|
101
|
+
# :upcase => :upcase,
|
102
|
+
# :conversion => '!from_units(import).nil? or !units_field_name(import).nil?',
|
103
|
+
# :sprintf => :sprintf,
|
104
|
+
# :dictionary => :dictionary_options,
|
105
|
+
# :split => :split_options,
|
106
|
+
# :nullification => 'nullify(import) != false',
|
107
|
+
# :overwriting => 'overwrite(import) != false',
|
108
|
+
# }.each do |name, condition|
|
109
|
+
# condition = "!#{condition}(import).nil?" if condition.is_a?(Symbol)
|
110
|
+
# puts <<-EOS
|
111
|
+
# def wants_#{name}?(import)
|
112
|
+
# #{condition}
|
113
|
+
# end
|
114
|
+
# EOS
|
115
|
+
# end
|
116
|
+
def wants_split?(import)
|
117
|
+
!split_options(import).nil?
|
118
|
+
end
|
119
|
+
def wants_sprintf?(import)
|
120
|
+
!sprintf(import).nil?
|
121
|
+
end
|
122
|
+
def wants_upcase?(import)
|
123
|
+
!upcase(import).nil?
|
124
|
+
end
|
125
|
+
def wants_static?(import)
|
126
|
+
options_for_import[import].has_key?(:static)
|
127
|
+
end
|
128
|
+
def wants_nullification?(import)
|
129
|
+
nullify(import) != false
|
130
|
+
end
|
131
|
+
def wants_chars?(import)
|
132
|
+
!chars(import).nil?
|
133
|
+
end
|
134
|
+
def wants_overwriting?(import)
|
135
|
+
overwrite(import) != false
|
136
|
+
end
|
137
|
+
def wants_conversion?(import)
|
138
|
+
!from_units(import).nil? or !units_field_name(import).nil?
|
139
|
+
end
|
140
|
+
def wants_dictionary?(import)
|
141
|
+
!dictionary_options(import).nil?
|
142
|
+
end
|
143
|
+
|
144
|
+
# {
|
145
|
+
# :field_name => { :default => :name, :stringify => true },
|
146
|
+
# :delimiter => { :default => '", "' }
|
147
|
+
# }.each do |name, options|
|
148
|
+
# puts <<-EOS
|
149
|
+
# def #{name}(import)
|
150
|
+
# (options_for_import[import][:#{name}] || #{options[:default]})#{'.to_s' if options[:stringify]}
|
151
|
+
# end
|
152
|
+
# EOS
|
153
|
+
# end
|
154
|
+
def field_name(import)
|
155
|
+
(options_for_import[import][:field_name] || name).to_s
|
156
|
+
end
|
157
|
+
def delimiter(import)
|
158
|
+
(options_for_import[import][:delimiter] || ", ")
|
159
|
+
end
|
160
|
+
|
161
|
+
# %w(dictionary split).each do |name|
|
162
|
+
# puts <<-EOS
|
163
|
+
# def #{name}_options(import)
|
164
|
+
# options_for_import[import][:#{name}]
|
165
|
+
# end
|
166
|
+
# EOS
|
167
|
+
# end
|
168
|
+
def dictionary_options(import)
|
169
|
+
options_for_import[import][:dictionary]
|
170
|
+
end
|
171
|
+
def split_options(import)
|
172
|
+
options_for_import[import][:split]
|
211
173
|
end
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
:
|
222
|
-
:split => :split_options,
|
223
|
-
:update_all => :set,
|
224
|
-
:nullification => 'nullify(step) != false',
|
225
|
-
:overwriting => 'overwrite(step) != false',
|
226
|
-
:weighted_average => '!weighting_association(step).nil? or !weighting_column(step).nil?'
|
227
|
-
}.each do |name, condition|
|
228
|
-
condition = "!#{condition}(step).nil?" if condition.is_a?(Symbol)
|
229
|
-
eval <<-EOS
|
230
|
-
def wants_#{name}?(step)
|
231
|
-
#{condition}
|
232
|
-
end
|
233
|
-
EOS
|
174
|
+
|
175
|
+
# %w(from_units to_units conditions sprintf nullify overwrite upcase units_field_name field_number chars static).each do |name|
|
176
|
+
# puts <<-EOS
|
177
|
+
# def #{name}(import)
|
178
|
+
# options_for_import[import][:#{name}]
|
179
|
+
# end
|
180
|
+
# EOS
|
181
|
+
# end
|
182
|
+
def from_units(import)
|
183
|
+
options_for_import[import][:from_units]
|
234
184
|
end
|
235
|
-
|
236
|
-
|
237
|
-
:name_in_source => { :default => :name, :stringify => true },
|
238
|
-
:key => { :default => :name, :stringify => true },
|
239
|
-
:foreign_key => { :default => 'key(step)', :stringify => true },
|
240
|
-
:delimiter => { :default => '", "' }
|
241
|
-
}.each do |name, options|
|
242
|
-
eval <<-EOS
|
243
|
-
def #{name}(step)
|
244
|
-
(options_for_step[step][:#{name}] || #{options[:default]})#{'.to_s' if options[:stringify]}
|
245
|
-
end
|
246
|
-
EOS
|
185
|
+
def to_units(import)
|
186
|
+
options_for_import[import][:to_units]
|
247
187
|
end
|
248
|
-
|
249
|
-
|
250
|
-
if @_reflection.nil?
|
251
|
-
@_reflection = klass.reflect_on_association(name) || :missing
|
252
|
-
reflection
|
253
|
-
elsif @_reflection == :missing
|
254
|
-
nil
|
255
|
-
else
|
256
|
-
@_reflection
|
257
|
-
end
|
188
|
+
def conditions(import)
|
189
|
+
options_for_import[import][:conditions]
|
258
190
|
end
|
259
|
-
|
260
|
-
|
261
|
-
return nil unless reflection
|
262
|
-
if reflection.options[:polymorphic]
|
263
|
-
polymorphic_type(step).andand.constantize
|
264
|
-
else
|
265
|
-
reflection.klass
|
266
|
-
end
|
191
|
+
def sprintf(import)
|
192
|
+
options_for_import[import][:sprintf]
|
267
193
|
end
|
268
|
-
|
269
|
-
|
270
|
-
reflection.present?
|
194
|
+
def nullify(import)
|
195
|
+
options_for_import[import][:nullify]
|
271
196
|
end
|
272
|
-
|
273
|
-
|
274
|
-
(options_for_step[step][:callback] || "derive_#{name}").to_sym
|
197
|
+
def overwrite(import)
|
198
|
+
options_for_import[import][:overwrite]
|
275
199
|
end
|
276
|
-
|
277
|
-
|
278
|
-
raise "shouldn't ask for this" unless wants_dictionary?(step) # don't try to initialize if there are no dictionary options
|
279
|
-
@dictionaries ||= {}
|
280
|
-
@dictionaries[step] ||= Dictionary.new(dictionary_options(step))
|
200
|
+
def upcase(import)
|
201
|
+
options_for_import[import][:upcase]
|
281
202
|
end
|
282
|
-
|
283
|
-
|
284
|
-
eval <<-EOS
|
285
|
-
def #{name}_options(step)
|
286
|
-
options_for_step[step][:#{name}]
|
287
|
-
end
|
288
|
-
EOS
|
203
|
+
def units_field_name(import)
|
204
|
+
options_for_import[import][:units_field_name]
|
289
205
|
end
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
206
|
+
def field_number(import)
|
207
|
+
options_for_import[import][:field_number]
|
208
|
+
end
|
209
|
+
def chars(import)
|
210
|
+
options_for_import[import][:chars]
|
211
|
+
end
|
212
|
+
def static(import)
|
213
|
+
options_for_import[import][:static]
|
297
214
|
end
|
298
215
|
end
|
299
216
|
end
|