data_miner 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +6 -0
- data/LICENSE +20 -0
- data/README.rdoc +96 -0
- data/Rakefile +65 -0
- data/VERSION +1 -0
- data/data_miner.gemspec +81 -0
- data/lib/data_miner.rb +43 -0
- data/lib/data_miner/active_record_ext.rb +25 -0
- data/lib/data_miner/attribute.rb +299 -0
- data/lib/data_miner/attribute_collection.rb +51 -0
- data/lib/data_miner/configuration.rb +94 -0
- data/lib/data_miner/dictionary.rb +36 -0
- data/lib/data_miner/step.rb +64 -0
- data/lib/data_miner/step/associate.rb +9 -0
- data/lib/data_miner/step/await.rb +35 -0
- data/lib/data_miner/step/callback.rb +22 -0
- data/lib/data_miner/step/derive.rb +9 -0
- data/lib/data_miner/step/import.rb +57 -0
- data/lib/data_miner/william_james_cartesian_product.rb +11 -0
- data/test/data_miner_test.rb +78 -0
- data/test/test_helper.rb +16 -0
- metadata +140 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Brighter Planet
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
=data_miner
|
2
|
+
|
3
|
+
Mine remote data into your ActiveRecord models.
|
4
|
+
|
5
|
+
==Quick start
|
6
|
+
|
7
|
+
Put this in <tt>config/environment.rb</tt>:
|
8
|
+
|
9
|
+
config.gem 'seamusabshere-data_miner', :lib => 'data_miner', :source => 'http://gems.github.com'
|
10
|
+
|
11
|
+
You need to define <tt>mine_data</tt> blocks in your ActiveRecord models. For example, in <tt>app/models/country.rb</tt>:
|
12
|
+
|
13
|
+
class Country < ActiveRecord::Base
|
14
|
+
mine_data do |step|
|
15
|
+
# import country names and country codes
|
16
|
+
step.import :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do |attr|
|
17
|
+
attr.key :iso_3166, :name_in_source => 'country code'
|
18
|
+
attr.store :iso_3166, :name_in_source => 'country code'
|
19
|
+
attr.store :name, :name_in_source => 'country'
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
...and in <tt>app/models/airport.rb</tt>:
|
25
|
+
|
26
|
+
class Airport < ActiveRecord::Base
|
27
|
+
belongs_to :country
|
28
|
+
|
29
|
+
mine_data do |step|
|
30
|
+
# import airport iata_code, name, etc.
|
31
|
+
step.import(:url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false) do |attr|
|
32
|
+
attr.key :iata_code, :field_number => 3
|
33
|
+
attr.store :name, :field_number => 0
|
34
|
+
attr.store :city, :field_number => 1
|
35
|
+
attr.store :country, :field_number => 2, :foreign_key => :name # will use Country.find_by_name(X)
|
36
|
+
attr.store :iata_code, :field_number => 3
|
37
|
+
attr.store :latitude, :field_number => 5
|
38
|
+
attr.store :longitude, :field_number => 6
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
Put this in <tt>lib/tasks/data_miner_tasks.rake</tt>: (unfortunately I don't know a way to automatically include gem tasks, so you have to do this manually for now)
|
44
|
+
|
45
|
+
namespace :data_miner do
|
46
|
+
task :mine => :environment do
|
47
|
+
DataMiner.mine :class_names => ENV['CLASSES'].to_s.split(/\s*,\s*/).flatten.compact
|
48
|
+
end
|
49
|
+
|
50
|
+
task :map_to_attrs => :environment do
|
51
|
+
DataMiner.map_to_attrs ENV['METHOD'], :class_names => ENV['CLASSES'].to_s.split(/\s*,\s*/).flatten.compact
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
You need to specify what order to mine data. For example, in <tt>config/initializers/data_miner_config.rb</tt>:
|
56
|
+
|
57
|
+
DataMiner.enqueue do |queue|
|
58
|
+
queue << Country # class whose data should be mined 1st
|
59
|
+
queue << Airport # class whose data should be mined 2nd
|
60
|
+
# etc
|
61
|
+
end
|
62
|
+
|
63
|
+
Once you have (1) set up the order of data mining and (2) defined <tt>mine_data</tt> blocks in your classes, you can:
|
64
|
+
|
65
|
+
$ rake data_miner:mine
|
66
|
+
|
67
|
+
==Complete example
|
68
|
+
|
69
|
+
~ $ rails testapp
|
70
|
+
~ $ cd testapp/
|
71
|
+
~/testapp $ ./script/generate model Airport iata_code:string name:string city:string country_id:integer latitude:float longitude:float
|
72
|
+
~/testapp $ ./script/generate model Country iso_3166:string name:string
|
73
|
+
~/testapp $ rake db:migrate
|
74
|
+
~/testapp $ touch lib/tasks/data_miner_tasks.rake
|
75
|
+
[...edit per quick start...]
|
76
|
+
~/testapp $ touch config/initializers/data_miner_config.rb
|
77
|
+
[...edit per quick start...]
|
78
|
+
~/testapp $ rake data_miner:mine
|
79
|
+
|
80
|
+
Now you should have
|
81
|
+
|
82
|
+
~/testapp $ ./script/console
|
83
|
+
Loading development environment (Rails 2.3.3)
|
84
|
+
>> Airport.first.iata_code
|
85
|
+
=> "GKA"
|
86
|
+
>> Airport.first.country.name
|
87
|
+
=> "Papua New Guinea"
|
88
|
+
|
89
|
+
==Authors
|
90
|
+
|
91
|
+
* Seamus Abshere <seamus@abshere.net>
|
92
|
+
* Andy Rossmeissl <andy@rossmeissl.net>
|
93
|
+
|
94
|
+
==Copyright
|
95
|
+
|
96
|
+
Copyright (c) 2009 Brighter Planet. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Packaging tasks. These are only defined when jeweler can be loaded;
# otherwise a hint is printed and the remaining tasks are still set up.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |gem|
    gem.name = "data_miner"
    gem.summary = %Q{Mine remote data into your ActiveRecord models.}
    gem.description = %Q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
    gem.email = "seamus@abshere.net"
    gem.homepage = "http://github.com/seamusabshere/data_miner"
    gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
    runtime_gems = %w{ activerecord activesupport andand remote_table seamusabshere-errata seamusabshere-conversions }
    runtime_gems.each do |gem_name|
      gem.add_dependency gem_name
    end
    gem.require_path = "lib"
    gem.files.include %w(lib/data_miner) unless gem.files.empty? # seems to fail once it's in the wild
    gem.rdoc_options << '--line-numbers' << '--inline-source'
    # gem.rubyforge_project = "dataminer"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

  Jeweler::GemcutterTasks.new

  Jeweler::RubyforgeTasks.new do |rubyforge|
    rubyforge.doc_task = "rdoc"
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: every test/**/*_test.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib' << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

# Coverage: uses rcov when it is installed, otherwise defines an :rcov task
# that aborts with installation instructions.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |t|
    t.libs << 'test'
    t.pattern = 'test/**/*_test.rb'
    t.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :default => :test

# API documentation; the title carries the contents of the VERSION file when
# that file exists.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "data_miner #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.2
|
data/data_miner.gemspec
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{data_miner}
|
8
|
+
s.version = "0.2.2"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
+
s.date = %q{2009-10-30}
|
13
|
+
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
|
+
s.email = %q{seamus@abshere.net}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"data_miner.gemspec",
|
27
|
+
"lib/data_miner.rb",
|
28
|
+
"lib/data_miner/active_record_ext.rb",
|
29
|
+
"lib/data_miner/attribute.rb",
|
30
|
+
"lib/data_miner/attribute_collection.rb",
|
31
|
+
"lib/data_miner/configuration.rb",
|
32
|
+
"lib/data_miner/dictionary.rb",
|
33
|
+
"lib/data_miner/step.rb",
|
34
|
+
"lib/data_miner/step/associate.rb",
|
35
|
+
"lib/data_miner/step/await.rb",
|
36
|
+
"lib/data_miner/step/callback.rb",
|
37
|
+
"lib/data_miner/step/derive.rb",
|
38
|
+
"lib/data_miner/step/import.rb",
|
39
|
+
"lib/data_miner/william_james_cartesian_product.rb",
|
40
|
+
"test/data_miner_test.rb",
|
41
|
+
"test/test_helper.rb"
|
42
|
+
]
|
43
|
+
s.homepage = %q{http://github.com/seamusabshere/data_miner}
|
44
|
+
s.rdoc_options = ["--charset=UTF-8", "--line-numbers", "--inline-source"]
|
45
|
+
s.require_paths = ["lib"]
|
46
|
+
s.rubygems_version = %q{1.3.5}
|
47
|
+
s.summary = %q{Mine remote data into your ActiveRecord models.}
|
48
|
+
s.test_files = [
|
49
|
+
"test/data_miner_test.rb",
|
50
|
+
"test/test_helper.rb"
|
51
|
+
]
|
52
|
+
|
53
|
+
if s.respond_to? :specification_version then
|
54
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
55
|
+
s.specification_version = 3
|
56
|
+
|
57
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
58
|
+
s.add_runtime_dependency(%q<activerecord>, [">= 0"])
|
59
|
+
s.add_runtime_dependency(%q<activesupport>, [">= 0"])
|
60
|
+
s.add_runtime_dependency(%q<andand>, [">= 0"])
|
61
|
+
s.add_runtime_dependency(%q<remote_table>, [">= 0"])
|
62
|
+
s.add_runtime_dependency(%q<seamusabshere-errata>, [">= 0"])
|
63
|
+
s.add_runtime_dependency(%q<seamusabshere-conversions>, [">= 0"])
|
64
|
+
else
|
65
|
+
s.add_dependency(%q<activerecord>, [">= 0"])
|
66
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
67
|
+
s.add_dependency(%q<andand>, [">= 0"])
|
68
|
+
s.add_dependency(%q<remote_table>, [">= 0"])
|
69
|
+
s.add_dependency(%q<seamusabshere-errata>, [">= 0"])
|
70
|
+
s.add_dependency(%q<seamusabshere-conversions>, [">= 0"])
|
71
|
+
end
|
72
|
+
else
|
73
|
+
s.add_dependency(%q<activerecord>, [">= 0"])
|
74
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
75
|
+
s.add_dependency(%q<andand>, [">= 0"])
|
76
|
+
s.add_dependency(%q<remote_table>, [">= 0"])
|
77
|
+
s.add_dependency(%q<seamusabshere-errata>, [">= 0"])
|
78
|
+
s.add_dependency(%q<seamusabshere-conversions>, [">= 0"])
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
data/lib/data_miner.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activesupport'
|
3
|
+
require 'activerecord'
|
4
|
+
require 'conversions'
|
5
|
+
require 'remote_table'
|
6
|
+
require 'errata'
|
7
|
+
|
8
|
+
require 'data_miner/active_record_ext'
|
9
|
+
require 'data_miner/attribute'
|
10
|
+
require 'data_miner/attribute_collection'
|
11
|
+
require 'data_miner/configuration'
|
12
|
+
require 'data_miner/dictionary'
|
13
|
+
require 'data_miner/step'
|
14
|
+
require 'data_miner/step/associate'
|
15
|
+
require 'data_miner/step/await'
|
16
|
+
require 'data_miner/step/callback'
|
17
|
+
require 'data_miner/step/derive'
|
18
|
+
require 'data_miner/step/import'
|
19
|
+
require 'data_miner/william_james_cartesian_product' # TODO: move to gem
|
20
|
+
|
21
|
+
# Public entry points of the library. Every method here forwards to the
# class-level API of DataMiner::Configuration.
module DataMiner
  # Forwards +options+ unchanged to Configuration.mine.
  def self.mine(options = {})
    Configuration.mine(options)
  end

  # Prints (with +puts+) whatever Configuration.map_to_attrs returns for
  # +method+ and +options+.
  def self.map_to_attrs(method, options = {})
    report = Configuration.map_to_attrs(method, options)
    puts report
  end

  # Hands the given block to Configuration.enqueue.
  def self.enqueue(&block)
    Configuration.enqueue(&block)
  end

  # Returns Configuration.classes.
  def self.classes
    Configuration.classes
  end
end
|
40
|
+
|
41
|
+
# Mix the extension into every ActiveRecord model. +send+ is used because
# Module#include is private on older Rubies.
ActiveRecord::Base.send(:include, DataMiner::ActiveRecordExt)
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module DataMiner
  # Mixin that adds the +mine_data+ class macro to whatever class includes it.
  module ActiveRecordExt
    # Including the module extends the includer with ClassMethods.
    def self.included(base)
      base.extend ClassMethods
    end

    module ClassMethods
      # Declares the data-mining configuration of the calling class.
      #
      # Unless the constant NO_DATA_MINER is defined and equal to +true+, a
      # class-level +data_mine+ accessor is created, set to a new
      # Configuration for this class, and that configuration is yielded to the
      # block. When NO_DATA_MINER is +true+, the block is not run and
      # +data_mine+ is defined to raise instead.
      #
      # +options+ is accepted but not read by this method.
      def mine_data(options = {}, &block)
        disabled = defined?(NO_DATA_MINER) && NO_DATA_MINER == true

        unless disabled
          cattr_accessor :data_mine
          self.data_mine = Configuration.new(self)
          yield data_mine
          return
        end

        # Singleton method on the calling class: any later use fails loudly.
        def self.data_mine
          raise "NO_DATA_MINER is set to true, so data_mine is not available"
        end
      end
    end
  end
end
|
@@ -0,0 +1,299 @@
|
|
1
|
+
module DataMiner
|
2
|
+
class Attribute
|
3
|
+
attr_accessor :klass, :name, :options_for_step, :affected_by_steps, :key_for_steps
|
4
|
+
|
5
|
+
def initialize(klass, name)
|
6
|
+
@klass = klass
|
7
|
+
@name = name.to_sym
|
8
|
+
@options_for_step = {}
|
9
|
+
@affected_by_steps = []
|
10
|
+
@key_for_steps = []
|
11
|
+
end
|
12
|
+
|
13
|
+
# polling questions
|
14
|
+
def report_find_or_create(step)
|
15
|
+
"Creates parents: #{klass}##{name} is set with #{reflection_klass(step)}.find_or_create_by_#{foreign_key(step)}" if wants_create?(step)
|
16
|
+
end
|
17
|
+
|
18
|
+
def report_unnatural_order(step)
|
19
|
+
if (
|
20
|
+
(rk = klass.reflect_on_association(weighting_association(step)).andand.klass) or
|
21
|
+
(wants_inline_association? and rk = reflection_klass(step))
|
22
|
+
) and
|
23
|
+
step.configuration.classes.index(rk) > step.configuration.classes.index(klass) and
|
24
|
+
step.options[:awaiting].andand.klass != klass
|
25
|
+
"Unnatural order: #{klass} comes before #{rk}"
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def inspect
|
30
|
+
"Attribute(#{klass}.#{name})"
|
31
|
+
end
|
32
|
+
|
33
|
+
def affected_by!(step, options = {})
|
34
|
+
self.options_for_step[step] = options
|
35
|
+
self.affected_by_steps << step
|
36
|
+
end
|
37
|
+
|
38
|
+
def affected_by?(step)
|
39
|
+
affected_by_steps.include?(step)
|
40
|
+
end
|
41
|
+
|
42
|
+
def key_for!(step, options = {})
|
43
|
+
self.options_for_step[step] = options
|
44
|
+
self.key_for_steps << step
|
45
|
+
end
|
46
|
+
|
47
|
+
def key_for?(step)
|
48
|
+
key_for_steps.include?(step)
|
49
|
+
end
|
50
|
+
|
51
|
+
def value_in_dictionary(step, key)
|
52
|
+
return *dictionary(step).lookup(key) # strip the array wrapper if there's only one element
|
53
|
+
end
|
54
|
+
|
55
|
+
def value_in_source(step, row)
|
56
|
+
if wants_static?(step)
|
57
|
+
value = static(step)
|
58
|
+
elsif field_number(step)
|
59
|
+
if field_number(step).is_a?(Range)
|
60
|
+
value = field_number(step).map { |n| row[n] }.join(delimiter(step))
|
61
|
+
else
|
62
|
+
value = row[field_number(step)]
|
63
|
+
end
|
64
|
+
else
|
65
|
+
value = row[name_in_source(step)]
|
66
|
+
end
|
67
|
+
return nil if value.nil?
|
68
|
+
return value if value.is_a?(ActiveRecord::Base) # escape valve for parsers that look up associations directly
|
69
|
+
value = value.to_s
|
70
|
+
value = value[keep(step)] if wants_keep?(step)
|
71
|
+
value = do_split(step, value) if wants_split?(step)
|
72
|
+
# taken from old errata... maybe we want to do this here
|
73
|
+
value.gsub!(/[ ]+/, ' ')
|
74
|
+
# text.gsub!('- ', '-')
|
75
|
+
value.gsub!(/([^\\])~/, '\1 ')
|
76
|
+
value.strip!
|
77
|
+
value.upcase! if wants_upcase?(step)
|
78
|
+
value = do_convert(step, row, value) if wants_conversion?(step)
|
79
|
+
value = do_sprintf(step, value) if wants_sprintf?(step)
|
80
|
+
value
|
81
|
+
end
|
82
|
+
|
83
|
+
def value_from_row(step, row)
|
84
|
+
value = value_in_source(step, row)
|
85
|
+
return value if value.is_a?(ActiveRecord::Base) # carry through trapdoor
|
86
|
+
value = value_in_dictionary(step, value) if wants_dictionary?(step)
|
87
|
+
value = value_as_association(step, value) if wants_inline_association?
|
88
|
+
value
|
89
|
+
end
|
90
|
+
|
91
|
+
def value_as_association(step, value)
|
92
|
+
@_value_as_association ||= {}
|
93
|
+
@_value_as_association[step] ||= {}
|
94
|
+
if !@_value_as_association[step].has_key?(value)
|
95
|
+
dynamic_matcher = wants_create?(step) ? "find_or_create_by_#{foreign_key(step)}" : "find_by_#{foreign_key(step)}"
|
96
|
+
@_value_as_association[step][value] = reflection_klass(step).send(dynamic_matcher, value)
|
97
|
+
end
|
98
|
+
@_value_as_association[step][value]
|
99
|
+
end
|
100
|
+
|
101
|
+
# this will overwrite nils, even if wants_overwriting?(step) is false
|
102
|
+
def set_record_from_row(step, record, row)
|
103
|
+
return if !wants_overwriting?(step) and !record.send(name).nil?
|
104
|
+
value = value_from_row(step, row)
|
105
|
+
record.send "#{name}=", value
|
106
|
+
$stderr.puts("ActiveRecord didn't like trying to set #{klass}.#{name} = #{value}") if !value.nil? and record.send(name).nil?
|
107
|
+
end
|
108
|
+
|
109
|
+
def perform(step)
|
110
|
+
case step.variant
|
111
|
+
when :associate
|
112
|
+
perform_association(step)
|
113
|
+
when :derive
|
114
|
+
if wants_update_all?(step)
|
115
|
+
perform_update_all(step)
|
116
|
+
elsif wants_weighted_average?(step)
|
117
|
+
perform_weighted_average(step)
|
118
|
+
else
|
119
|
+
perform_callback(step)
|
120
|
+
end
|
121
|
+
when :import
|
122
|
+
raise "This shouldn't be called, the import step is special"
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
def perform_association(step)
|
127
|
+
raise "dictionary and prefix don't mix" if wants_dictionary?(step) and wants_prefix?(step)
|
128
|
+
klass.update_all("#{reflection.primary_key_name} = NULL") if wants_nullification?(step)
|
129
|
+
if wants_create?(step)
|
130
|
+
klass.find_in_batches do |batch|
|
131
|
+
batch.each do |record|
|
132
|
+
if wants_prefix?(step)
|
133
|
+
sql = "SELECT reflection_table.id FROM #{reflection_klass(step).quoted_table_name} AS reflection_table INNER JOIN #{klass.quoted_table_name} AS klass_table ON LEFT(klass_table.#{key(step)}, LENGTH(reflection_table.#{foreign_key(step)})) = reflection_table.#{foreign_key(step)} WHERE klass_table.id = #{record.id} ORDER BY LENGTH(reflection_table.#{foreign_key(step)}) DESC"
|
134
|
+
associated_id = ActiveRecord::Base.connection.select_value(sql)
|
135
|
+
next if associated_id.blank?
|
136
|
+
record.send("#{reflection.primary_key_name}=", associated_id)
|
137
|
+
else
|
138
|
+
dynamic_finder_value = record.send(key(step))
|
139
|
+
dynamic_finder_value = value_in_dictionary(step, dynamic_finder_value) if wants_dictionary?(step)
|
140
|
+
next if dynamic_finder_value.blank?
|
141
|
+
associated = reflection_klass(step).send("find_or_create_by_#{foreign_key(step)}", dynamic_finder_value) # TODO cache results
|
142
|
+
record.send("#{name}=", associated)
|
143
|
+
end
|
144
|
+
record.save
|
145
|
+
end
|
146
|
+
end
|
147
|
+
else
|
148
|
+
reflection_klass(step).find_in_batches do |batch|
|
149
|
+
batch.each do |reflection_record|
|
150
|
+
klass.update_all ["#{reflection.primary_key_name} = ?", reflection_record.id], ["#{key(step)} = ?", reflection_record.send(foreign_key(step))]
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def perform_update_all(step)
|
157
|
+
klass.update_all("#{name} = #{set(step)}", conditions(step))
|
158
|
+
end
|
159
|
+
|
160
|
+
def perform_weighted_average(step)
|
161
|
+
# handle weighting by scopes instead of associations
|
162
|
+
if weighting_association(step) and !klass.reflect_on_association(weighting_association(step))
|
163
|
+
klass.find_in_batches do |batch|
|
164
|
+
batch.each do |record|
|
165
|
+
record.send "#{name}=", record.send(weighting_association(step)).weighted_average(name, :by => weighting_column(step), :disaggregator => weighting_disaggregator(step))
|
166
|
+
record.save
|
167
|
+
end
|
168
|
+
end
|
169
|
+
else # there's no weighting association OR there is one and it's a valid association
|
170
|
+
klass.update_all_weighted_averages name, :by => weighting_column(step), :disaggregator => weighting_disaggregator(step), :association => weighting_association(step)
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
def perform_callback(step)
|
175
|
+
case klass.method(callback(step)).arity
|
176
|
+
when 0:
|
177
|
+
klass.send(callback(step))
|
178
|
+
when 1:
|
179
|
+
klass.send(callback(step), name)
|
180
|
+
when 2:
|
181
|
+
klass.send(callback(step), name, options_for_step[step])
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
def unit_from_source(step, row)
|
186
|
+
row[unit_in_source(step)].to_s.strip.underscore.to_sym
|
187
|
+
end
|
188
|
+
|
189
|
+
def do_convert(step, row, value)
|
190
|
+
from_unit = from(step) || unit_from_source(step, row)
|
191
|
+
value.to_f.convert(from_unit, to(step))
|
192
|
+
end
|
193
|
+
|
194
|
+
def do_sprintf(step, value)
|
195
|
+
if /\%[0-9\.]*f/.match(sprintf(step))
|
196
|
+
value = value.to_f
|
197
|
+
elsif /\%[0-9\.]*d/.match(sprintf(step))
|
198
|
+
value = value.to_i
|
199
|
+
end
|
200
|
+
sprintf(step) % value
|
201
|
+
end
|
202
|
+
|
203
|
+
def do_split(step, value)
|
204
|
+
pattern = split_options(step)[:pattern] || /\s+/ # default is split on whitespace
|
205
|
+
keep = split_options(step)[:keep] || 0 # default is keep first element
|
206
|
+
value.to_s.split(pattern)[keep].to_s
|
207
|
+
end
|
208
|
+
|
209
|
+
def column_type
|
210
|
+
@column_type ||= klass.columns_hash[name.to_s].type
|
211
|
+
end
|
212
|
+
|
213
|
+
{
|
214
|
+
:static => 'options_for_step[step].has_key?(:static)',
|
215
|
+
:prefix => :prefix,
|
216
|
+
:create => :create,
|
217
|
+
:keep => :keep,
|
218
|
+
:upcase => :upcase,
|
219
|
+
:conversion => '!from(step).nil? or !unit_in_source(step).nil?',
|
220
|
+
:sprintf => :sprintf,
|
221
|
+
:dictionary => :dictionary_options,
|
222
|
+
:split => :split_options,
|
223
|
+
:update_all => :set,
|
224
|
+
:nullification => 'nullify(step) != false',
|
225
|
+
:overwriting => 'overwrite(step) != false',
|
226
|
+
:weighted_average => '!weighting_association(step).nil? or !weighting_column(step).nil?'
|
227
|
+
}.each do |name, condition|
|
228
|
+
condition = "!#{condition}(step).nil?" if condition.is_a?(Symbol)
|
229
|
+
eval <<-EOS
|
230
|
+
def wants_#{name}?(step)
|
231
|
+
#{condition}
|
232
|
+
end
|
233
|
+
EOS
|
234
|
+
end
|
235
|
+
|
236
|
+
{
|
237
|
+
:name_in_source => { :default => :name, :stringify => true },
|
238
|
+
:key => { :default => :name, :stringify => true },
|
239
|
+
:foreign_key => { :default => 'key(step)', :stringify => true },
|
240
|
+
:delimiter => { :default => '", "' }
|
241
|
+
}.each do |name, options|
|
242
|
+
eval <<-EOS
|
243
|
+
def #{name}(step)
|
244
|
+
(options_for_step[step][:#{name}] || #{options[:default]})#{'.to_s' if options[:stringify]}
|
245
|
+
end
|
246
|
+
EOS
|
247
|
+
end
|
248
|
+
|
249
|
+
def reflection
|
250
|
+
if @_reflection.nil?
|
251
|
+
@_reflection = klass.reflect_on_association(name) || :missing
|
252
|
+
reflection
|
253
|
+
elsif @_reflection == :missing
|
254
|
+
nil
|
255
|
+
else
|
256
|
+
@_reflection
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
def reflection_klass(step)
|
261
|
+
return nil unless reflection
|
262
|
+
if reflection.options[:polymorphic]
|
263
|
+
polymorphic_type(step).andand.constantize
|
264
|
+
else
|
265
|
+
reflection.klass
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
def wants_inline_association?
|
270
|
+
reflection.present?
|
271
|
+
end
|
272
|
+
|
273
|
+
def callback(step)
|
274
|
+
(options_for_step[step][:callback] || "derive_#{name}").to_sym
|
275
|
+
end
|
276
|
+
|
277
|
+
def dictionary(step)
|
278
|
+
raise "shouldn't ask for this" unless wants_dictionary?(step) # don't try to initialize if there are no dictionary options
|
279
|
+
@dictionaries ||= {}
|
280
|
+
@dictionaries[step] ||= Dictionary.new(dictionary_options(step))
|
281
|
+
end
|
282
|
+
|
283
|
+
%w(dictionary split).each do |name|
|
284
|
+
eval <<-EOS
|
285
|
+
def #{name}_options(step)
|
286
|
+
options_for_step[step][:#{name}]
|
287
|
+
end
|
288
|
+
EOS
|
289
|
+
end
|
290
|
+
|
291
|
+
%w(from to set conditions weighting_association weighting_column weighting_disaggregator sprintf nullify overwrite upcase prefix unit_in_source field_number keep create static polymorphic_type).each do |name|
|
292
|
+
eval <<-EOS
|
293
|
+
def #{name}(step)
|
294
|
+
options_for_step[step][:#{name}]
|
295
|
+
end
|
296
|
+
EOS
|
297
|
+
end
|
298
|
+
end
|
299
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module DataMiner
  # Registry of the Attribute objects declared for one class, keyed by
  # attribute name as a Symbol.
  class AttributeCollection
    attr_accessor :klass, :attributes

    def initialize(klass)
      @klass = klass
      @attributes = {}
    end

    # Marks +attr_name+ as a key for +step+, creating the Attribute on first use.
    def key!(step, attr_name, attr_options = {})
      find_or_initialize(attr_name).key_for!(step, attr_options)
    end

    # Marks +attr_name+ as written by +step+, creating the Attribute on first use.
    def affect!(step, attr_name, attr_options = {})
      find_or_initialize(attr_name).affected_by!(step, attr_options)
    end

    # Marks every content column of the step's class as written by +step+,
    # skipping the names given in <tt>options[:except]</tt> (one name or an
    # array; Strings and Symbols are both accepted).
    def affect_all_content_columns!(step, options = {})
      except = Array.wrap(options[:except]).map(&:to_sym)
      step.klass.content_columns.map(&:name).reject { |content_column| except.include?(content_column.to_sym) }.each do |content_column|
        find_or_initialize(content_column).affected_by!(step)
      end
    end

    # Attributes written by +step+.
    def all_affected_by(step)
      attributes.values.select { |attr| attr.affected_by?(step) }
    end

    # Attributes acting as keys for +step+.
    def all_keys_for(step)
      attributes.values.select { |attr| attr.key_for?(step) }
    end

    # Attributes that +step+ writes or uses as a key, without duplicates.
    def all_for(step)
      (all_affected_by(step) + all_keys_for(step)).uniq
    end

    def has_keys_for?(step)
      attributes.values.any? { |attr| attr.key_for?(step) }
    end

    # True when at least one attribute written by +step+ does not want
    # overwriting.
    def has_conditional_writes_for?(step)
      all_affected_by(step).any? { |attr| !attr.wants_overwriting?(step) }
    end

    private

    # Returns the Attribute registered under +attr_name+, creating it if
    # needed. The key is normalised to a Symbol so that "name" (as produced
    # from content column names) and :name (as typically written in the DSL)
    # share one Attribute instead of silently creating two for one column.
    def find_or_initialize(attr_name)
      attributes[attr_name.to_sym] ||= Attribute.new(klass, attr_name)
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module DataMiner
  # The mining plan of one class: an ordered list of steps plus the attribute
  # registry they share. At class level it also holds the queue of classes to
  # mine.
  class Configuration
    attr_accessor :steps, :klass, :counter, :attributes, :awaiting

    def initialize(klass)
      @klass = klass
      @steps = []
      @counter = 0
      @attributes = AttributeCollection.new(klass)
    end

    # Defines the step-declaring methods +import+, +associate+, +derive+ and
    # +await+. Each bumps the step counter and appends a new Step::<Variant>.
    # Three call forms are recognised:
    #
    # * FORM C - a block is given: the optional first argument is the step
    #   options hash and the block is handed to the step.
    # * FORM A - no block, first argument is a Hash: it is the step options.
    # * FORM B - no block, first argument is an attribute name (optionally
    #   followed by attribute options): a step with empty options that
    #   affects just that attribute.
    %w(import associate derive await).each do |method|
      class_eval <<-EOS, __FILE__, __LINE__ + 1
        def #{method}(*args, &block)
          self.counter += 1
          if block_given? # FORM C
            step_options = args[0] || {}
          elsif args[0].is_a?(Hash) # FORM A
            step_options = args[0]
          else # FORM B
            attr_name = args[0]
            attr_options = args[1] || {}
            step_options = {}
            block = Proc.new { |attr| attr.affect attr_name, attr_options }
          end
          set_awaiting!(step_options)
          steps << Step::#{method.camelcase}.new(self, counter, step_options, &block)
        end
      EOS
    end

    # Adds the step currently being awaited (if any) to +step_options+ under
    # :awaiting. The hash is modified in place.
    def set_awaiting!(step_options)
      return if awaiting.nil?
      step_options.merge!(:awaiting => awaiting)
    end

    # Remembers +step+ as the one being awaited by subsequently declared steps.
    def awaiting!(step)
      self.awaiting = step
    end

    def stop_awaiting!
      self.awaiting = nil
    end

    # Mine data for this class by performing each step in order.
    def mine(options = {})
      steps.each do |step|
        step.perform options
      end
    end

    # Map <tt>method</tt> to attributes, step by step, dropping nil results.
    def map_to_attrs(method)
      steps.map { |step| step.map_to_attrs(method) }.compact
    end

    cattr_accessor :classes
    self.classes = []

    class << self
      # Mine data. Defaults to all classes touched by DataMiner.
      #
      # Options
      # * <tt>:class_names</tt>: provide an array of class names to mine
      def mine(options = {})
        classes.each do |klass|
          next unless options[:class_names].blank? || options[:class_names].include?(klass.name)
          klass.data_mine.mine options
        end
      end

      # Map a <tt>method</tt> to attrs. Defaults to all classes touched by DataMiner.
      #
      # Options
      # * <tt>:class_names</tt>: provide an array of class names to map
      def map_to_attrs(method, options = {})
        per_class = classes.map do |klass|
          next unless options[:class_names].blank? || options[:class_names].include?(klass.name)
          klass.data_mine.map_to_attrs method
        end
        per_class.flatten.compact
      end

      # Queue up all the ActiveRecord classes that DataMiner should touch.
      #
      # Generally done in <tt>config/initializers/data_miner_config.rb</tt>.
      def enqueue(&block)
        yield classes
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module DataMiner
  # Translates values through a lookup table loaded with RemoteTable.
  #
  # Options
  # * <tt>:url</tt>: passed to RemoteTable.new
  # * <tt>:key</tt>: name of the column that input values are matched against
  # * <tt>:returns</tt>: name of the column whose content is returned
  # * <tt>:sprintf</tt>: format applied to both sides before comparing
  #   (defaults to '%s')
  class Dictionary
    attr_accessor :key_name, :value_name, :sprintf, :table

    def initialize(options = {})
      @key_name = options[:key]
      @value_name = options[:returns]
      @sprintf = options[:sprintf] || '%s'
      @table = RemoteTable.new(:url => options[:url])
    end

    # Looks +key+ up using the columns and format this dictionary was
    # configured with. See #find for the return value.
    def lookup(key)
      find(self.key_name, key, self.value_name, :sprintf => self.sprintf)
    end

    # Finds the first row whose +key_name+ cell equals +key+ after both are
    # normalised, and returns that row's +value_name+ cell split on
    # semicolons as an Array of Strings. Returns nil when no row matches.
    #
    # Whitespace on either side of a semicolon is dropped, so "a;b", "a; b"
    # and "a ; b" all yield ["a", "b"].
    def find(key_name, key, value_name, options = {})
      wanted = normalize_for_comparison(key, options) # same for every row, so compute it once
      match = table.rows.detect { |row| wanted == normalize_for_comparison(row[key_name], options) }
      match[value_name].to_s.split(/\s*;\s*/) if match
    end

    private

    # Formats +string+ with <tt>options[:sprintf]</tt> (coercing to Float or
    # Integer first when the format contains an f or d directive) and strips
    # surrounding whitespace. The format comes from +options+ only, so the
    # coercion and the formatting can never disagree.
    def normalize_for_comparison(string, options = {})
      format = options[:sprintf]
      if format
        if /\%[0-9\.]*f/.match(format)
          string = string.to_f
        elsif /\%[0-9\.]*d/.match(format)
          string = string.to_i
        end
        string = format % string
      end
      string.to_s.strip
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module DataMiner
  # A numbered unit of work inside a class's data-mining configuration.
  # +klass+ and +attributes+ are forwarded to the configuration.
  class Step
    attr_accessor :configuration, :number, :options
    delegate :klass, :attributes, :to => :configuration

    # configuration - the owning configuration object
    # number        - position/label of this step
    # options       - Hash of step options (:affect_all, :except, :callback,
    #                 :headers, :awaiting are read in this class)
    #
    # Yields self, when a block is given, so the caller can declare the
    # attributes this step keys on or affects.
    def initialize(configuration, number, options = {}, &block)
      @configuration = configuration
      @number = number
      @options = options
      yield self if block_given? # pull in attributes

      if options[:affect_all] == :content_columns
        attributes.affect_all_content_columns!(self, :except => options[:except])
      end

      if options[:callback]
        affected_attributes.each do |attribute|
          attribute.options_for_step[self][:callback] = options[:callback]
        end
      end

      # TODO remove
      if options[:headers] == :upcase
        all_attributes.each do |attribute|
          attribute.options_for_step[self][:name_in_source] = attribute.name_in_source(self).upcase
        end
      end
    end

    # Symbol built from this object's class name, demodulized and underscored.
    def variant
      short_name = self.class.name.demodulize
      short_name.underscore.to_sym
    end

    # True when the step's options carry a non-nil :awaiting entry.
    def awaiting?
      !options[:awaiting].nil?
    end

    def inspect
      "Step(#{klass} #{variant.to_s.camelcase} #{number})"
    end

    # Human-readable label used in the progress messages.
    def signature
      "#{klass} step #{number}: #{variant}"
    end

    # Performs every affected attribute for this step and reports on $stderr.
    # An awaiting step does nothing unless options[:force] is set.
    def perform(options = {})
      return if awaiting? && !options[:force]
      affected_attributes.each do |attribute|
        attribute.perform(self)
      end
      $stderr.puts "performed #{signature}"
    end

    # Memoized: attributes affected by this step.
    def affected_attributes
      @affected_attributes ||= attributes.all_affected_by(self)
    end

    # Memoized: attributes acting as keys for this step.
    def key_attributes
      @key_attributes ||= attributes.all_keys_for(self)
    end

    # Memoized: every attribute associated with this step.
    def all_attributes
      @all_attributes ||= attributes.all_for(self)
    end

    # Declares +attr_name+ as a key for this step.
    def key(attr_name, attr_options = {})
      attributes.key!(self, attr_name, attr_options)
    end

    # Declares +attr_name+ as affected by this step. Also available as +store+.
    def affect(attr_name, attr_options = {})
      attributes.affect!(self, attr_name, attr_options)
    end
    alias_method :store, :affect

    # Sends +method+ (with this step as argument) to each affected attribute
    # and returns the non-nil results.
    def map_to_attrs(method)
      results = affected_attributes.map { |attribute| attribute.send(method, self) }
      results.compact
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module DataMiner
  class Step
    # Groups steps that must wait until another class has been mined.
    # The steps are declared inside the block given to the constructor while
    # the configuration is in "awaiting" mode.
    class Await < Step
      attr_accessor :other_class

      # configuration - the owning configuration object
      # number        - position/label of this step
      # options       - Hash; :other_class is removed from it and stored in
      #                 +other_class+
      #
      # Yields the configuration so the deferred steps can be declared on it.
      def initialize(configuration, number, options = {}, &block)
        # doesn't call super
        @configuration = configuration
        @number = number
        @options = options
        @other_class = options.delete :other_class
        configuration.awaiting! self
        begin
          yield configuration # pull in steps
        ensure
          # Always leave awaiting mode, even when the block raises; otherwise
          # the configuration would stay stuck in awaiting mode.
          configuration.stop_awaiting!
        end
      end

      # Appends a Step::Callback for this step to the other class's step list
      # and reports on $stderr.
      def perform(*args)
        other_class.data_mine.steps << Step::Callback.new(other_class.data_mine, self)
        $stderr.puts "added #{signature} to callbacks after #{other_class}"
      end

      # Forces every step that is awaiting this one to perform.
      def callback
        $stderr.puts "starting to perform deferred steps in #{signature}..."
        all_awaiting.each { |step| step.perform :force => true }
        $stderr.puts "...done"
      end

      private

      # Steps of this configuration whose :awaiting option is this step.
      # The +step.options and+ guard skips steps that have no options hash.
      def all_awaiting
        configuration.steps.select { |step| step.options and step.options[:awaiting] == self }
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module DataMiner
  class Step
    # Step that, when performed, triggers the +callback+ of a step belonging
    # to another configuration (the "foreign" step).
    class Callback < Step
      attr_accessor :foreign_step

      # configuration - the configuration this callback is appended to
      # foreign_step  - the step whose +callback+ is invoked by #perform
      def initialize(configuration, foreign_step)
        @configuration = configuration
        @foreign_step = foreign_step
        @number = "(last)"
        # super is not called, so give the inherited option readers
        # (e.g. awaiting?) an empty hash instead of nil to work with.
        @options = {}
      end

      # Runs the foreign step's callback and reports on $stderr.
      def perform(*args)
        foreign_step.callback
        $stderr.puts "performed #{signature}"
      end

      # The inherited signature plus the signature of the foreign step.
      def signature
        "#{super} (on behalf of #{foreign_step.signature})"
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module DataMiner
  class Step
    # Reads rows from a remote table and writes them into +klass+ records.
    class Import < Step
      attr_accessor :table, :errata

      # Takes the same arguments as Step#initialize. In addition:
      # options[:errata] - when present, used as the :url of an Errata
      # the remaining listed keys are handed to RemoteTable.new
      def initialize(configuration, number, options = {}, &block)
        super
        @errata = Errata.new(:url => options[:errata], :klass => klass) if options[:errata]
        @table = RemoteTable.new(options.slice(:url, :filename, :post_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop))
      end

      # The inherited signature followed by the source URL.
      def signature
        "#{super} #{options[:url]}"
      end

      # Imports every row of the table:
      # * rows rejected by the errata are skipped, the others are corrected
      # * when the step uses existing data, one record is found or
      #   initialized per key combination; otherwise a new record is built
      # * every affected attribute is set from the row and the record saved
      def perform(*args)
        # NOTE(review): TRUNCATE is not accepted by every database (SQLite,
        # for one, has no such statement) -- confirm the targeted adapters.
        ActiveRecord::Base.connection.execute("TRUNCATE #{klass.quoted_table_name}") if wants_truncate?
        table.each_row do |row|
          if errata
            next if errata.rejects?(row)
            errata.correct!(row)
          end
          if uses_existing_data?
            key_values = key_attributes.map { |key_attr| [ key_attr.value_from_row(self, row) ] }
            record_set = WilliamJamesCartesianProduct.cart_prod(*key_values).map do |combination|
              # A skipped combination contributes nil to the mapped array...
              next if combination.include?(nil) and !wants_nil_keys?
              klass.send(dynamic_finder_name, *combination)
            end.flatten.compact # ...so drop the nils before saving records
          else
            record_set = klass.new
          end
          Array.wrap(record_set).each do |record|
            affected_attributes.each { |attr| attr.set_record_from_row(self, record, row) }
            record.save
          end
        end
        $stderr.puts "performed #{signature}"
      end

      # An explicit :truncate option (true or false) wins; without one the
      # table is truncated only when the step does not use existing data.
      def wants_truncate?
        return true if options[:truncate] == true
        return false if options[:truncate] == false
        !uses_existing_data?
      end

      # True only when :allow_nil_keys is exactly +true+.
      def wants_nil_keys?
        options[:allow_nil_keys] == true
      end

      # True when this step has key attributes or conditional writes.
      # The parentheses matter: without them +or+ binds looser than +||=+ and
      # only the first condition would ever be memoized.
      def uses_existing_data?
        @uses_existing_data ||= (attributes.has_keys_for?(self) || attributes.has_conditional_writes_for?(self))
      end

      # Name of the dynamic finder built from the key attribute names,
      # e.g. :find_or_initialize_by_a_and_b for keys a and b.
      def dynamic_finder_name
        "find_or_initialize_by_#{key_attributes.map(&:name).join('_and_')}".to_sym
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Test schema: two tables, recreated from scratch (:force) on every run.
ActiveRecord::Schema.define(:version => 20090819143429) do
  create_table 'airports', :force => true do |table|
    table.string   'iata_code', 'name', 'city'
    table.integer  'country_id'
    table.float    'latitude', 'longitude'
    table.datetime 'created_at', 'updated_at'
  end

  create_table 'countries', :force => true do |table|
    table.string   'iso_3166', 'name'
    table.datetime 'created_at', 'updated_at'
  end
end
|
21
|
+
|
22
|
+
# Test model: countries keyed by their ISO 3166 code.
class Country < ActiveRecord::Base
  mine_data do |miner|
    # Fetch country codes and country names from a public CSV file.
    miner.import(:url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv') do |column|
      column.key   :iso_3166, :name_in_source => 'country code'
      column.store :iso_3166, :name_in_source => 'country code'
      column.store :name,     :name_in_source => 'country'
    end
  end
end
|
32
|
+
|
33
|
+
# Test model: airports keyed by IATA code, each belonging to a country.
class Airport < ActiveRecord::Base
  belongs_to :country

  mine_data do |miner|
    # The source file has no header row, so columns are addressed by position.
    miner.import(:url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false) do |column|
      column.key   :iata_code, :field_number => 3
      column.store :name,      :field_number => 0
      column.store :city,      :field_number => 1
      column.store :country,   :field_number => 2, :foreign_key => :name # will use Country.find_by_name(X)
      column.store :iata_code, :field_number => 3
      column.store :latitude,  :field_number => 5
      column.store :longitude, :field_number => 6
    end
  end
end
|
48
|
+
|
49
|
+
# Register the models in mining order: countries first, then airports.
DataMiner.enqueue { |queue| queue << Country << Airport }
|
53
|
+
|
54
|
+
# End-to-end tests: they download the real source files and import them.
class DataMinerTest < Test::Unit::TestCase
  # Empty both tables after every example.
  def teardown
    [Airport, Country].each { |model| model.delete_all }
  end

  should "mine a single class" do
    Country.data_mine.mine
    uruguay = Country.find_by_iso_3166('UY')
    assert_equal 'Uruguay', uruguay.name
    assert_equal 0, Airport.count
  end

  should "mine a single class using the API" do
    DataMiner.mine(:class_names => ['Country'])
    uruguay = Country.find_by_iso_3166('UY')
    assert_equal 'Uruguay', uruguay.name
    assert_equal 0, Airport.count
  end

  should "mine all classes" do
    DataMiner.mine
    uruguay = Country.find_by_iso_3166('UY')
    montevideo = Airport.find_by_iata_code('MVD')
    assert_equal 'Uruguay', uruguay.name
    assert_equal uruguay, montevideo.country
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'shoulda'
|
4
|
+
require 'sqlite3'
|
5
|
+
|
6
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
7
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
8
|
+
require 'data_miner'
|
9
|
+
|
10
|
+
# The tests run against a SQLite database file inside the test directory.
connection_settings = {
  'adapter' => 'sqlite3',
  'database' => 'test/test.sqlite3'
}
ActiveRecord::Base.establish_connection(connection_settings)
|
14
|
+
|
15
|
+
# Reopened without changes; a place for helpers shared by all test cases.
class Test::Unit::TestCase
end
|
metadata
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: data_miner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Seamus Abshere
|
8
|
+
- Andy Rossmeissl
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2009-10-30 00:00:00 -04:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: activerecord
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
version:
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: activesupport
|
28
|
+
type: :runtime
|
29
|
+
version_requirement:
|
30
|
+
version_requirements: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: "0"
|
35
|
+
version:
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: andand
|
38
|
+
type: :runtime
|
39
|
+
version_requirement:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: "0"
|
45
|
+
version:
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: remote_table
|
48
|
+
type: :runtime
|
49
|
+
version_requirement:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: seamusabshere-errata
|
58
|
+
type: :runtime
|
59
|
+
version_requirement:
|
60
|
+
version_requirements: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "0"
|
65
|
+
version:
|
66
|
+
- !ruby/object:Gem::Dependency
|
67
|
+
name: seamusabshere-conversions
|
68
|
+
type: :runtime
|
69
|
+
version_requirement:
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: "0"
|
75
|
+
version:
|
76
|
+
description: Mine remote data into your ActiveRecord models. You can also perform associations and convert units.
|
77
|
+
email: seamus@abshere.net
|
78
|
+
executables: []
|
79
|
+
|
80
|
+
extensions: []
|
81
|
+
|
82
|
+
extra_rdoc_files:
|
83
|
+
- LICENSE
|
84
|
+
- README.rdoc
|
85
|
+
files:
|
86
|
+
- .document
|
87
|
+
- .gitignore
|
88
|
+
- LICENSE
|
89
|
+
- README.rdoc
|
90
|
+
- Rakefile
|
91
|
+
- VERSION
|
92
|
+
- data_miner.gemspec
|
93
|
+
- lib/data_miner.rb
|
94
|
+
- lib/data_miner/active_record_ext.rb
|
95
|
+
- lib/data_miner/attribute.rb
|
96
|
+
- lib/data_miner/attribute_collection.rb
|
97
|
+
- lib/data_miner/configuration.rb
|
98
|
+
- lib/data_miner/dictionary.rb
|
99
|
+
- lib/data_miner/step.rb
|
100
|
+
- lib/data_miner/step/associate.rb
|
101
|
+
- lib/data_miner/step/await.rb
|
102
|
+
- lib/data_miner/step/callback.rb
|
103
|
+
- lib/data_miner/step/derive.rb
|
104
|
+
- lib/data_miner/step/import.rb
|
105
|
+
- lib/data_miner/william_james_cartesian_product.rb
|
106
|
+
- test/data_miner_test.rb
|
107
|
+
- test/test_helper.rb
|
108
|
+
has_rdoc: true
|
109
|
+
homepage: http://github.com/seamusabshere/data_miner
|
110
|
+
licenses: []
|
111
|
+
|
112
|
+
post_install_message:
|
113
|
+
rdoc_options:
|
114
|
+
- --charset=UTF-8
|
115
|
+
- --line-numbers
|
116
|
+
- --inline-source
|
117
|
+
require_paths:
|
118
|
+
- lib
|
119
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: "0"
|
124
|
+
version:
|
125
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: "0"
|
130
|
+
version:
|
131
|
+
requirements: []
|
132
|
+
|
133
|
+
rubyforge_project:
|
134
|
+
rubygems_version: 1.3.5
|
135
|
+
signing_key:
|
136
|
+
specification_version: 3
|
137
|
+
summary: Mine remote data into your ActiveRecord models.
|
138
|
+
test_files:
|
139
|
+
- test/data_miner_test.rb
|
140
|
+
- test/test_helper.rb
|