seamusabshere-data_miner 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +6 -0
- data/LICENSE +20 -0
- data/README.rdoc +87 -0
- data/Rakefile +65 -0
- data/VERSION +1 -0
- data/data_miner.gemspec +74 -0
- data/lib/data_miner.rb +38 -0
- data/lib/data_miner/active_record_ext.rb +15 -0
- data/lib/data_miner/attribute.rb +279 -0
- data/lib/data_miner/attribute_collection.rb +51 -0
- data/lib/data_miner/configuration.rb +77 -0
- data/lib/data_miner/dictionary.rb +36 -0
- data/lib/data_miner/step.rb +60 -0
- data/lib/data_miner/step/associate.rb +9 -0
- data/lib/data_miner/step/await.rb +35 -0
- data/lib/data_miner/step/callback.rb +22 -0
- data/lib/data_miner/step/derive.rb +9 -0
- data/lib/data_miner/step/import.rb +57 -0
- data/lib/data_miner/william_james_cartesian_product.rb +11 -0
- data/test/data_miner_test.rb +78 -0
- data/test/test_helper.rb +16 -0
- metadata +119 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Brighter Planet
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
=data_miner
|
2
|
+
|
3
|
+
Mine remote data into your ActiveRecord models.
|
4
|
+
|
5
|
+
==Quick start
|
6
|
+
|
7
|
+
Put this in <tt>config/environment.rb</tt>:
|
8
|
+
|
9
|
+
config.gem 'seamusabshere-data_miner', :lib => 'data_miner', :source => 'http://gems.github.com'
|
10
|
+
|
11
|
+
Put this in <tt>lib/tasks/data_miner_tasks.rake</tt>: (unfortunately I don't know a way to automatically include gem tasks, so you have to do this manually for now)
|
12
|
+
|
13
|
+
namespace :data_miner do
|
14
|
+
task :mine => :environment do
|
15
|
+
DataMiner.mine :class_names => ENV['CLASSES'].to_s.split(/\s*,\s*/).flatten.compact
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
You need to specify what order to mine data. For example, in <tt>config/initializers/data_miner_config.rb</tt>:
|
20
|
+
|
21
|
+
DataMiner.enqueue do |queue|
|
22
|
+
queue << Country # class whose data should be mined 1st
|
23
|
+
queue << Airport # class whose data should be mined 2nd
|
24
|
+
# etc
|
25
|
+
end
|
26
|
+
|
27
|
+
You need to define <tt>mine_data</tt> blocks. For example, in <tt>app/models/country.rb</tt>:
|
28
|
+
|
29
|
+
class Country < ActiveRecord::Base
|
30
|
+
mine_data do |step|
|
31
|
+
# import country names and country codes
|
32
|
+
step.import :url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv' do |attr|
|
33
|
+
attr.key :iso_3166, :name_in_source => 'country code'
|
34
|
+
attr.store :iso_3166, :name_in_source => 'country code'
|
35
|
+
attr.store :name, :name_in_source => 'country'
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
To complete the example, in <tt>app/models/airport.rb</tt>:
|
41
|
+
|
42
|
+
class Airport < ActiveRecord::Base
|
43
|
+
belongs_to :country
|
44
|
+
|
45
|
+
mine_data do |step|
|
46
|
+
# import airport iata_code, name, etc.
|
47
|
+
step.import(:url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false) do |attr|
|
48
|
+
attr.key :iata_code, :field_number => 3
|
49
|
+
attr.store :name, :field_number => 0
|
50
|
+
attr.store :city, :field_number => 1
|
51
|
+
attr.store :country, :field_number => 2, :foreign_key => :name # will use Country.find_by_name(X)
|
52
|
+
attr.store :iata_code, :field_number => 3
|
53
|
+
attr.store :latitude, :field_number => 5
|
54
|
+
attr.store :longitude, :field_number => 6
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
Once you have (1) set up the order of data mining and (2) defined <tt>mine_data</tt> blocks in your classes, you can:
|
60
|
+
|
61
|
+
$ rake data_miner:mine
|
62
|
+
|
63
|
+
==Complete example
|
64
|
+
|
65
|
+
~ $ rails testapp
|
66
|
+
~ $ cd testapp/
|
67
|
+
~/testapp $ ./script/generate model Airport iata_code:string name:string city:string country_id:integer latitude:float longitude:float
|
68
|
+
~/testapp $ ./script/generate model Country iso_3166:string name:string
|
69
|
+
~/testapp $ rake db:migrate
|
70
|
+
~/testapp $ touch lib/tasks/data_miner_tasks.rake
|
71
|
+
[...edit per quick start...]
|
72
|
+
~/testapp $ touch config/initializers/data_miner_config.rb
|
73
|
+
[...edit per quick start...]
|
74
|
+
~/testapp $ rake data_miner:mine
|
75
|
+
|
76
|
+
Now you should have
|
77
|
+
|
78
|
+
~/testapp $ ./script/console
|
79
|
+
Loading development environment (Rails 2.3.3)
|
80
|
+
>> Airport.first.iata_code
|
81
|
+
=> "GKA"
|
82
|
+
>> Airport.first.country.name
|
83
|
+
=> "Papua New Guinea"
|
84
|
+
|
85
|
+
==Copyright
|
86
|
+
|
87
|
+
Copyright (c) 2009 Brighter Planet. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "data_miner"
|
8
|
+
gem.summary = %Q{Mine remote data into your ActiveRecord models.}
|
9
|
+
gem.description = %Q{Mine remote data into your ActiveRecord models.}
|
10
|
+
gem.email = "seamus@abshere.net"
|
11
|
+
gem.homepage = "http://github.com/seamusabshere/data_miner"
|
12
|
+
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
13
|
+
%w{ activerecord activesupport seamusabshere-remote_table seamusabshere-errata }.each { |name| gem.add_dependency name }
|
14
|
+
gem.require_path = "lib"
|
15
|
+
gem.files.include %w(lib/data_miner) unless gem.files.empty? # seems to fail once it's in the wild
|
16
|
+
gem.rdoc_options << '--line-numbers' << '--inline-source'
|
17
|
+
# gem.rubyforge_project = "dataminer"
|
18
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
19
|
+
end
|
20
|
+
|
21
|
+
Jeweler::RubyforgeTasks.new do |rubyforge|
|
22
|
+
rubyforge.doc_task = "rdoc"
|
23
|
+
end
|
24
|
+
rescue LoadError
|
25
|
+
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
26
|
+
end
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
Rake::TestTask.new(:test) do |test|
|
30
|
+
test.libs << 'lib' << 'test'
|
31
|
+
test.pattern = 'test/**/*_test.rb'
|
32
|
+
test.verbose = true
|
33
|
+
end
|
34
|
+
|
35
|
+
begin
|
36
|
+
require 'rcov/rcovtask'
|
37
|
+
Rcov::RcovTask.new do |test|
|
38
|
+
test.libs << 'test'
|
39
|
+
test.pattern = 'test/**/*_test.rb'
|
40
|
+
test.verbose = true
|
41
|
+
end
|
42
|
+
rescue LoadError
|
43
|
+
task :rcov do
|
44
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
task :default => :test
|
52
|
+
|
53
|
+
require 'rake/rdoctask'
|
54
|
+
Rake::RDocTask.new do |rdoc|
|
55
|
+
if File.exist?('VERSION')
|
56
|
+
version = File.read('VERSION')
|
57
|
+
else
|
58
|
+
version = ""
|
59
|
+
end
|
60
|
+
|
61
|
+
rdoc.rdoc_dir = 'rdoc'
|
62
|
+
rdoc.title = "data_miner #{version}"
|
63
|
+
rdoc.rdoc_files.include('README*')
|
64
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
65
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/data_miner.gemspec
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{data_miner}
|
8
|
+
s.version = "0.1.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
+
s.date = %q{2009-08-19}
|
13
|
+
s.description = %q{Mine remote data into your ActiveRecord models.}
|
14
|
+
s.email = %q{seamus@abshere.net}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"data_miner.gemspec",
|
27
|
+
"lib/data_miner.rb",
|
28
|
+
"lib/data_miner/active_record_ext.rb",
|
29
|
+
"lib/data_miner/attribute.rb",
|
30
|
+
"lib/data_miner/attribute_collection.rb",
|
31
|
+
"lib/data_miner/configuration.rb",
|
32
|
+
"lib/data_miner/dictionary.rb",
|
33
|
+
"lib/data_miner/step.rb",
|
34
|
+
"lib/data_miner/step/associate.rb",
|
35
|
+
"lib/data_miner/step/await.rb",
|
36
|
+
"lib/data_miner/step/callback.rb",
|
37
|
+
"lib/data_miner/step/derive.rb",
|
38
|
+
"lib/data_miner/step/import.rb",
|
39
|
+
"lib/data_miner/william_james_cartesian_product.rb",
|
40
|
+
"test/data_miner_test.rb",
|
41
|
+
"test/test_helper.rb"
|
42
|
+
]
|
43
|
+
s.homepage = %q{http://github.com/seamusabshere/data_miner}
|
44
|
+
s.rdoc_options = ["--charset=UTF-8", "--line-numbers", "--inline-source"]
|
45
|
+
s.require_paths = ["lib"]
|
46
|
+
s.rubygems_version = %q{1.3.5}
|
47
|
+
s.summary = %q{Mine remote data into your ActiveRecord models.}
|
48
|
+
s.test_files = [
|
49
|
+
"test/data_miner_test.rb",
|
50
|
+
"test/test_helper.rb"
|
51
|
+
]
|
52
|
+
|
53
|
+
if s.respond_to? :specification_version then
|
54
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
55
|
+
s.specification_version = 3
|
56
|
+
|
57
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
58
|
+
s.add_runtime_dependency(%q<activerecord>, [">= 0"])
|
59
|
+
s.add_runtime_dependency(%q<activesupport>, [">= 0"])
|
60
|
+
s.add_runtime_dependency(%q<seamusabshere-remote_table>, [">= 0"])
|
61
|
+
s.add_runtime_dependency(%q<seamusabshere-errata>, [">= 0"])
|
62
|
+
else
|
63
|
+
s.add_dependency(%q<activerecord>, [">= 0"])
|
64
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
65
|
+
s.add_dependency(%q<seamusabshere-remote_table>, [">= 0"])
|
66
|
+
s.add_dependency(%q<seamusabshere-errata>, [">= 0"])
|
67
|
+
end
|
68
|
+
else
|
69
|
+
s.add_dependency(%q<activerecord>, [">= 0"])
|
70
|
+
s.add_dependency(%q<activesupport>, [">= 0"])
|
71
|
+
s.add_dependency(%q<seamusabshere-remote_table>, [">= 0"])
|
72
|
+
s.add_dependency(%q<seamusabshere-errata>, [">= 0"])
|
73
|
+
end
|
74
|
+
end
|
data/lib/data_miner.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'activesupport'
|
3
|
+
require 'activerecord'
|
4
|
+
require 'remote_table'
|
5
|
+
require 'errata'
|
6
|
+
|
7
|
+
require 'data_miner/active_record_ext'
|
8
|
+
require 'data_miner/attribute'
|
9
|
+
require 'data_miner/attribute_collection'
|
10
|
+
require 'data_miner/configuration'
|
11
|
+
require 'data_miner/dictionary'
|
12
|
+
require 'data_miner/step'
|
13
|
+
require 'data_miner/step/associate'
|
14
|
+
require 'data_miner/step/await'
|
15
|
+
require 'data_miner/step/callback'
|
16
|
+
require 'data_miner/step/derive'
|
17
|
+
require 'data_miner/step/import'
|
18
|
+
require 'data_miner/william_james_cartesian_product' # TODO: move to gem
|
19
|
+
|
20
|
+
module DataMiner
|
21
|
+
class << self
|
22
|
+
def mine(options = {})
|
23
|
+
DataMiner::Configuration.mine options
|
24
|
+
end
|
25
|
+
|
26
|
+
def enqueue(&block)
|
27
|
+
DataMiner::Configuration.enqueue &block
|
28
|
+
end
|
29
|
+
|
30
|
+
def classes
|
31
|
+
DataMiner::Configuration.classes
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
ActiveRecord::Base.class_eval do
|
37
|
+
include DataMiner::ActiveRecordExt
|
38
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module DataMiner
|
2
|
+
module ActiveRecordExt
|
3
|
+
def self.included(klass)
|
4
|
+
klass.extend(ClassMethods)
|
5
|
+
end
|
6
|
+
|
7
|
+
module ClassMethods
|
8
|
+
def mine_data(options = {}, &block)
|
9
|
+
class_eval { cattr_accessor :data_mine }
|
10
|
+
self.data_mine = Configuration.new(self)
|
11
|
+
yield data_mine
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,279 @@
|
|
1
|
+
module DataMiner
|
2
|
+
class Attribute
|
3
|
+
attr_accessor :klass, :name, :options_for_step, :affected_by_steps, :key_for_steps
|
4
|
+
|
5
|
+
def initialize(klass, name)
|
6
|
+
@klass = klass
|
7
|
+
@name = name.to_sym
|
8
|
+
@options_for_step = {}
|
9
|
+
@affected_by_steps = []
|
10
|
+
@key_for_steps = []
|
11
|
+
end
|
12
|
+
|
13
|
+
def inspect
|
14
|
+
"Attribute(#{klass}.#{name})"
|
15
|
+
end
|
16
|
+
|
17
|
+
def affected_by!(step, options = {})
|
18
|
+
self.options_for_step[step] = options
|
19
|
+
self.affected_by_steps << step
|
20
|
+
end
|
21
|
+
|
22
|
+
def affected_by?(step)
|
23
|
+
affected_by_steps.include?(step)
|
24
|
+
end
|
25
|
+
|
26
|
+
def key_for!(step, options = {})
|
27
|
+
self.options_for_step[step] = options
|
28
|
+
self.key_for_steps << step
|
29
|
+
end
|
30
|
+
|
31
|
+
def key_for?(step)
|
32
|
+
key_for_steps.include?(step)
|
33
|
+
end
|
34
|
+
|
35
|
+
def value_in_dictionary(step, key)
|
36
|
+
return *dictionary(step).lookup(key) # strip the array wrapper if there's only one element
|
37
|
+
end
|
38
|
+
|
39
|
+
# Read this attribute's raw value for +step+ out of +row+ and clean it up.
#
# The value comes from, in order of precedence: the step's :static option,
# the field(s) at :field_number (a Range is joined with the delimiter), or
# the field named by name_in_source.
#
# Returns nil when the source value is nil, returns ActiveRecord objects
# untouched, and otherwise returns a cleaned string (or whatever do_convert /
# do_sprintf produce when those options are set).
def value_in_source(step, row)
  if wants_static?(step)
    value = static(step)
  elsif field_number(step)
    if field_number(step).is_a?(Range)
      value = field_number(step).map { |n| row[n] }.join(delimiter(step))
    else
      value = row[field_number(step)]
    end
  else
    value = row[name_in_source(step)]
  end
  return nil if value.nil?
  return value if value.is_a?(ActiveRecord::Base) # escape valve for parsers that look up associations directly
  # String#to_s returns the receiver itself, so work on a copy: the bang
  # methods below must not alter the caller's row or the configured :static
  # value, and must not raise on frozen strings.
  value = value.to_s.dup
  value = value[keep(step)] if wants_keep?(step)
  value = do_split(step, value) if wants_split?(step)
  # taken from old errata... maybe we want to do this here
  value.gsub!(/[ ]+/, ' ')       # collapse runs of spaces
  # text.gsub!('- ', '-')
  value.gsub!(/([^\\])~/, '\1 ') # a tilde not preceded by a backslash becomes a space
  value.strip!
  value.upcase! if wants_upcase?(step)
  value = do_convert(step, row, value) if wants_conversion?(step)
  value = do_sprintf(step, value) if wants_sprintf?(step)
  value
end
|
66
|
+
|
67
|
+
def value_from_row(step, row)
|
68
|
+
value = value_in_source(step, row)
|
69
|
+
return value if value.is_a?(ActiveRecord::Base) # carry through trapdoor
|
70
|
+
value = value_in_dictionary(step, value) if wants_dictionary?(step)
|
71
|
+
value = value_as_association(step, value) if wants_inline_association?
|
72
|
+
value
|
73
|
+
end
|
74
|
+
|
75
|
+
def value_as_association(step, value)
|
76
|
+
@_value_as_association ||= {}
|
77
|
+
@_value_as_association[step] ||= {}
|
78
|
+
@_value_as_association[step][value] ||= reflection_klass(step).send("find_by_#{foreign_key(step)}", value)
|
79
|
+
end
|
80
|
+
|
81
|
+
# this will overwrite nils, even if wants_overwriting?(step) is false
|
82
|
+
def set_record_from_row(step, record, row)
|
83
|
+
return if !wants_overwriting?(step) and !record.send(name).nil?
|
84
|
+
value = value_from_row(step, row)
|
85
|
+
record.send "#{name}=", value
|
86
|
+
$stderr.puts("ActiveRecord didn't like trying to set #{klass}.#{name} = #{value}") if !value.nil? and record.send(name).nil?
|
87
|
+
end
|
88
|
+
|
89
|
+
def perform(step)
|
90
|
+
case step.variant
|
91
|
+
when :associate
|
92
|
+
perform_association(step)
|
93
|
+
when :derive
|
94
|
+
if wants_update_all?(step)
|
95
|
+
perform_update_all(step)
|
96
|
+
elsif wants_weighted_average?(step)
|
97
|
+
perform_weighted_average(step)
|
98
|
+
else
|
99
|
+
perform_callback(step)
|
100
|
+
end
|
101
|
+
when :import
|
102
|
+
raise "This shouldn't be called, the import step is special"
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def perform_association(step)
|
107
|
+
raise "dictionary and prefix don't mix" if wants_dictionary?(step) and wants_prefix?(step)
|
108
|
+
klass.update_all("#{reflection.primary_key_name} = NULL") if wants_nullification?(step)
|
109
|
+
if wants_create?(step)
|
110
|
+
klass.find_in_batches do |batch|
|
111
|
+
batch.each do |record|
|
112
|
+
if wants_prefix?(step)
|
113
|
+
sql = "SELECT reflection_table.id FROM #{reflection_klass(step).quoted_table_name} AS reflection_table INNER JOIN #{klass.quoted_table_name} AS klass_table ON LEFT(klass_table.#{key(step)}, LENGTH(reflection_table.#{foreign_key(step)})) = reflection_table.#{foreign_key(step)} WHERE klass_table.id = #{record.id} ORDER BY LENGTH(reflection_table.#{foreign_key(step)}) DESC"
|
114
|
+
associated_id = ActiveRecord::Base.connection.select_value(sql)
|
115
|
+
next if associated_id.blank?
|
116
|
+
record.send("#{reflection.primary_key_name}=", associated_id)
|
117
|
+
else
|
118
|
+
dynamic_finder_value = record.send(key(step))
|
119
|
+
dynamic_finder_value = value_in_dictionary(step, dynamic_finder_value) if wants_dictionary?(step)
|
120
|
+
next if dynamic_finder_value.blank?
|
121
|
+
associated = reflection_klass(step).send("find_or_create_by_#{foreign_key(step)}", dynamic_finder_value) # TODO cache results
|
122
|
+
record.send("#{name}=", associated)
|
123
|
+
end
|
124
|
+
record.save
|
125
|
+
end
|
126
|
+
end
|
127
|
+
else
|
128
|
+
reflection_klass(step).find_in_batches do |batch|
|
129
|
+
batch.each do |reflection_record|
|
130
|
+
klass.update_all ["#{reflection.primary_key_name} = ?", reflection_record.id], ["#{key(step)} = ?", reflection_record.send(foreign_key(step))]
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def perform_update_all(step)
|
137
|
+
klass.update_all("#{name} = #{set(step)}", conditions(step))
|
138
|
+
end
|
139
|
+
|
140
|
+
def perform_weighted_average(step)
|
141
|
+
# handle weighting by scopes instead of associations
|
142
|
+
if weighting_association(step) and !klass.reflect_on_association(weighting_association(step))
|
143
|
+
klass.find_in_batches do |batch|
|
144
|
+
batch.each do |record|
|
145
|
+
record.send "#{name}=", record.send(weighting_association(step)).weighted_average(name, :by => weighting_column(step), :disaggregator => weighting_disaggregator(step))
|
146
|
+
record.save
|
147
|
+
end
|
148
|
+
end
|
149
|
+
else # there's no weighting association OR there is one and it's a valid association
|
150
|
+
klass.update_all_weighted_averages name, :by => weighting_column(step), :disaggregator => weighting_disaggregator(step), :association => weighting_association(step)
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
# Run the class-level callback configured for +step+ (see #callback),
# passing as many arguments as the callback method declares:
#
# * arity 0 - called with no arguments
# * arity 1 - called with this attribute's name
# * arity 2 - called with the name and the options recorded for +step+
#
# A callback with any other arity (including optional or splat arguments,
# which report a negative arity) is not invoked.
def perform_callback(step)
  callback_name = callback(step)
  # `when 0:` (trailing colon) is only accepted by Ruby 1.8; `then` parses on every version.
  case klass.method(callback_name).arity
  when 0 then klass.send(callback_name)
  when 1 then klass.send(callback_name, name)
  when 2 then klass.send(callback_name, name, options_for_step[step])
  end
end
|
164
|
+
|
165
|
+
def unit_from_source(step, row)
|
166
|
+
row[unit_in_source(step)].to_s.strip.underscore.to_sym
|
167
|
+
end
|
168
|
+
|
169
|
+
def do_convert(step, row, value)
|
170
|
+
from_unit = from(step) || unit_from_source(step, row)
|
171
|
+
value.to_f.convert(from_unit, to(step))
|
172
|
+
end
|
173
|
+
|
174
|
+
def do_sprintf(step, value)
|
175
|
+
if /\%[0-9\.]*f/.match(sprintf(step))
|
176
|
+
value = value.to_f
|
177
|
+
elsif /\%[0-9\.]*d/.match(sprintf(step))
|
178
|
+
value = value.to_i
|
179
|
+
end
|
180
|
+
sprintf(step) % value
|
181
|
+
end
|
182
|
+
|
183
|
+
def do_split(step, value)
|
184
|
+
pattern = split_options(step)[:pattern] || /\s+/ # default is split on whitespace
|
185
|
+
keep = split_options(step)[:keep] || 0 # default is keep first element
|
186
|
+
value.to_s.split(pattern)[keep].to_s
|
187
|
+
end
|
188
|
+
|
189
|
+
def column_type
|
190
|
+
@column_type ||= klass.columns_hash[name.to_s].type
|
191
|
+
end
|
192
|
+
|
193
|
+
{
|
194
|
+
:static => 'options_for_step[step].has_key?(:static)',
|
195
|
+
:prefix => :prefix,
|
196
|
+
:create => 'create(step) != false',
|
197
|
+
:keep => :keep,
|
198
|
+
:upcase => :upcase,
|
199
|
+
:conversion => '!from(step).nil? or !unit_in_source(step).nil?',
|
200
|
+
:sprintf => :sprintf,
|
201
|
+
:dictionary => :dictionary_options,
|
202
|
+
:split => :split_options,
|
203
|
+
:update_all => :set,
|
204
|
+
:nullification => 'nullify(step) != false',
|
205
|
+
:overwriting => 'overwrite(step) != false',
|
206
|
+
:weighted_average => '!weighting_association(step).nil? or !weighting_column(step).nil?'
|
207
|
+
}.each do |name, condition|
|
208
|
+
condition = "!#{condition}(step).nil?" if condition.is_a?(Symbol)
|
209
|
+
eval <<-EOS
|
210
|
+
def wants_#{name}?(step)
|
211
|
+
#{condition}
|
212
|
+
end
|
213
|
+
EOS
|
214
|
+
end
|
215
|
+
|
216
|
+
{
|
217
|
+
:name_in_source => { :default => :name, :stringify => true },
|
218
|
+
:key => { :default => :name, :stringify => true },
|
219
|
+
:foreign_key => { :default => 'key(step)', :stringify => true },
|
220
|
+
:delimiter => { :default => '", "' }
|
221
|
+
}.each do |name, options|
|
222
|
+
eval <<-EOS
|
223
|
+
def #{name}(step)
|
224
|
+
(options_for_step[step][:#{name}] || #{options[:default]})#{'.to_s' if options[:stringify]}
|
225
|
+
end
|
226
|
+
EOS
|
227
|
+
end
|
228
|
+
|
229
|
+
def reflection
|
230
|
+
if @_reflection.nil?
|
231
|
+
@_reflection = klass.reflect_on_association(name) || :missing
|
232
|
+
reflection
|
233
|
+
elsif @_reflection == :missing
|
234
|
+
nil
|
235
|
+
else
|
236
|
+
@_reflection
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
def reflection_klass(step)
|
241
|
+
return nil unless reflection
|
242
|
+
if reflection.options[:polymorphic]
|
243
|
+
polymorphic_type(step).constantize
|
244
|
+
else
|
245
|
+
reflection.klass
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
249
|
+
def wants_inline_association?
|
250
|
+
!reflection.nil?
|
251
|
+
end
|
252
|
+
|
253
|
+
def callback(step)
|
254
|
+
(options_for_step[step][:callback] || "derive_#{name}").to_sym
|
255
|
+
end
|
256
|
+
|
257
|
+
def dictionary(step)
|
258
|
+
raise "shouldn't ask for this" unless wants_dictionary?(step) # don't try to initialize if there are no dictionary options
|
259
|
+
@dictionaries ||= {}
|
260
|
+
@dictionaries[step] ||= Dictionary.new(dictionary_options(step))
|
261
|
+
end
|
262
|
+
|
263
|
+
%w(dictionary split).each do |name|
|
264
|
+
eval <<-EOS
|
265
|
+
def #{name}_options(step)
|
266
|
+
options_for_step[step][:#{name}]
|
267
|
+
end
|
268
|
+
EOS
|
269
|
+
end
|
270
|
+
|
271
|
+
%w(from to set conditions weighting_association weighting_column weighting_disaggregator sprintf nullify overwrite upcase prefix unit_in_source field_number keep create static polymorphic_type).each do |name|
|
272
|
+
eval <<-EOS
|
273
|
+
def #{name}(step)
|
274
|
+
options_for_step[step][:#{name}]
|
275
|
+
end
|
276
|
+
EOS
|
277
|
+
end
|
278
|
+
end
|
279
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module DataMiner
  # The set of Attribute objects configured for one class, stored in a hash
  # keyed by attribute name (always a Symbol). Steps register attributes here
  # as keys or as affected attributes and later ask which ones apply to them.
  class AttributeCollection
    attr_accessor :klass, :attributes

    # klass:: the class the collected attributes belong to
    def initialize(klass)
      @klass = klass
      @attributes = {}
    end

    # Register +attr_name+ as a key for +step+, creating the Attribute if needed.
    def key!(step, attr_name, attr_options = {})
      find_or_initialize(attr_name).key_for!(step, attr_options)
    end

    # Register +attr_name+ as affected by +step+, creating the Attribute if needed.
    def affect!(step, attr_name, attr_options = {})
      find_or_initialize(attr_name).affected_by!(step, attr_options)
    end

    # Register every content column of the step's class as affected by +step+,
    # leaving out any column named in <tt>options[:except]</tt>.
    def affect_all_content_columns!(step, options = {})
      except = Array.wrap(options[:except]).map(&:to_sym)
      step.klass.content_columns.map(&:name).reject { |content_column| except.include?(content_column.to_sym) }.each do |content_column|
        find_or_initialize(content_column).affected_by!(step)
      end
    end

    # Attributes that +step+ affects.
    def all_affected_by(step)
      attributes.values.select { |attr| attr.affected_by?(step) }
    end

    # Attributes that +step+ uses as keys.
    def all_keys_for(step)
      attributes.values.select { |attr| attr.key_for?(step) }
    end

    # Affected attributes followed by key attributes for +step+, without duplicates.
    def all_for(step)
      (all_affected_by(step) + all_keys_for(step)).uniq
    end

    # True if at least one attribute is a key for +step+.
    def has_keys_for?(step)
      attributes.values.any? { |attr| attr.key_for?(step) }
    end

    # True if any attribute affected by +step+ does not want overwriting.
    def has_conditional_writes_for?(step)
      all_affected_by(step).any? { |attr| !attr.wants_overwriting?(step) }
    end

    private

    # Fetch the Attribute for +attr_name+, building it on first use.
    #
    # The hash key is normalised to a Symbol: affect_all_content_columns!
    # passes column names as Strings while the step DSL passes Symbols, and
    # both must resolve to the same Attribute object.
    def find_or_initialize(attr_name)
      attributes[attr_name.to_sym] ||= Attribute.new(klass, attr_name)
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module DataMiner
|
2
|
+
class Configuration
|
3
|
+
attr_accessor :steps, :klass, :counter, :attributes, :awaiting
|
4
|
+
|
5
|
+
def initialize(klass)
|
6
|
+
@steps = []
|
7
|
+
@klass = klass
|
8
|
+
@counter = 0
|
9
|
+
@attributes = AttributeCollection.new(klass)
|
10
|
+
end
|
11
|
+
|
12
|
+
%w(import associate derive await).each do |method|
|
13
|
+
eval <<-EOS
|
14
|
+
def #{method}(*args, &block)
|
15
|
+
self.counter += 1
|
16
|
+
if block_given? # FORM C
|
17
|
+
step_options = args[0] || {}
|
18
|
+
set_awaiting!(step_options)
|
19
|
+
self.steps << Step::#{method.camelcase}.new(self, counter, step_options, &block)
|
20
|
+
elsif args[0].is_a?(Hash) # FORM A
|
21
|
+
step_options = args[0]
|
22
|
+
set_awaiting!(step_options)
|
23
|
+
self.steps << Step::#{method.camelcase}.new(self, counter, step_options)
|
24
|
+
else # FORM B
|
25
|
+
attr_name = args[0]
|
26
|
+
attr_options = args[1] || {}
|
27
|
+
step_options = {}
|
28
|
+
set_awaiting!(step_options)
|
29
|
+
self.steps << Step::#{method.camelcase}.new(self, counter, step_options) do |attr|
|
30
|
+
attr.affect attr_name, attr_options
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
EOS
|
35
|
+
end
|
36
|
+
|
37
|
+
def set_awaiting!(step_options)
|
38
|
+
step_options.merge!(:awaiting => awaiting) if !awaiting.nil?
|
39
|
+
end
|
40
|
+
|
41
|
+
def awaiting!(step)
|
42
|
+
self.awaiting = step
|
43
|
+
end
|
44
|
+
|
45
|
+
def stop_awaiting!
|
46
|
+
self.awaiting = nil
|
47
|
+
end
|
48
|
+
|
49
|
+
# Mine data for this class.
|
50
|
+
def mine(options = {})
|
51
|
+
steps.each { |step| step.perform options }
|
52
|
+
end
|
53
|
+
|
54
|
+
cattr_accessor :classes
|
55
|
+
self.classes = []
|
56
|
+
class << self
|
57
|
+
# Mine data. Defaults to all classes touched by DataMiner.
|
58
|
+
#
|
59
|
+
# Options
|
60
|
+
# * <tt>:class_names</tt>: provide an array of class names to mine
|
61
|
+
def mine(options = {})
|
62
|
+
classes.each do |klass|
|
63
|
+
if options[:class_names].blank? or options[:class_names].include?(klass.name)
|
64
|
+
klass.data_mine.mine options
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# Queue up all the ActiveRecord classes that DataMiner should touch.
|
70
|
+
#
|
71
|
+
# Generally done in <tt>config/initializers/data_miner_config.rb</tt>.
|
72
|
+
def enqueue(&block)
|
73
|
+
yield self.classes
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module DataMiner
  # Looks values up in a RemoteTable: find the first row whose key column
  # matches a given key, then return that row's value column split on
  # semicolons.
  class Dictionary
    attr_accessor :key_name, :value_name, :sprintf, :table

    # Options
    # * <tt>:key</tt>: name of the column to match against
    # * <tt>:returns</tt>: name of the column to return
    # * <tt>:sprintf</tt>: format applied to both sides before comparing (default '%s')
    # * <tt>:url</tt>: passed to RemoteTable.new
    def initialize(options = {})
      @key_name = options[:key]
      @value_name = options[:returns]
      @sprintf = options[:sprintf] || '%s'
      @table = RemoteTable.new(:url => options[:url])
    end

    # Look +key+ up using the columns and format this dictionary was built with.
    # Returns an array of strings, or nil if no row matches.
    def lookup(key)
      find(self.key_name, key, self.value_name, :sprintf => self.sprintf)
    end

    # Return the +value_name+ column of the first row whose +key_name+ column
    # matches +key+, split on semicolons (whitespace around each semicolon is
    # dropped). Returns nil if no row matches.
    #
    # Options
    # * <tt>:sprintf</tt>: format applied to both +key+ and the row's key before comparing
    def find(key_name, key, value_name, options = {})
      # The wanted key is the same for every row, so normalise it once.
      wanted = normalize_for_comparison(key, options)
      match = table.rows.detect { |row| normalize_for_comparison(row[key_name], options) == wanted }
      match[value_name].to_s.split(/\s*;\s*/) if match
    end

    private

    # Turn +string+ into a stripped string suitable for equality comparison.
    # When <tt>options[:sprintf]</tt> is given, the value is first coerced to
    # a float or integer if the format contains a %f or %d directive, then
    # formatted with that same format.
    def normalize_for_comparison(string, options = {})
      pattern = options[:sprintf]
      if pattern
        if /\%[0-9\.]*f/.match(pattern)
          string = string.to_f
        elsif /\%[0-9\.]*d/.match(pattern)
          string = string.to_i
        end
        # Format with the pattern that was passed in, not the instance-wide default.
        string = pattern % string
      end
      string.to_s.strip
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module DataMiner
|
2
|
+
class Step
|
3
|
+
attr_accessor :configuration, :number, :options
|
4
|
+
delegate :klass, :to => :configuration
|
5
|
+
delegate :attributes, :to => :configuration
|
6
|
+
|
7
|
+
def initialize(configuration, number, options = {}, &block)
|
8
|
+
@configuration = configuration
|
9
|
+
@number = number
|
10
|
+
@options = options
|
11
|
+
yield self if block_given? # pull in attributes
|
12
|
+
attributes.affect_all_content_columns!(self, :except => options[:except]) if options[:affect_all] == :content_columns
|
13
|
+
affected_attributes.each { |attr| attr.options_for_step[self][:callback] = options[:callback] } if options[:callback]
|
14
|
+
all_attributes.each { |attr| attr.options_for_step[self][:name_in_source] = attr.name_in_source(self).upcase } if options[:headers] == :upcase # TODO remove
|
15
|
+
end
|
16
|
+
|
17
|
+
def variant
|
18
|
+
self.class.name.demodulize.underscore.to_sym
|
19
|
+
end
|
20
|
+
|
21
|
+
def awaiting?
|
22
|
+
!options[:awaiting].nil?
|
23
|
+
end
|
24
|
+
|
25
|
+
def inspect
|
26
|
+
"Step(#{klass} #{variant.to_s.camelcase} #{number})"
|
27
|
+
end
|
28
|
+
|
29
|
+
def signature
|
30
|
+
"#{klass} step #{number}: #{variant}"
|
31
|
+
end
|
32
|
+
|
33
|
+
def perform(options = {})
|
34
|
+
return if awaiting? and !options[:force]
|
35
|
+
affected_attributes.each { |attr| attr.perform self }
|
36
|
+
$stderr.puts "performed #{signature}"
|
37
|
+
end
|
38
|
+
|
39
|
+
def affected_attributes
|
40
|
+
@affected_attributes ||= attributes.all_affected_by self
|
41
|
+
end
|
42
|
+
|
43
|
+
def key_attributes
|
44
|
+
@key_attributes ||= attributes.all_keys_for self
|
45
|
+
end
|
46
|
+
|
47
|
+
def all_attributes
|
48
|
+
@all_attributes ||= attributes.all_for self
|
49
|
+
end
|
50
|
+
|
51
|
+
def key(attr_name, attr_options = {})
|
52
|
+
attributes.key! self, attr_name, attr_options
|
53
|
+
end
|
54
|
+
|
55
|
+
def affect(attr_name, attr_options = {})
|
56
|
+
attributes.affect! self, attr_name, attr_options
|
57
|
+
end
|
58
|
+
alias_method :store, :affect
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module DataMiner
  class Step
    # A step that postpones other steps until another class has been mined.
    #
    # While the block given to +new+ runs, the configuration is switched into
    # awaiting mode (awaiting! / stop_awaiting!). Performing this step does
    # not run anything itself: it appends a Step::Callback to the other
    # class's step list, and that callback later invokes #callback here,
    # which force-performs every step whose options[:awaiting] is this step.
    class Await < Step
      attr_accessor :other_class

      # configuration - receives awaiting!(self) and stop_awaiting!, and is
      #                 yielded to the block.
      # number        - stored as this step's number.
      # options       - hash; the :other_class entry is extracted into
      #                 #other_class, the rest is kept as the step options.
      #
      # The caller's hash is copied rather than modified, and awaiting mode
      # is always left again, even when the block raises.
      def initialize(configuration, number, options = {}, &block)
        # doesn't call super
        @configuration = configuration
        @number = number
        @options = options.dup
        @other_class = @options.delete :other_class
        configuration.awaiting! self
        begin
          yield configuration # pull in steps
        ensure
          configuration.stop_awaiting!
        end
      end

      # Registers a Callback for this step on the other class's step list
      # and logs that to $stderr. Arguments are accepted and ignored.
      #
      # NOTE(review): every call appends another Callback; confirm whether
      # the other class's steps are rebuilt between mining runs.
      def perform(*args)
        other_class.data_mine.steps << Step::Callback.new(other_class.data_mine, self)
        $stderr.puts "added #{signature} to callbacks after #{other_class}"
      end

      # Force-performs every deferred step that is waiting on this step,
      # logging start and completion to $stderr.
      def callback
        $stderr.puts "starting to perform deferred steps in #{signature}..."
        all_awaiting.each { |step| step.perform :force => true }
        $stderr.puts "...done"
      end

      private

      # Steps of this configuration whose options[:awaiting] is this step.
      # Steps with nil options are skipped.
      def all_awaiting
        configuration.steps.select { |step| step.options && step.options[:awaiting] == self }
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module DataMiner
  class Step
    # Step appended to another configuration on behalf of a foreign step.
    # Performing it calls +callback+ on that foreign step.
    class Callback < Step
      attr_accessor :foreign_step

      # configuration - configuration this callback is attached to.
      # foreign_step  - object whose #callback is invoked by #perform.
      #
      # Does not call super. The number is the fixed label "(last)", and the
      # options are an empty hash so option lookups on this step return nil
      # instead of failing on a missing hash.
      def initialize(configuration, foreign_step)
        @configuration = configuration
        @foreign_step = foreign_step
        @number = "(last)"
        @options = {}
      end

      # Runs the foreign step's callback, then logs to $stderr.
      # Arguments are accepted and ignored.
      def perform(*args)
        foreign_step.callback
        $stderr.puts "performed #{signature}"
      end

      # The inherited signature followed by the foreign step's signature.
      def signature
        "#{super} (on behalf of #{foreign_step.signature})"
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module DataMiner
  class Step
    # Step that reads rows from a RemoteTable and writes them into records
    # of +klass+, optionally filtering and correcting each row through an
    # Errata first.
    class Import < Step
      attr_accessor :table, :errata

      # Calls the inherited initializer with the same arguments, then builds
      # the Errata (only when options[:errata] is given) and the RemoteTable
      # from the table-related subset of +options+.
      def initialize(configuration, number, options = {}, &block)
        super
        @errata = Errata.new(:url => options[:errata], :klass => klass) if options[:errata]
        @table = RemoteTable.new(options.slice(:url, :filename, :post_data, :format, :skip, :cut, :schema, :schema_name, :trap, :select, :reject, :sheet, :delimiter, :headers, :transform, :crop))
      end

      # The inherited signature followed by the source URL.
      def signature
        "#{super} #{options[:url]}"
      end

      # Imports every row of the table.
      #
      # Truncates the target table first when #wants_truncate? says so. Rows
      # rejected by the errata are skipped; the others are corrected in
      # place. Each remaining row is applied to the records selected by
      # #records_for, and each record is saved. Arguments are accepted and
      # ignored.
      #
      # NOTE(review): TRUNCATE is not available on every database (SQLite
      # has no TRUNCATE statement); confirm the adapters this must support.
      def perform(*args)
        ActiveRecord::Base.connection.execute("TRUNCATE #{klass.quoted_table_name}") if wants_truncate?
        table.each_row do |row|
          if errata
            next if errata.rejects?(row)
            errata.correct!(row)
          end
          records_for(row).each do |record|
            affected_attributes.each { |attr| attr.set_record_from_row(self, record, row) }
            record.save
          end
        end
        $stderr.puts "performed #{signature}"
      end

      # Whether the target table is emptied before importing: an explicit
      # :truncate option of true or false wins; otherwise truncate only when
      # the import does not build on existing data.
      def wants_truncate?
        return true if options[:truncate] == true
        return false if options[:truncate] == false

        !uses_existing_data?
      end

      # True only when the :allow_nil_keys option is exactly true.
      def wants_nil_keys?
        options[:allow_nil_keys] == true
      end

      # True when the attribute collection reports keys or conditional
      # writes for this step. The combined answer is computed once and
      # cached, including a false answer.
      def uses_existing_data?
        return @uses_existing_data if defined?(@uses_existing_data)

        @uses_existing_data = attributes.has_keys_for?(self) || attributes.has_conditional_writes_for?(self)
      end

      # Name of the dynamic finder built from the key attribute names,
      # e.g. :find_or_initialize_by_a_and_b for keys a and b.
      def dynamic_finder_name
        "find_or_initialize_by_#{key_attributes.map(&:name).join('_and_')}".to_sym
      end

      private

      # Records that +row+ should be written to, always as an array.
      #
      # Without existing data this is one fresh klass instance. Otherwise
      # each combination of key values is passed to the dynamic finder.
      # Combinations containing nil are skipped unless nil keys are allowed;
      # the nil placeholders this leaves behind are dropped so callers never
      # receive nil in place of a record.
      def records_for(row)
        return [klass.new] unless uses_existing_data?

        key_values = key_attributes.map { |key_attr| [key_attr.value_from_row(self, row)] }
        found = WilliamJamesCartesianProduct.cart_prod(*key_values).map do |combination|
          next if combination.include?(nil) && !wants_nil_keys?
          klass.send(dynamic_finder_name, *combination)
        end
        found.flatten.compact
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Test schema: airports and countries tables, recreated on every load
# (:force => true).
ActiveRecord::Schema.define(:version => 20090819143429) do
  create_table "airports", :force => true do |t|
    t.string "iata_code", "name", "city"
    t.integer "country_id"
    t.float "latitude", "longitude"
    t.datetime "created_at", "updated_at"
  end

  create_table "countries", :force => true do |t|
    t.string "iso_3166", "name"
    t.datetime "created_at", "updated_at"
  end
end
|
21
|
+
|
22
|
+
# Test model: countries are mined from a remote CSV, keyed on the
# ISO 3166 code.
class Country < ActiveRecord::Base
  mine_data do |miner|
    # import country names and country codes
    miner.import(:url => 'http://www.cs.princeton.edu/introcs/data/iso3166.csv') do |column|
      column.key(:iso_3166, :name_in_source => 'country code')
      column.store(:iso_3166, :name_in_source => 'country code')
      column.store(:name, :name_in_source => 'country')
    end
  end
end
|
32
|
+
|
33
|
+
# Test model: airports are mined from a header-less remote file, so
# columns are addressed by position and keyed on the IATA code.
class Airport < ActiveRecord::Base
  belongs_to :country

  mine_data do |miner|
    # import airport iata_code, name, etc.
    miner.import(:url => 'http://openflights.svn.sourceforge.net/viewvc/openflights/openflights/data/airports.dat', :headers => false) do |column|
      column.key(:iata_code, :field_number => 3)
      column.store(:name, :field_number => 0)
      column.store(:city, :field_number => 1)
      column.store(:country, :field_number => 2, :foreign_key => :name) # will use Country.find_by_name(X)
      column.store(:iata_code, :field_number => 3)
      column.store(:latitude, :field_number => 5)
      column.store(:longitude, :field_number => 6)
    end
  end
end
|
48
|
+
|
49
|
+
# Register both test models with the miner, Country before Airport.
DataMiner.enqueue do |queue|
  [Country, Airport].each { |model| queue << model }
end
|
53
|
+
|
54
|
+
# Exercises mining a single class, once through the model's own
# configuration and once through the module-level API. Both tests fetch
# the source URL declared on Country.
class DataMinerTest < Test::Unit::TestCase
  # Remove every row written by a test.
  def teardown
    [Airport, Country].each { |model| model.delete_all }
  end

  should "mine a single class" do
    Country.data_mine.mine
    uruguay = Country.find_by_iso_3166('UY')
    assert_equal 'Uruguay', uruguay.name
    assert_equal 0, Airport.count
  end

  should "mine a single class using the API" do
    DataMiner.mine(:class_names => ['Country'])
    uruguay = Country.find_by_iso_3166('UY')
    assert_equal 'Uruguay', uruguay.name
    assert_equal 0, Airport.count
  end

  # should "mine all classes" do
  #   DataMiner.mine
  #   uy = Country.find_by_iso_3166('UY')
  #   assert_equal 'Uruguay', uy.name
  #   assert_equal uy, Airport.find_by_iata_code('MVD').country
  # end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'shoulda'
|
4
|
+
require 'sqlite3'
|
5
|
+
|
6
|
+
# Put the gem's lib directory and this test directory on the load path,
# then load the library under test.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'data_miner'

# The SQLite file lives next to this helper. The path is anchored to this
# file's directory so the suite uses the same database regardless of the
# directory it is started from.
ActiveRecord::Base.establish_connection(
  'adapter' => 'sqlite3',
  'database' => File.expand_path('test.sqlite3', File.dirname(__FILE__))
)

# Reopened as a place for shared test helpers; currently empty.
class Test::Unit::TestCase
end
|
metadata
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: seamusabshere-data_miner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Seamus Abshere
|
8
|
+
- Andy Rossmeissl
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2009-08-19 00:00:00 -07:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: activerecord
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: "0"
|
25
|
+
version:
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: activesupport
|
28
|
+
type: :runtime
|
29
|
+
version_requirement:
|
30
|
+
version_requirements: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: "0"
|
35
|
+
version:
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: seamusabshere-remote_table
|
38
|
+
type: :runtime
|
39
|
+
version_requirement:
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: "0"
|
45
|
+
version:
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: seamusabshere-errata
|
48
|
+
type: :runtime
|
49
|
+
version_requirement:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
description: Mine remote data into your ActiveRecord models.
|
57
|
+
email: seamus@abshere.net
|
58
|
+
executables: []
|
59
|
+
|
60
|
+
extensions: []
|
61
|
+
|
62
|
+
extra_rdoc_files:
|
63
|
+
- LICENSE
|
64
|
+
- README.rdoc
|
65
|
+
files:
|
66
|
+
- .document
|
67
|
+
- .gitignore
|
68
|
+
- LICENSE
|
69
|
+
- README.rdoc
|
70
|
+
- Rakefile
|
71
|
+
- VERSION
|
72
|
+
- data_miner.gemspec
|
73
|
+
- lib/data_miner.rb
|
74
|
+
- lib/data_miner/active_record_ext.rb
|
75
|
+
- lib/data_miner/attribute.rb
|
76
|
+
- lib/data_miner/attribute_collection.rb
|
77
|
+
- lib/data_miner/configuration.rb
|
78
|
+
- lib/data_miner/dictionary.rb
|
79
|
+
- lib/data_miner/step.rb
|
80
|
+
- lib/data_miner/step/associate.rb
|
81
|
+
- lib/data_miner/step/await.rb
|
82
|
+
- lib/data_miner/step/callback.rb
|
83
|
+
- lib/data_miner/step/derive.rb
|
84
|
+
- lib/data_miner/step/import.rb
|
85
|
+
- lib/data_miner/william_james_cartesian_product.rb
|
86
|
+
- test/data_miner_test.rb
|
87
|
+
- test/test_helper.rb
|
88
|
+
has_rdoc: false
|
89
|
+
homepage: http://github.com/seamusabshere/data_miner
|
90
|
+
licenses:
|
91
|
+
post_install_message:
|
92
|
+
rdoc_options:
|
93
|
+
- --charset=UTF-8
|
94
|
+
- --line-numbers
|
95
|
+
- --inline-source
|
96
|
+
require_paths:
|
97
|
+
- lib
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: "0"
|
103
|
+
version:
|
104
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
106
|
+
- - ">="
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: "0"
|
109
|
+
version:
|
110
|
+
requirements: []
|
111
|
+
|
112
|
+
rubyforge_project:
|
113
|
+
rubygems_version: 1.3.5
|
114
|
+
signing_key:
|
115
|
+
specification_version: 3
|
116
|
+
summary: Mine remote data into your ActiveRecord models.
|
117
|
+
test_files:
|
118
|
+
- test/data_miner_test.rb
|
119
|
+
- test/test_helper.rb
|