data_miner 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/CHANGELOG +5 -0
- data/README.rdoc +11 -15
- data/Rakefile +7 -2
- data/VERSION +1 -1
- data/data_miner.gemspec +27 -28
- data/lib/data_miner.rb +50 -27
- data/lib/data_miner/attribute.rb +157 -240
- data/lib/data_miner/configuration.rb +58 -55
- data/lib/data_miner/import.rb +57 -0
- data/lib/data_miner/process.rb +21 -0
- data/lib/data_miner/run.rb +7 -0
- data/lib/data_miner/target.rb +7 -0
- data/test/data_miner_test.rb +644 -48
- data/test/test_helper.rb +134 -3
- metadata +29 -23
- data/lib/data_miner/active_record_ext.rb +0 -25
- data/lib/data_miner/attribute_collection.rb +0 -51
- data/lib/data_miner/step.rb +0 -64
- data/lib/data_miner/step/associate.rb +0 -9
- data/lib/data_miner/step/await.rb +0 -35
- data/lib/data_miner/step/callback.rb +0 -22
- data/lib/data_miner/step/derive.rb +0 -9
- data/lib/data_miner/step/import.rb +0 -57
data/test/test_helper.rb
CHANGED
@@ -1,16 +1,147 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'test/unit'
|
3
3
|
require 'shoulda'
|
4
|
-
require '
|
4
|
+
require 'ruby-debug'
|
5
5
|
|
6
6
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
7
7
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
8
8
|
require 'data_miner'
|
9
9
|
|
10
10
|
ActiveRecord::Base.establish_connection(
|
11
|
-
'adapter' => '
|
12
|
-
'database' => '
|
11
|
+
'adapter' => 'mysql',
|
12
|
+
'database' => 'data_miner_test',
|
13
|
+
'username' => 'root',
|
14
|
+
'password' => ''
|
13
15
|
)
|
14
16
|
|
15
17
|
class Test::Unit::TestCase
|
16
18
|
end
|
19
|
+
|
20
|
+
ActiveRecord::Schema.define(:version => 20090819143429) do
|
21
|
+
create_table "airports", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
22
|
+
t.string "country_id"
|
23
|
+
|
24
|
+
t.string "iata_code"
|
25
|
+
t.string "name"
|
26
|
+
t.string "city"
|
27
|
+
t.string "country_name"
|
28
|
+
t.float "latitude"
|
29
|
+
t.float "longitude"
|
30
|
+
t.datetime "created_at"
|
31
|
+
t.datetime "updated_at"
|
32
|
+
end
|
33
|
+
execute "ALTER TABLE airports ADD PRIMARY KEY (iata_code);"
|
34
|
+
|
35
|
+
create_table "countries", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
36
|
+
t.string "iso_3166"
|
37
|
+
t.string "name"
|
38
|
+
t.datetime "created_at"
|
39
|
+
t.datetime "updated_at"
|
40
|
+
end
|
41
|
+
execute "ALTER TABLE countries ADD PRIMARY KEY (iso_3166);"
|
42
|
+
|
43
|
+
create_table "census_regions", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
44
|
+
t.string "name"
|
45
|
+
t.datetime "updated_at"
|
46
|
+
t.datetime "created_at"
|
47
|
+
t.integer "number"
|
48
|
+
end
|
49
|
+
execute "ALTER TABLE census_regions ADD PRIMARY KEY (number);"
|
50
|
+
|
51
|
+
create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
52
|
+
t.string "automobile_make_id"
|
53
|
+
t.string "automobile_model_id"
|
54
|
+
t.string "automobile_model_year_id"
|
55
|
+
t.string "automobile_fuel_type_id"
|
56
|
+
|
57
|
+
t.float "fuel_efficiency_city"
|
58
|
+
t.float "fuel_efficiency_highway"
|
59
|
+
t.string "make_name"
|
60
|
+
t.string "model_name"
|
61
|
+
t.string "year"
|
62
|
+
t.string "fuel_type_code"
|
63
|
+
t.datetime "updated_at"
|
64
|
+
t.datetime "created_at"
|
65
|
+
t.string "transmission"
|
66
|
+
t.string "drive"
|
67
|
+
t.boolean "turbo"
|
68
|
+
t.boolean "supercharger"
|
69
|
+
t.integer "cylinders"
|
70
|
+
t.float "displacement"
|
71
|
+
t.float "raw_fuel_efficiency_city"
|
72
|
+
t.float "raw_fuel_efficiency_highway"
|
73
|
+
t.integer "carline_mfr_code"
|
74
|
+
t.integer "vi_mfr_code"
|
75
|
+
t.integer "carline_code"
|
76
|
+
t.integer "carline_class_code"
|
77
|
+
t.boolean "injection"
|
78
|
+
t.string "carline_class_name"
|
79
|
+
t.string "speeds"
|
80
|
+
t.string "row_hash"
|
81
|
+
end
|
82
|
+
execute "ALTER TABLE automobile_variants ADD PRIMARY KEY (row_hash);"
|
83
|
+
|
84
|
+
create_table "automobile_fuel_types", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
85
|
+
t.string "name"
|
86
|
+
t.datetime "created_at"
|
87
|
+
t.datetime "updated_at"
|
88
|
+
t.float "emission_factor"
|
89
|
+
t.float "annual_distance"
|
90
|
+
t.string "code"
|
91
|
+
end
|
92
|
+
execute "ALTER TABLE automobile_fuel_types ADD PRIMARY KEY (code);"
|
93
|
+
|
94
|
+
create_table "automobile_make_fleet_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
95
|
+
t.string "automobile_make_id"
|
96
|
+
t.string "automobile_model_year_id"
|
97
|
+
t.integer "automobile_make_year_id"
|
98
|
+
|
99
|
+
t.string "fleet"
|
100
|
+
t.string "make_name"
|
101
|
+
t.string "year"
|
102
|
+
t.float "fuel_efficiency"
|
103
|
+
t.integer "volume"
|
104
|
+
t.datetime "created_at"
|
105
|
+
t.datetime "updated_at"
|
106
|
+
|
107
|
+
t.string "row_hash"
|
108
|
+
end
|
109
|
+
execute "ALTER TABLE automobile_make_fleet_years ADD PRIMARY KEY (row_hash);"
|
110
|
+
|
111
|
+
create_table "automobile_make_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
112
|
+
t.integer "automobile_make_id" # user-defined
|
113
|
+
t.integer "automobile_model_year_id" # user-defined
|
114
|
+
t.datetime "created_at"
|
115
|
+
t.datetime "updated_at"
|
116
|
+
t.float "fuel_efficiency"
|
117
|
+
t.integer "volume"
|
118
|
+
t.string "row_hash"
|
119
|
+
end
|
120
|
+
execute "ALTER TABLE automobile_make_years ADD PRIMARY KEY (row_hash);"
|
121
|
+
|
122
|
+
create_table "automobile_makes", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
123
|
+
t.string "name"
|
124
|
+
t.datetime "updated_at"
|
125
|
+
t.datetime "created_at"
|
126
|
+
t.float "fuel_efficiency"
|
127
|
+
t.boolean "major"
|
128
|
+
end
|
129
|
+
execute "ALTER TABLE automobile_makes ADD PRIMARY KEY (name);"
|
130
|
+
|
131
|
+
create_table "automobile_model_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
132
|
+
t.integer "year"
|
133
|
+
t.datetime "updated_at"
|
134
|
+
t.datetime "created_at"
|
135
|
+
t.float "fuel_efficiency"
|
136
|
+
end
|
137
|
+
execute "ALTER TABLE automobile_model_years ADD PRIMARY KEY (year);"
|
138
|
+
|
139
|
+
create_table "automobile_models", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
140
|
+
t.string "name"
|
141
|
+
t.string "automobile_make_id"
|
142
|
+
t.datetime "updated_at"
|
143
|
+
t.datetime "created_at"
|
144
|
+
t.string "row_hash"
|
145
|
+
end
|
146
|
+
execute "ALTER TABLE automobile_models ADD PRIMARY KEY (row_hash);"
|
147
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
@@ -10,18 +10,28 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date:
|
13
|
+
date: 2010-02-25 00:00:00 -05:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: remote_table
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 0.2.1
|
25
|
+
version:
|
16
26
|
- !ruby/object:Gem::Dependency
|
17
27
|
name: activerecord
|
18
28
|
type: :runtime
|
19
29
|
version_requirement:
|
20
30
|
version_requirements: !ruby/object:Gem::Requirement
|
21
31
|
requirements:
|
22
|
-
- -
|
32
|
+
- - ~>
|
23
33
|
- !ruby/object:Gem::Version
|
24
|
-
version:
|
34
|
+
version: 2.3.4
|
25
35
|
version:
|
26
36
|
- !ruby/object:Gem::Dependency
|
27
37
|
name: activesupport
|
@@ -29,9 +39,9 @@ dependencies:
|
|
29
39
|
version_requirement:
|
30
40
|
version_requirements: !ruby/object:Gem::Requirement
|
31
41
|
requirements:
|
32
|
-
- -
|
42
|
+
- - ~>
|
33
43
|
- !ruby/object:Gem::Version
|
34
|
-
version:
|
44
|
+
version: 2.3.4
|
35
45
|
version:
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: andand
|
@@ -39,9 +49,9 @@ dependencies:
|
|
39
49
|
version_requirement:
|
40
50
|
version_requirements: !ruby/object:Gem::Requirement
|
41
51
|
requirements:
|
42
|
-
- -
|
52
|
+
- - ~>
|
43
53
|
- !ruby/object:Gem::Version
|
44
|
-
version:
|
54
|
+
version: 1.3.1
|
45
55
|
version:
|
46
56
|
- !ruby/object:Gem::Dependency
|
47
57
|
name: errata
|
@@ -49,9 +59,9 @@ dependencies:
|
|
49
59
|
version_requirement:
|
50
60
|
version_requirements: !ruby/object:Gem::Requirement
|
51
61
|
requirements:
|
52
|
-
- -
|
62
|
+
- - ~>
|
53
63
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
64
|
+
version: 0.1.4
|
55
65
|
version:
|
56
66
|
- !ruby/object:Gem::Dependency
|
57
67
|
name: conversions
|
@@ -59,19 +69,19 @@ dependencies:
|
|
59
69
|
version_requirement:
|
60
70
|
version_requirements: !ruby/object:Gem::Requirement
|
61
71
|
requirements:
|
62
|
-
- -
|
72
|
+
- - ~>
|
63
73
|
- !ruby/object:Gem::Version
|
64
|
-
version:
|
74
|
+
version: 1.4.3
|
65
75
|
version:
|
66
76
|
- !ruby/object:Gem::Dependency
|
67
|
-
name:
|
77
|
+
name: blockenspiel
|
68
78
|
type: :runtime
|
69
79
|
version_requirement:
|
70
80
|
version_requirements: !ruby/object:Gem::Requirement
|
71
81
|
requirements:
|
72
|
-
- -
|
82
|
+
- - ~>
|
73
83
|
- !ruby/object:Gem::Version
|
74
|
-
version: 0.
|
84
|
+
version: 0.3.2
|
75
85
|
version:
|
76
86
|
description: Mine remote data into your ActiveRecord models. You can also perform associations and convert units.
|
77
87
|
email: seamus@abshere.net
|
@@ -92,17 +102,13 @@ files:
|
|
92
102
|
- VERSION
|
93
103
|
- data_miner.gemspec
|
94
104
|
- lib/data_miner.rb
|
95
|
-
- lib/data_miner/active_record_ext.rb
|
96
105
|
- lib/data_miner/attribute.rb
|
97
|
-
- lib/data_miner/attribute_collection.rb
|
98
106
|
- lib/data_miner/configuration.rb
|
99
107
|
- lib/data_miner/dictionary.rb
|
100
|
-
- lib/data_miner/
|
101
|
-
- lib/data_miner/
|
102
|
-
- lib/data_miner/
|
103
|
-
- lib/data_miner/
|
104
|
-
- lib/data_miner/step/derive.rb
|
105
|
-
- lib/data_miner/step/import.rb
|
108
|
+
- lib/data_miner/import.rb
|
109
|
+
- lib/data_miner/process.rb
|
110
|
+
- lib/data_miner/run.rb
|
111
|
+
- lib/data_miner/target.rb
|
106
112
|
- lib/data_miner/william_james_cartesian_product.rb
|
107
113
|
- test/data_miner_test.rb
|
108
114
|
- test/test_helper.rb
|
@@ -1,25 +0,0 @@
|
|
1
|
-
module DataMiner
|
2
|
-
module ActiveRecordExt
|
3
|
-
def self.included(klass)
|
4
|
-
klass.extend(ClassMethods)
|
5
|
-
end
|
6
|
-
|
7
|
-
module ClassMethods
|
8
|
-
def mine_data(options = {}, &block)
|
9
|
-
if defined?(NO_DATA_MINER) and NO_DATA_MINER == true
|
10
|
-
class_eval do
|
11
|
-
class << self
|
12
|
-
def data_mine
|
13
|
-
raise "NO_DATA_MINER is set to true, so data_mine is not available"
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
else
|
18
|
-
class_eval { cattr_accessor :data_mine }
|
19
|
-
self.data_mine = Configuration.new(self)
|
20
|
-
yield data_mine
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
@@ -1,51 +0,0 @@
|
|
1
|
-
module DataMiner
|
2
|
-
class AttributeCollection
|
3
|
-
attr_accessor :klass, :attributes
|
4
|
-
|
5
|
-
def initialize(klass)
|
6
|
-
@klass = klass
|
7
|
-
@attributes = {}
|
8
|
-
end
|
9
|
-
|
10
|
-
def key!(step, attr_name, attr_options = {})
|
11
|
-
find_or_initialize(attr_name).key_for!(step, attr_options)
|
12
|
-
end
|
13
|
-
|
14
|
-
def affect!(step, attr_name, attr_options = {})
|
15
|
-
find_or_initialize(attr_name).affected_by!(step, attr_options)
|
16
|
-
end
|
17
|
-
|
18
|
-
def affect_all_content_columns!(step, options = {})
|
19
|
-
except = Array.wrap(options[:except]).map(&:to_sym)
|
20
|
-
step.klass.content_columns.map(&:name).reject { |content_column| except.include?(content_column.to_sym) }.each do |content_column|
|
21
|
-
find_or_initialize(content_column).affected_by!(step)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def all_affected_by(step)
|
26
|
-
attributes.values.select { |attr| attr.affected_by?(step) }
|
27
|
-
end
|
28
|
-
|
29
|
-
def all_keys_for(step)
|
30
|
-
attributes.values.select { |attr| attr.key_for?(step) }
|
31
|
-
end
|
32
|
-
|
33
|
-
def all_for(step)
|
34
|
-
(all_affected_by(step) + all_keys_for(step)).uniq
|
35
|
-
end
|
36
|
-
|
37
|
-
def has_keys_for?(step)
|
38
|
-
attributes.values.any? { |attr| attr.key_for?(step) }
|
39
|
-
end
|
40
|
-
|
41
|
-
def has_conditional_writes_for?(step)
|
42
|
-
all_affected_by(step).any? { |attr| !attr.wants_overwriting?(step) }
|
43
|
-
end
|
44
|
-
|
45
|
-
private
|
46
|
-
|
47
|
-
def find_or_initialize(attr_name)
|
48
|
-
self.attributes[attr_name] ||= Attribute.new(klass, attr_name)
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
data/lib/data_miner/step.rb
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
module DataMiner
|
2
|
-
class Step
|
3
|
-
attr_accessor :configuration, :number, :options
|
4
|
-
delegate :klass, :to => :configuration
|
5
|
-
delegate :attributes, :to => :configuration
|
6
|
-
|
7
|
-
def initialize(configuration, number, options = {}, &block)
|
8
|
-
@configuration = configuration
|
9
|
-
@number = number
|
10
|
-
@options = options
|
11
|
-
yield self if block_given? # pull in attributes
|
12
|
-
attributes.affect_all_content_columns!(self, :except => options[:except]) if options[:affect_all] == :content_columns
|
13
|
-
affected_attributes.each { |attr| attr.options_for_step[self][:callback] = options[:callback] } if options[:callback]
|
14
|
-
all_attributes.each { |attr| attr.options_for_step[self][:name_in_source] = attr.name_in_source(self).upcase } if options[:headers] == :upcase # TODO remove
|
15
|
-
end
|
16
|
-
|
17
|
-
def variant
|
18
|
-
self.class.name.demodulize.underscore.to_sym
|
19
|
-
end
|
20
|
-
|
21
|
-
def awaiting?
|
22
|
-
!options[:awaiting].nil?
|
23
|
-
end
|
24
|
-
|
25
|
-
def inspect
|
26
|
-
"Step(#{klass} #{variant.to_s.camelcase} #{number})"
|
27
|
-
end
|
28
|
-
|
29
|
-
def signature
|
30
|
-
"#{klass} step #{number}: #{variant}"
|
31
|
-
end
|
32
|
-
|
33
|
-
def perform(options = {})
|
34
|
-
return if awaiting? and !options[:force]
|
35
|
-
affected_attributes.each { |attr| attr.perform self }
|
36
|
-
$stderr.puts "performed #{signature}"
|
37
|
-
end
|
38
|
-
|
39
|
-
def affected_attributes
|
40
|
-
@affected_attributes ||= attributes.all_affected_by self
|
41
|
-
end
|
42
|
-
|
43
|
-
def key_attributes
|
44
|
-
@key_attributes ||= attributes.all_keys_for self
|
45
|
-
end
|
46
|
-
|
47
|
-
def all_attributes
|
48
|
-
@all_attributes ||= attributes.all_for self
|
49
|
-
end
|
50
|
-
|
51
|
-
def key(attr_name, attr_options = {})
|
52
|
-
attributes.key! self, attr_name, attr_options
|
53
|
-
end
|
54
|
-
|
55
|
-
def affect(attr_name, attr_options = {})
|
56
|
-
attributes.affect! self, attr_name, attr_options
|
57
|
-
end
|
58
|
-
alias_method :store, :affect
|
59
|
-
|
60
|
-
def map_to_attrs(method)
|
61
|
-
affected_attributes.map { |attr| attr.send method, self }.compact
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
module DataMiner
|
2
|
-
class Step
|
3
|
-
class Await < Step
|
4
|
-
attr_accessor :other_class
|
5
|
-
|
6
|
-
def initialize(configuration, number, options = {}, &block)
|
7
|
-
# doesn't call super
|
8
|
-
@configuration = configuration
|
9
|
-
@number = number
|
10
|
-
@options = options
|
11
|
-
@other_class = options.delete :other_class
|
12
|
-
configuration.awaiting! self
|
13
|
-
yield configuration # pull in steps
|
14
|
-
configuration.stop_awaiting!
|
15
|
-
end
|
16
|
-
|
17
|
-
def perform(*args)
|
18
|
-
other_class.data_mine.steps << Step::Callback.new(other_class.data_mine, self)
|
19
|
-
$stderr.puts "added #{signature} to callbacks after #{other_class}"
|
20
|
-
end
|
21
|
-
|
22
|
-
def callback
|
23
|
-
$stderr.puts "starting to perform deferred steps in #{signature}..."
|
24
|
-
all_awaiting.each { |step| step.perform :force => true }
|
25
|
-
$stderr.puts "...done"
|
26
|
-
end
|
27
|
-
|
28
|
-
private
|
29
|
-
|
30
|
-
def all_awaiting
|
31
|
-
configuration.steps.select { |step| step.options and step.options[:awaiting] == self }
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|