data_miner 0.2.6 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/CHANGELOG +5 -0
- data/README.rdoc +11 -15
- data/Rakefile +7 -2
- data/VERSION +1 -1
- data/data_miner.gemspec +27 -28
- data/lib/data_miner.rb +50 -27
- data/lib/data_miner/attribute.rb +157 -240
- data/lib/data_miner/configuration.rb +58 -55
- data/lib/data_miner/import.rb +57 -0
- data/lib/data_miner/process.rb +21 -0
- data/lib/data_miner/run.rb +7 -0
- data/lib/data_miner/target.rb +7 -0
- data/test/data_miner_test.rb +644 -48
- data/test/test_helper.rb +134 -3
- metadata +29 -23
- data/lib/data_miner/active_record_ext.rb +0 -25
- data/lib/data_miner/attribute_collection.rb +0 -51
- data/lib/data_miner/step.rb +0 -64
- data/lib/data_miner/step/associate.rb +0 -9
- data/lib/data_miner/step/await.rb +0 -35
- data/lib/data_miner/step/callback.rb +0 -22
- data/lib/data_miner/step/derive.rb +0 -9
- data/lib/data_miner/step/import.rb +0 -57
data/test/test_helper.rb
CHANGED
@@ -1,16 +1,147 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'test/unit'
|
3
3
|
require 'shoulda'
|
4
|
-
require '
|
4
|
+
require 'ruby-debug'
|
5
5
|
|
6
6
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
7
7
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
8
8
|
require 'data_miner'
|
9
9
|
|
10
10
|
ActiveRecord::Base.establish_connection(
|
11
|
-
'adapter' => '
|
12
|
-
'database' => '
|
11
|
+
'adapter' => 'mysql',
|
12
|
+
'database' => 'data_miner_test',
|
13
|
+
'username' => 'root',
|
14
|
+
'password' => ''
|
13
15
|
)
|
14
16
|
|
15
17
|
class Test::Unit::TestCase
|
16
18
|
end
|
19
|
+
|
20
|
+
ActiveRecord::Schema.define(:version => 20090819143429) do
|
21
|
+
create_table "airports", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
22
|
+
t.string "country_id"
|
23
|
+
|
24
|
+
t.string "iata_code"
|
25
|
+
t.string "name"
|
26
|
+
t.string "city"
|
27
|
+
t.string "country_name"
|
28
|
+
t.float "latitude"
|
29
|
+
t.float "longitude"
|
30
|
+
t.datetime "created_at"
|
31
|
+
t.datetime "updated_at"
|
32
|
+
end
|
33
|
+
execute "ALTER TABLE airports ADD PRIMARY KEY (iata_code);"
|
34
|
+
|
35
|
+
create_table "countries", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
36
|
+
t.string "iso_3166"
|
37
|
+
t.string "name"
|
38
|
+
t.datetime "created_at"
|
39
|
+
t.datetime "updated_at"
|
40
|
+
end
|
41
|
+
execute "ALTER TABLE countries ADD PRIMARY KEY (iso_3166);"
|
42
|
+
|
43
|
+
create_table "census_regions", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
44
|
+
t.string "name"
|
45
|
+
t.datetime "updated_at"
|
46
|
+
t.datetime "created_at"
|
47
|
+
t.integer "number"
|
48
|
+
end
|
49
|
+
execute "ALTER TABLE census_regions ADD PRIMARY KEY (number);"
|
50
|
+
|
51
|
+
create_table "automobile_variants", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
52
|
+
t.string "automobile_make_id"
|
53
|
+
t.string "automobile_model_id"
|
54
|
+
t.string "automobile_model_year_id"
|
55
|
+
t.string "automobile_fuel_type_id"
|
56
|
+
|
57
|
+
t.float "fuel_efficiency_city"
|
58
|
+
t.float "fuel_efficiency_highway"
|
59
|
+
t.string "make_name"
|
60
|
+
t.string "model_name"
|
61
|
+
t.string "year"
|
62
|
+
t.string "fuel_type_code"
|
63
|
+
t.datetime "updated_at"
|
64
|
+
t.datetime "created_at"
|
65
|
+
t.string "transmission"
|
66
|
+
t.string "drive"
|
67
|
+
t.boolean "turbo"
|
68
|
+
t.boolean "supercharger"
|
69
|
+
t.integer "cylinders"
|
70
|
+
t.float "displacement"
|
71
|
+
t.float "raw_fuel_efficiency_city"
|
72
|
+
t.float "raw_fuel_efficiency_highway"
|
73
|
+
t.integer "carline_mfr_code"
|
74
|
+
t.integer "vi_mfr_code"
|
75
|
+
t.integer "carline_code"
|
76
|
+
t.integer "carline_class_code"
|
77
|
+
t.boolean "injection"
|
78
|
+
t.string "carline_class_name"
|
79
|
+
t.string "speeds"
|
80
|
+
t.string "row_hash"
|
81
|
+
end
|
82
|
+
execute "ALTER TABLE automobile_variants ADD PRIMARY KEY (row_hash);"
|
83
|
+
|
84
|
+
create_table "automobile_fuel_types", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
85
|
+
t.string "name"
|
86
|
+
t.datetime "created_at"
|
87
|
+
t.datetime "updated_at"
|
88
|
+
t.float "emission_factor"
|
89
|
+
t.float "annual_distance"
|
90
|
+
t.string "code"
|
91
|
+
end
|
92
|
+
execute "ALTER TABLE automobile_fuel_types ADD PRIMARY KEY (code);"
|
93
|
+
|
94
|
+
create_table "automobile_make_fleet_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
95
|
+
t.string "automobile_make_id"
|
96
|
+
t.string "automobile_model_year_id"
|
97
|
+
t.integer "automobile_make_year_id"
|
98
|
+
|
99
|
+
t.string "fleet"
|
100
|
+
t.string "make_name"
|
101
|
+
t.string "year"
|
102
|
+
t.float "fuel_efficiency"
|
103
|
+
t.integer "volume"
|
104
|
+
t.datetime "created_at"
|
105
|
+
t.datetime "updated_at"
|
106
|
+
|
107
|
+
t.string "row_hash"
|
108
|
+
end
|
109
|
+
execute "ALTER TABLE automobile_make_fleet_years ADD PRIMARY KEY (row_hash);"
|
110
|
+
|
111
|
+
create_table "automobile_make_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
112
|
+
t.integer "automobile_make_id" # user-defined
|
113
|
+
t.integer "automobile_model_year_id" # user-defined
|
114
|
+
t.datetime "created_at"
|
115
|
+
t.datetime "updated_at"
|
116
|
+
t.float "fuel_efficiency"
|
117
|
+
t.integer "volume"
|
118
|
+
t.string "row_hash"
|
119
|
+
end
|
120
|
+
execute "ALTER TABLE automobile_make_years ADD PRIMARY KEY (row_hash);"
|
121
|
+
|
122
|
+
create_table "automobile_makes", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
123
|
+
t.string "name"
|
124
|
+
t.datetime "updated_at"
|
125
|
+
t.datetime "created_at"
|
126
|
+
t.float "fuel_efficiency"
|
127
|
+
t.boolean "major"
|
128
|
+
end
|
129
|
+
execute "ALTER TABLE automobile_makes ADD PRIMARY KEY (name);"
|
130
|
+
|
131
|
+
create_table "automobile_model_years", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
132
|
+
t.integer "year"
|
133
|
+
t.datetime "updated_at"
|
134
|
+
t.datetime "created_at"
|
135
|
+
t.float "fuel_efficiency"
|
136
|
+
end
|
137
|
+
execute "ALTER TABLE automobile_model_years ADD PRIMARY KEY (year);"
|
138
|
+
|
139
|
+
create_table "automobile_models", :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
140
|
+
t.string "name"
|
141
|
+
t.string "automobile_make_id"
|
142
|
+
t.datetime "updated_at"
|
143
|
+
t.datetime "created_at"
|
144
|
+
t.string "row_hash"
|
145
|
+
end
|
146
|
+
execute "ALTER TABLE automobile_models ADD PRIMARY KEY (row_hash);"
|
147
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_miner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Seamus Abshere
|
@@ -10,18 +10,28 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date:
|
13
|
+
date: 2010-02-25 00:00:00 -05:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: remote_table
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 0.2.1
|
25
|
+
version:
|
16
26
|
- !ruby/object:Gem::Dependency
|
17
27
|
name: activerecord
|
18
28
|
type: :runtime
|
19
29
|
version_requirement:
|
20
30
|
version_requirements: !ruby/object:Gem::Requirement
|
21
31
|
requirements:
|
22
|
-
- -
|
32
|
+
- - ~>
|
23
33
|
- !ruby/object:Gem::Version
|
24
|
-
version:
|
34
|
+
version: 2.3.4
|
25
35
|
version:
|
26
36
|
- !ruby/object:Gem::Dependency
|
27
37
|
name: activesupport
|
@@ -29,9 +39,9 @@ dependencies:
|
|
29
39
|
version_requirement:
|
30
40
|
version_requirements: !ruby/object:Gem::Requirement
|
31
41
|
requirements:
|
32
|
-
- -
|
42
|
+
- - ~>
|
33
43
|
- !ruby/object:Gem::Version
|
34
|
-
version:
|
44
|
+
version: 2.3.4
|
35
45
|
version:
|
36
46
|
- !ruby/object:Gem::Dependency
|
37
47
|
name: andand
|
@@ -39,9 +49,9 @@ dependencies:
|
|
39
49
|
version_requirement:
|
40
50
|
version_requirements: !ruby/object:Gem::Requirement
|
41
51
|
requirements:
|
42
|
-
- -
|
52
|
+
- - ~>
|
43
53
|
- !ruby/object:Gem::Version
|
44
|
-
version:
|
54
|
+
version: 1.3.1
|
45
55
|
version:
|
46
56
|
- !ruby/object:Gem::Dependency
|
47
57
|
name: errata
|
@@ -49,9 +59,9 @@ dependencies:
|
|
49
59
|
version_requirement:
|
50
60
|
version_requirements: !ruby/object:Gem::Requirement
|
51
61
|
requirements:
|
52
|
-
- -
|
62
|
+
- - ~>
|
53
63
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
64
|
+
version: 0.1.4
|
55
65
|
version:
|
56
66
|
- !ruby/object:Gem::Dependency
|
57
67
|
name: conversions
|
@@ -59,19 +69,19 @@ dependencies:
|
|
59
69
|
version_requirement:
|
60
70
|
version_requirements: !ruby/object:Gem::Requirement
|
61
71
|
requirements:
|
62
|
-
- -
|
72
|
+
- - ~>
|
63
73
|
- !ruby/object:Gem::Version
|
64
|
-
version:
|
74
|
+
version: 1.4.3
|
65
75
|
version:
|
66
76
|
- !ruby/object:Gem::Dependency
|
67
|
-
name:
|
77
|
+
name: blockenspiel
|
68
78
|
type: :runtime
|
69
79
|
version_requirement:
|
70
80
|
version_requirements: !ruby/object:Gem::Requirement
|
71
81
|
requirements:
|
72
|
-
- -
|
82
|
+
- - ~>
|
73
83
|
- !ruby/object:Gem::Version
|
74
|
-
version: 0.
|
84
|
+
version: 0.3.2
|
75
85
|
version:
|
76
86
|
description: Mine remote data into your ActiveRecord models. You can also perform associations and convert units.
|
77
87
|
email: seamus@abshere.net
|
@@ -92,17 +102,13 @@ files:
|
|
92
102
|
- VERSION
|
93
103
|
- data_miner.gemspec
|
94
104
|
- lib/data_miner.rb
|
95
|
-
- lib/data_miner/active_record_ext.rb
|
96
105
|
- lib/data_miner/attribute.rb
|
97
|
-
- lib/data_miner/attribute_collection.rb
|
98
106
|
- lib/data_miner/configuration.rb
|
99
107
|
- lib/data_miner/dictionary.rb
|
100
|
-
- lib/data_miner/step.rb
|
101
|
-
- lib/data_miner/step/associate.rb
|
102
|
-
- lib/data_miner/step/await.rb
|
103
|
-
- lib/data_miner/step/callback.rb
|
104
|
-
- lib/data_miner/step/derive.rb
|
105
|
-
- lib/data_miner/step/import.rb
|
108
|
+
- lib/data_miner/import.rb
|
109
|
+
- lib/data_miner/process.rb
|
110
|
+
- lib/data_miner/run.rb
|
111
|
+
- lib/data_miner/target.rb
|
106
112
|
- lib/data_miner/william_james_cartesian_product.rb
|
107
113
|
- test/data_miner_test.rb
|
108
114
|
- test/test_helper.rb
|
data/lib/data_miner/active_record_ext.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
module DataMiner
|
2
|
-
module ActiveRecordExt
|
3
|
-
def self.included(klass)
|
4
|
-
klass.extend(ClassMethods)
|
5
|
-
end
|
6
|
-
|
7
|
-
module ClassMethods
|
8
|
-
def mine_data(options = {}, &block)
|
9
|
-
if defined?(NO_DATA_MINER) and NO_DATA_MINER == true
|
10
|
-
class_eval do
|
11
|
-
class << self
|
12
|
-
def data_mine
|
13
|
-
raise "NO_DATA_MINER is set to true, so data_mine is not available"
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
else
|
18
|
-
class_eval { cattr_accessor :data_mine }
|
19
|
-
self.data_mine = Configuration.new(self)
|
20
|
-
yield data_mine
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
data/lib/data_miner/attribute_collection.rb
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
module DataMiner
|
2
|
-
class AttributeCollection
|
3
|
-
attr_accessor :klass, :attributes
|
4
|
-
|
5
|
-
def initialize(klass)
|
6
|
-
@klass = klass
|
7
|
-
@attributes = {}
|
8
|
-
end
|
9
|
-
|
10
|
-
def key!(step, attr_name, attr_options = {})
|
11
|
-
find_or_initialize(attr_name).key_for!(step, attr_options)
|
12
|
-
end
|
13
|
-
|
14
|
-
def affect!(step, attr_name, attr_options = {})
|
15
|
-
find_or_initialize(attr_name).affected_by!(step, attr_options)
|
16
|
-
end
|
17
|
-
|
18
|
-
def affect_all_content_columns!(step, options = {})
|
19
|
-
except = Array.wrap(options[:except]).map(&:to_sym)
|
20
|
-
step.klass.content_columns.map(&:name).reject { |content_column| except.include?(content_column.to_sym) }.each do |content_column|
|
21
|
-
find_or_initialize(content_column).affected_by!(step)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def all_affected_by(step)
|
26
|
-
attributes.values.select { |attr| attr.affected_by?(step) }
|
27
|
-
end
|
28
|
-
|
29
|
-
def all_keys_for(step)
|
30
|
-
attributes.values.select { |attr| attr.key_for?(step) }
|
31
|
-
end
|
32
|
-
|
33
|
-
def all_for(step)
|
34
|
-
(all_affected_by(step) + all_keys_for(step)).uniq
|
35
|
-
end
|
36
|
-
|
37
|
-
def has_keys_for?(step)
|
38
|
-
attributes.values.any? { |attr| attr.key_for?(step) }
|
39
|
-
end
|
40
|
-
|
41
|
-
def has_conditional_writes_for?(step)
|
42
|
-
all_affected_by(step).any? { |attr| !attr.wants_overwriting?(step) }
|
43
|
-
end
|
44
|
-
|
45
|
-
private
|
46
|
-
|
47
|
-
def find_or_initialize(attr_name)
|
48
|
-
self.attributes[attr_name] ||= Attribute.new(klass, attr_name)
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
data/lib/data_miner/step.rb
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
module DataMiner
|
2
|
-
class Step
|
3
|
-
attr_accessor :configuration, :number, :options
|
4
|
-
delegate :klass, :to => :configuration
|
5
|
-
delegate :attributes, :to => :configuration
|
6
|
-
|
7
|
-
def initialize(configuration, number, options = {}, &block)
|
8
|
-
@configuration = configuration
|
9
|
-
@number = number
|
10
|
-
@options = options
|
11
|
-
yield self if block_given? # pull in attributes
|
12
|
-
attributes.affect_all_content_columns!(self, :except => options[:except]) if options[:affect_all] == :content_columns
|
13
|
-
affected_attributes.each { |attr| attr.options_for_step[self][:callback] = options[:callback] } if options[:callback]
|
14
|
-
all_attributes.each { |attr| attr.options_for_step[self][:name_in_source] = attr.name_in_source(self).upcase } if options[:headers] == :upcase # TODO remove
|
15
|
-
end
|
16
|
-
|
17
|
-
def variant
|
18
|
-
self.class.name.demodulize.underscore.to_sym
|
19
|
-
end
|
20
|
-
|
21
|
-
def awaiting?
|
22
|
-
!options[:awaiting].nil?
|
23
|
-
end
|
24
|
-
|
25
|
-
def inspect
|
26
|
-
"Step(#{klass} #{variant.to_s.camelcase} #{number})"
|
27
|
-
end
|
28
|
-
|
29
|
-
def signature
|
30
|
-
"#{klass} step #{number}: #{variant}"
|
31
|
-
end
|
32
|
-
|
33
|
-
def perform(options = {})
|
34
|
-
return if awaiting? and !options[:force]
|
35
|
-
affected_attributes.each { |attr| attr.perform self }
|
36
|
-
$stderr.puts "performed #{signature}"
|
37
|
-
end
|
38
|
-
|
39
|
-
def affected_attributes
|
40
|
-
@affected_attributes ||= attributes.all_affected_by self
|
41
|
-
end
|
42
|
-
|
43
|
-
def key_attributes
|
44
|
-
@key_attributes ||= attributes.all_keys_for self
|
45
|
-
end
|
46
|
-
|
47
|
-
def all_attributes
|
48
|
-
@all_attributes ||= attributes.all_for self
|
49
|
-
end
|
50
|
-
|
51
|
-
def key(attr_name, attr_options = {})
|
52
|
-
attributes.key! self, attr_name, attr_options
|
53
|
-
end
|
54
|
-
|
55
|
-
def affect(attr_name, attr_options = {})
|
56
|
-
attributes.affect! self, attr_name, attr_options
|
57
|
-
end
|
58
|
-
alias_method :store, :affect
|
59
|
-
|
60
|
-
def map_to_attrs(method)
|
61
|
-
affected_attributes.map { |attr| attr.send method, self }.compact
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
data/lib/data_miner/step/await.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
module DataMiner
|
2
|
-
class Step
|
3
|
-
class Await < Step
|
4
|
-
attr_accessor :other_class
|
5
|
-
|
6
|
-
def initialize(configuration, number, options = {}, &block)
|
7
|
-
# doesn't call super
|
8
|
-
@configuration = configuration
|
9
|
-
@number = number
|
10
|
-
@options = options
|
11
|
-
@other_class = options.delete :other_class
|
12
|
-
configuration.awaiting! self
|
13
|
-
yield configuration # pull in steps
|
14
|
-
configuration.stop_awaiting!
|
15
|
-
end
|
16
|
-
|
17
|
-
def perform(*args)
|
18
|
-
other_class.data_mine.steps << Step::Callback.new(other_class.data_mine, self)
|
19
|
-
$stderr.puts "added #{signature} to callbacks after #{other_class}"
|
20
|
-
end
|
21
|
-
|
22
|
-
def callback
|
23
|
-
$stderr.puts "starting to perform deferred steps in #{signature}..."
|
24
|
-
all_awaiting.each { |step| step.perform :force => true }
|
25
|
-
$stderr.puts "...done"
|
26
|
-
end
|
27
|
-
|
28
|
-
private
|
29
|
-
|
30
|
-
def all_awaiting
|
31
|
-
configuration.steps.select { |step| step.options and step.options[:awaiting] == self }
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|