data_miner 0.4.30 → 0.4.31
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/data_miner.gemspec +3 -2
- data/lib/data_miner.rb +5 -0
- data/lib/data_miner/configuration.rb +7 -0
- data/lib/data_miner/schema.rb +217 -0
- data/test/data_miner_test.rb +56 -16
- data/test/test_helper.rb +1 -1
- metadata +4 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.30
|
1
|
+
0.4.31
|
data/data_miner.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.30"
|
8
|
+
s.version = "0.4.31"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-19}
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -32,6 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
"lib/data_miner/import.rb",
|
33
33
|
"lib/data_miner/process.rb",
|
34
34
|
"lib/data_miner/run.rb",
|
35
|
+
"lib/data_miner/schema.rb",
|
35
36
|
"lib/data_miner/tap.rb",
|
36
37
|
"test/data_miner_test.rb",
|
37
38
|
"test/test_helper.rb"
|
data/lib/data_miner.rb
CHANGED
@@ -25,6 +25,7 @@ require 'data_miner/import'
|
|
25
25
|
require 'data_miner/tap'
|
26
26
|
require 'data_miner/process'
|
27
27
|
require 'data_miner/run'
|
28
|
+
require 'data_miner/schema'
|
28
29
|
|
29
30
|
module DataMiner
|
30
31
|
class MissingHashColumn < RuntimeError; end
|
@@ -62,6 +63,10 @@ module DataMiner
|
|
62
63
|
logger.info "[data_miner gem] #{message}"
|
63
64
|
end
|
64
65
|
|
66
|
+
# Sends +message+ to the shared logger at debug level, prefixed with the
# gem tag so data_miner output is easy to pick out of a log.
def self.log_debug(message)
  tagged_message = "[data_miner gem] #{message}"
  logger.debug(tagged_message)
end
|
69
|
+
|
65
70
|
# Delegates to DataMiner::Configuration.run, passing +options+ through
# unchanged (an empty hash when the caller gives none).
def self.run(options = {})
  DataMiner::Configuration.run(options)
end
|
data/lib/data_miner/configuration.rb
CHANGED
@@ -11,6 +11,13 @@ module DataMiner
|
|
11
11
|
@attributes = HashWithIndifferentAccess.new
|
12
12
|
end
|
13
13
|
|
14
|
+
# Declares a schema step for this configuration.
#
# Builds a DataMiner::Schema at the current step position, lets the given
# block configure it through Blockenspiel, appends it to the list of steps
# and advances the step counter.
#
# create_table_options:: hash stored on the Schema step (it is handed to
#                        create_table when the step has to create the table)
def schema(create_table_options = {}, &block)
  schema_step = DataMiner::Schema.new(self, step_counter, create_table_options)
  Blockenspiel.invoke(block, schema_step)
  steps << schema_step
  self.step_counter = step_counter + 1
end
|
20
|
+
|
14
21
|
def process(method_name_or_block_description, &block)
|
15
22
|
steps << DataMiner::Process.new(self, step_counter, method_name_or_block_description, &block)
|
16
23
|
self.step_counter += 1
|
data/lib/data_miner/schema.rb
CHANGED
@@ -0,0 +1,217 @@
|
|
1
|
+
module DataMiner
  # A data_miner step that makes a resource's database table match a declared
  # ("ideal") set of columns and indexes.
  #
  # Columns and indexes are declared through the DSL methods below (+string+,
  # +integer+, +column+, +index+, ...). #run then creates the table when it
  # does not exist, sets the primary key, and adds/removes columns and indexes
  # until the actual table agrees with the declaration.
  class Schema
    include Blockenspiel::DSL

    attr_reader :configuration
    attr_reader :position_in_run
    attr_reader :create_table_options
    delegate :resource, :to => :configuration

    # configuration::        the owning configuration; +resource+ is delegated to it
    # position_in_run::      position of this step, as supplied by the caller
    # create_table_options:: options handed to +create_table+ in #_create_table
    def initialize(configuration, position_in_run, create_table_options)
      @configuration = configuration
      @position_in_run = position_in_run
      @create_table_options = create_table_options
    end

    def connection
      ActiveRecord::Base.connection
    end

    def table_name
      resource.table_name
    end

    # Table definition that accumulates the declared (ideal) columns.
    def ideal_table
      @ideal_table ||= ActiveRecord::ConnectionAdapters::TableDefinition.new(connection)
    end

    # IndexDefinition objects accumulated by #index.
    def ideal_indexes
      @ideal_indexes ||= []
    end

    # Indexes as currently reported by the connection (queried on every call).
    def actual_indexes
      connection.indexes table_name
    end

    def description
      "Define a table called #{table_name} with primary key #{primary_key}"
    end

    def inspect
      "Block(#{resource}): #{description}"
    end

    # Column-type DSL methods, modelled on
    # activerecord-3.0.0.beta3/lib/active_record/connection_adapters/abstract/schema_definitions.rb
    #
    #   string 'name', 'fleet'   # declares two string columns
    #
    # A trailing hash is treated as column options and passed to
    # ideal_table.column for every listed name.
    %w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
      define_method(column_type) do |*args|
        options = args.extract_options!
        args.each { |name| ideal_table.column(name, column_type, options) }
      end
    end

    # Declares a column directly; arguments are forwarded to TableDefinition#column.
    def column(*args)
      ideal_table.column(*args)
    end

    # Declares an index on +columns+ (a single name or an array of names).
    #
    # options:
    # :name::   explicit index name; when omitted the connection's default
    #           name for these columns is used
    # :unique:: stored on the index definition; defaults to true
    #
    # The name and the column names are stored as strings because the lookup
    # and comparison helpers below compare stringified values.
    def index(columns, options = {})
      options = options.symbolize_keys # copy, so the caller's hash is not mutated
      columns = Array.wrap(columns).map { |column_name| column_name.to_s }
      # index_name only understands :column (not :columns); asking it with the
      # wrong key raised ArgumentError whenever :name was not given.
      name = (options[:name] || connection.index_name(table_name, :column => columns)).to_s
      index_unique = options.has_key?(:unique) ? options[:unique] : true
      ideal_indexes.push ActiveRecord::ConnectionAdapters::IndexDefinition.new(table_name, name, index_unique, columns)
    end

    def primary_key
      resource.primary_key.to_s
    end

    # Properties compared (as strings) to decide whether an existing
    # index/column already matches its ideal definition.
    INDEX_PROPERTIES = %w{ name columns }.freeze
    COLUMN_PROPERTIES = %w{ name type }.freeze

    # Defines, for both columns and indexes:
    #   column_needs_to_be_placed?(name)  / index_needs_to_be_placed?(name)
    #   column_needs_to_be_removed?(name) / index_needs_to_be_removed?(name)
    [['column', COLUMN_PROPERTIES], ['index', INDEX_PROPERTIES]].each do |kind, properties|
      # True when nothing by that name exists yet, or when any compared
      # property of the existing one differs from the ideal one.
      define_method("#{kind}_needs_to_be_placed?") do |name|
        actual = send("actual_#{kind}", name)
        if actual
          ideal = send("ideal_#{kind}", name)
          properties.any? do |property|
            DataMiner.log_debug "...comparing #{actual.send(property).inspect}.to_s <-> #{ideal.send(property).inspect}.to_s"
            actual.send(property).to_s != ideal.send(property).to_s
          end
        else
          true
        end
      end

      # True when nothing by that name has been declared.
      define_method("#{kind}_needs_to_be_removed?") do |name|
        send("ideal_#{kind}", name).nil?
      end
    end

    def ideal_column(name)
      ideal_table[name.to_s]
    end

    def actual_column(name)
      resource.columns_hash[name.to_s]
    end

    def ideal_index(name)
      ideal_indexes.detect { |ideal| ideal.name == name.to_s }
    end

    def actual_index(name)
      actual_indexes.detect { |actual| actual.name == name.to_s }
    end

    # (Re)creates column +name+ from its ideal definition, dropping any
    # existing column of that name first.
    # NOTE(review): only the column type is passed to add_column; other
    # declared column options are not applied here -- confirm that is intended.
    def place_column(name)
      remove_column name if actual_column name
      ideal = ideal_column name
      DataMiner.log_info "adding column #{name}"
      connection.add_column table_name, name, ideal.type.to_sym # symbol type!
      resource.reset_column_information
    end

    def remove_column(name)
      DataMiner.log_info "removing column #{name}"
      connection.remove_column table_name, name
      resource.reset_column_information
    end

    # (Re)creates index +name+ from its ideal definition, dropping any
    # existing index of that name first.
    # NOTE(review): the :unique flag stored by #index is not passed to
    # add_index -- confirm whether that is intentional before changing it.
    def place_index(name)
      remove_index name if actual_index name
      ideal = ideal_index name
      DataMiner.log_info "adding index #{name}"
      connection.add_index table_name, ideal.columns, :name => ideal.name
      resource.reset_column_information
    end

    def remove_index(name)
      DataMiner.log_info "removing index #{name}"
      connection.remove_index table_name, :name => name
      resource.reset_column_information
    end

    # Executes the step. The +run+ argument is accepted but not used here.
    def run(run)
      _add_extra_columns
      _create_table
      _set_primary_key
      _remove_columns
      _add_columns
      _remove_indexes
      _add_indexes
      DataMiner.log_info "ran #{inspect}"
    end

    # Columns added to the ideal table automatically unless already declared.
    EXTRA_COLUMNS = {
      :updated_at => :datetime,
      :created_at => :datetime,
      :data_miner_last_run_id => :integer,
      :data_miner_touch_count => :integer
    }.freeze

    def _add_extra_columns
      EXTRA_COLUMNS.each do |extra_name, extra_type|
        send extra_type, extra_name unless ideal_column extra_name
      end
    end

    # Creates the table when it does not exist yet. It is created with a
    # single placeholder column; since that column is never declared, the
    # later _remove_columns pass drops it again.
    def _create_table
      return if resource.table_exists?
      DataMiner.log_info "creating table #{table_name} with #{create_table_options.inspect}"
      connection.create_table table_name, create_table_options do |t|
        t.integer :data_miner_placeholder
      end
      resource.reset_column_information
    end

    # Places the primary key column and marks it as the table's primary key.
    # If that fails with an error mentioning "primary", the table is dropped
    # and re-created once before trying again.
    #
    # Identifiers are quoted through the connection adapter rather than with
    # hard-coded backticks.
    # FIXME the ALTER TABLE statement and the error-message check were written
    # for MySQL; other databases are untested.
    def _set_primary_key
      return unless column_needs_to_be_placed?(primary_key)
      retries_allowed = 1
      begin
        place_column primary_key
        DataMiner.log_info "adding primary key #{primary_key}"
        connection.execute "ALTER TABLE #{connection.quote_table_name(table_name)} ADD PRIMARY KEY (#{connection.quote_column_name(primary_key)})"
      rescue => error
        # anything other than a (first) primary-key complaint is re-raised untouched
        raise unless retries_allowed > 0 && error.message =~ /primary/i
        DataMiner.log_info "looks like primary key changed, re-creating table from scratch"
        connection.drop_table table_name
        resource.reset_column_information
        _create_table
        retries_allowed -= 1
        retry
      end
      resource.reset_column_information
    end

    # Drops every existing column that was not declared.
    def _remove_columns
      resource.columns_hash.values.each do |actual|
        remove_column actual.name if column_needs_to_be_removed? actual.name
      end
    end

    # Adds (or re-adds) every declared column that is missing or differs.
    def _add_columns
      ideal_table.columns.each do |ideal|
        place_column ideal.name if column_needs_to_be_placed? ideal.name
      end
    end

    # Drops every existing index that was not declared.
    def _remove_indexes
      actual_indexes.each do |actual|
        remove_index actual.name if index_needs_to_be_removed? actual.name
      end
    end

    # Adds (or re-adds) every declared index that is missing or differs.
    def _add_indexes
      ideal_indexes.each do |ideal|
        next if ideal.name == primary_key # this should already have been taken care of
        place_index ideal.name if index_needs_to_be_placed? ideal.name
      end
    end
  end
end
|
data/test/data_miner_test.rb
CHANGED
@@ -1091,22 +1091,19 @@ class AutomobileMakeFleetYear < ActiveRecord::Base
|
|
1091
1091
|
set_primary_key :name
|
1092
1092
|
|
1093
1093
|
data_miner do
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
-
t.integer 'data_miner_last_run_id'
|
1108
|
-
end
|
1109
|
-
execute 'ALTER TABLE automobile_make_fleet_years ADD PRIMARY KEY (name)'
|
1094
|
+
schema :id => false do
|
1095
|
+
string "name"
|
1096
|
+
string "make_name"
|
1097
|
+
string "fleet"
|
1098
|
+
integer "year"
|
1099
|
+
float "fuel_efficiency"
|
1100
|
+
string "fuel_efficiency_units"
|
1101
|
+
integer "volume"
|
1102
|
+
string "make_year_name"
|
1103
|
+
datetime "created_at"
|
1104
|
+
datetime "updated_at"
|
1105
|
+
integer 'data_miner_touch_count'
|
1106
|
+
integer 'data_miner_last_run_id'
|
1110
1107
|
end
|
1111
1108
|
|
1112
1109
|
# CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
|
@@ -1123,12 +1120,55 @@ class AutomobileMakeFleetYear < ActiveRecord::Base
|
|
1123
1120
|
end
|
1124
1121
|
end
|
1125
1122
|
|
1123
|
+
# Fixture model for the schema test: its table layout is declared entirely
# through the data_miner +schema+ step, keyed on the string column number_code.
class CensusDivisionTrois < ActiveRecord::Base
  set_primary_key :number_code

  data_miner do
    schema(:options => 'ENGINE=InnoDB default charset=utf8', :id => false) do
      # the three string columns, declared in one call (same order as before)
      string 'number_code', 'name', 'census_region_name'
      integer 'census_region_number'
      index 'census_region_name', :name => 'homefry'
    end
  end
end
|
1135
|
+
|
1126
1136
|
# todo: have somebody properly organize these
|
1127
1137
|
class DataMinerTest < Test::Unit::TestCase
|
1128
1138
|
if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
|
1129
1139
|
end
|
1130
1140
|
|
1131
1141
|
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
1142
|
+
should "eagerly enforce a schema" do
  db = ActiveRecord::Base.connection
  homefry_index_present = lambda do
    db.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
  end

  # Start from a deliberately incomplete table: only two of the declared
  # columns exist, and the only index is an unnamed one.
  db.create_table 'census_division_trois', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
    t.string 'name'
    t.string 'census_region_name'
  end
  db.execute 'ALTER TABLE census_division_trois ADD INDEX (census_region_name)'
  CensusDivisionTrois.reset_column_information

  missing_columns = %w{ updated_at created_at census_region_number data_miner_last_run_id data_miner_touch_count }

  # sanity check: none of the columns or the named index exist yet
  missing_columns.each do |column|
    assert_equal false, CensusDivisionTrois.column_names.include?(column)
  end
  assert_equal false, homefry_index_present.call

  # running repeatedly must keep producing the same, complete schema
  3.times do
    CensusDivisionTrois.run_data_miner!
    missing_columns.each do |column|
      assert_equal true, CensusDivisionTrois.column_names.include?(column)
    end
    assert_equal true, homefry_index_present.call
    primary_key_column = CensusDivisionTrois.columns_hash[CensusDivisionTrois.primary_key]
    assert_equal :string, primary_key_column.type
  end
end
|
1171
|
+
|
1132
1172
|
should "allow specifying dictionaries explicitly" do
|
1133
1173
|
CensusDivisionDeux.run_data_miner!
|
1134
1174
|
assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
|
data/test/test_helper.rb
CHANGED
@@ -15,7 +15,7 @@ ActiveRecord::Base.establish_connection(
|
|
15
15
|
)
|
16
16
|
|
17
17
|
ActiveSupport::Inflector.inflections do |inflect|
|
18
|
-
inflect.uncountable %w{ aircraft aircraft_deux census_division_deux }
|
18
|
+
inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
|
19
19
|
end
|
20
20
|
|
21
21
|
class Test::Unit::TestCase
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 30
|
9
|
-
version: 0.4.30
|
8
|
+
- 31
|
9
|
+
version: 0.4.31
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-19 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -183,6 +183,7 @@ files:
|
|
183
183
|
- lib/data_miner/import.rb
|
184
184
|
- lib/data_miner/process.rb
|
185
185
|
- lib/data_miner/run.rb
|
186
|
+
- lib/data_miner/schema.rb
|
186
187
|
- lib/data_miner/tap.rb
|
187
188
|
- test/data_miner_test.rb
|
188
189
|
- test/test_helper.rb
|