data_miner 0.4.30 → 0.4.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/data_miner.gemspec +3 -2
- data/lib/data_miner.rb +5 -0
- data/lib/data_miner/configuration.rb +7 -0
- data/lib/data_miner/schema.rb +217 -0
- data/test/data_miner_test.rb +56 -16
- data/test/test_helper.rb +1 -1
- metadata +4 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.30
|
1
|
+
0.4.31
|
data/data_miner.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.30"
|
8
|
+
s.version = "0.4.31"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-19}
|
13
13
|
s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -32,6 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
"lib/data_miner/import.rb",
|
33
33
|
"lib/data_miner/process.rb",
|
34
34
|
"lib/data_miner/run.rb",
|
35
|
+
"lib/data_miner/schema.rb",
|
35
36
|
"lib/data_miner/tap.rb",
|
36
37
|
"test/data_miner_test.rb",
|
37
38
|
"test/test_helper.rb"
|
data/lib/data_miner.rb
CHANGED
@@ -25,6 +25,7 @@ require 'data_miner/import'
|
|
25
25
|
require 'data_miner/tap'
|
26
26
|
require 'data_miner/process'
|
27
27
|
require 'data_miner/run'
|
28
|
+
require 'data_miner/schema'
|
28
29
|
|
29
30
|
module DataMiner
|
30
31
|
class MissingHashColumn < RuntimeError; end
|
@@ -62,6 +63,10 @@ module DataMiner
|
|
62
63
|
logger.info "[data_miner gem] #{message}"
|
63
64
|
end
|
64
65
|
|
66
|
+
def self.log_debug(message)
|
67
|
+
logger.debug "[data_miner gem] #{message}"
|
68
|
+
end
|
69
|
+
|
65
70
|
def self.run(options = {})
|
66
71
|
DataMiner::Configuration.run options
|
67
72
|
end
|
@@ -11,6 +11,13 @@ module DataMiner
|
|
11
11
|
@attributes = HashWithIndifferentAccess.new
|
12
12
|
end
|
13
13
|
|
14
|
+
def schema(create_table_options = {}, &block)
|
15
|
+
step = DataMiner::Schema.new self, step_counter, create_table_options
|
16
|
+
Blockenspiel.invoke block, step
|
17
|
+
steps << step
|
18
|
+
self.step_counter += 1
|
19
|
+
end
|
20
|
+
|
14
21
|
def process(method_name_or_block_description, &block)
|
15
22
|
steps << DataMiner::Process.new(self, step_counter, method_name_or_block_description, &block)
|
16
23
|
self.step_counter += 1
|
@@ -0,0 +1,217 @@
|
|
1
|
+
module DataMiner
|
2
|
+
class Schema
|
3
|
+
include Blockenspiel::DSL
|
4
|
+
|
5
|
+
attr_reader :configuration
|
6
|
+
attr_reader :position_in_run
|
7
|
+
attr_reader :create_table_options
|
8
|
+
delegate :resource, :to => :configuration
|
9
|
+
|
10
|
+
def initialize(configuration, position_in_run, create_table_options)
|
11
|
+
@configuration = configuration
|
12
|
+
@position_in_run = position_in_run
|
13
|
+
@create_table_options = create_table_options
|
14
|
+
end
|
15
|
+
|
16
|
+
def connection
|
17
|
+
ActiveRecord::Base.connection
|
18
|
+
end
|
19
|
+
|
20
|
+
def table_name
|
21
|
+
resource.table_name
|
22
|
+
end
|
23
|
+
|
24
|
+
def ideal_table
|
25
|
+
@ideal_table ||= ActiveRecord::ConnectionAdapters::TableDefinition.new(connection)
|
26
|
+
end
|
27
|
+
|
28
|
+
def ideal_indexes
|
29
|
+
@ideal_indexes ||= Array.new
|
30
|
+
end
|
31
|
+
|
32
|
+
def actual_indexes
|
33
|
+
connection.indexes table_name
|
34
|
+
end
|
35
|
+
|
36
|
+
def description
|
37
|
+
"Define a table called #{table_name} with primary key #{primary_key}"
|
38
|
+
end
|
39
|
+
|
40
|
+
def inspect
|
41
|
+
"Block(#{resource}): #{description}"
|
42
|
+
end
|
43
|
+
|
44
|
+
# lifted straight from activerecord-3.0.0.beta3/lib/active_record/connection_adapters/abstract/schema_definitions.rb
|
45
|
+
%w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
|
46
|
+
class_eval <<-EOV
|
47
|
+
def #{column_type}(*args) # def string(*args)
|
48
|
+
options = args.extract_options! # options = args.extract_options!
|
49
|
+
column_names = args # column_names = args
|
50
|
+
#
|
51
|
+
column_names.each { |name| ideal_table.column(name, '#{column_type}', options) } # column_names.each { |name| ideal_table.column(name, 'string', options) }
|
52
|
+
end # end
|
53
|
+
EOV
|
54
|
+
end
|
55
|
+
def column(*args)
|
56
|
+
ideal_table.column(*args)
|
57
|
+
end
|
58
|
+
# class IndexDefinition < Struct.new(:table, :name, :unique, :columns)
|
59
|
+
def index(columns, options = {})
|
60
|
+
options.symbolize_keys!
|
61
|
+
columns = Array.wrap columns
|
62
|
+
name = connection.index_name table_name, options.merge(:columns => columns)
|
63
|
+
index_unique = options.has_key?(:unique) ? options[:unique] : true
|
64
|
+
ideal_indexes.push ActiveRecord::ConnectionAdapters::IndexDefinition.new(table_name, name, index_unique, columns)
|
65
|
+
end
|
66
|
+
|
67
|
+
def primary_key
|
68
|
+
resource.primary_key.to_s
|
69
|
+
end
|
70
|
+
|
71
|
+
INDEX_PROPERTIES = %w{ name columns }
|
72
|
+
COLUMN_PROPERTIES = %w{ name type }
|
73
|
+
%w{ column index }.each do |i|
|
74
|
+
eval %{
|
75
|
+
def #{i}_needs_to_be_placed?(name)
|
76
|
+
actual = actual_#{i} name
|
77
|
+
return true unless actual
|
78
|
+
ideal = ideal_#{i} name
|
79
|
+
#{i.upcase}_PROPERTIES.any? do |property|
|
80
|
+
DataMiner.log_debug "...comparing \#{actual.send(property).inspect}.to_s <-> \#{ideal.send(property).inspect}.to_s"
|
81
|
+
actual.send(property).to_s != ideal.send(property).to_s
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def #{i}_needs_to_be_removed?(name)
|
86
|
+
ideal_#{i}(name).nil?
|
87
|
+
end
|
88
|
+
}
|
89
|
+
end
|
90
|
+
|
91
|
+
def ideal_column(name)
|
92
|
+
ideal_table[name.to_s]
|
93
|
+
end
|
94
|
+
|
95
|
+
def actual_column(name)
|
96
|
+
resource.columns_hash[name.to_s]
|
97
|
+
end
|
98
|
+
|
99
|
+
def ideal_index(name)
|
100
|
+
ideal_indexes.detect { |ideal| ideal.name == name.to_s }
|
101
|
+
end
|
102
|
+
|
103
|
+
def actual_index(name)
|
104
|
+
actual_indexes.detect { |actual| actual.name == name.to_s }
|
105
|
+
end
|
106
|
+
|
107
|
+
def place_column(name)
|
108
|
+
remove_column name if actual_column name
|
109
|
+
ideal = ideal_column name
|
110
|
+
DataMiner.log_info "adding column #{name}"
|
111
|
+
connection.add_column table_name, name, ideal.type.to_sym # symbol type!
|
112
|
+
resource.reset_column_information
|
113
|
+
end
|
114
|
+
|
115
|
+
def remove_column(name)
|
116
|
+
DataMiner.log_info "removing column #{name}"
|
117
|
+
connection.remove_column table_name, name
|
118
|
+
resource.reset_column_information
|
119
|
+
end
|
120
|
+
|
121
|
+
def place_index(name)
|
122
|
+
remove_index name if actual_index name
|
123
|
+
ideal = ideal_index name
|
124
|
+
DataMiner.log_info "adding index #{name}"
|
125
|
+
connection.add_index table_name, ideal.columns, :name => ideal.name
|
126
|
+
resource.reset_column_information
|
127
|
+
end
|
128
|
+
|
129
|
+
def remove_index(name)
|
130
|
+
DataMiner.log_info "removing index #{name}"
|
131
|
+
connection.remove_index table_name, :name => name
|
132
|
+
resource.reset_column_information
|
133
|
+
end
|
134
|
+
|
135
|
+
def run(run)
|
136
|
+
_add_extra_columns
|
137
|
+
_create_table
|
138
|
+
_set_primary_key
|
139
|
+
_remove_columns
|
140
|
+
_add_columns
|
141
|
+
_remove_indexes
|
142
|
+
_add_indexes
|
143
|
+
DataMiner.log_info "ran #{inspect}"
|
144
|
+
end
|
145
|
+
|
146
|
+
EXTRA_COLUMNS = {
|
147
|
+
:updated_at => :datetime,
|
148
|
+
:created_at => :datetime,
|
149
|
+
:data_miner_last_run_id => :integer,
|
150
|
+
:data_miner_touch_count => :integer
|
151
|
+
}
|
152
|
+
def _add_extra_columns
|
153
|
+
EXTRA_COLUMNS.each do |extra_name, extra_type|
|
154
|
+
send extra_type, extra_name unless ideal_column extra_name
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def _create_table
|
159
|
+
if not resource.table_exists?
|
160
|
+
DataMiner.log_info "creating table #{table_name} with #{create_table_options.inspect}"
|
161
|
+
connection.create_table table_name, create_table_options do |t|
|
162
|
+
t.integer :data_miner_placeholder
|
163
|
+
end
|
164
|
+
resource.reset_column_information
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
# FIXME mysql only
|
169
|
+
def _set_primary_key
|
170
|
+
if column_needs_to_be_placed?(primary_key)
|
171
|
+
retries_allowed = 1
|
172
|
+
begin
|
173
|
+
place_column primary_key
|
174
|
+
DataMiner.log_info "adding primary key #{primary_key}"
|
175
|
+
connection.execute "ALTER TABLE `#{table_name}` ADD PRIMARY KEY (`#{primary_key}`)"
|
176
|
+
rescue
|
177
|
+
if retries_allowed > 0 and $!.message =~ /primary/i
|
178
|
+
DataMiner.log_info "looks like primary key changed, re-creating table from scratch"
|
179
|
+
connection.drop_table table_name
|
180
|
+
resource.reset_column_information
|
181
|
+
_create_table
|
182
|
+
retries_allowed -= 1
|
183
|
+
retry
|
184
|
+
else
|
185
|
+
raise $!
|
186
|
+
end
|
187
|
+
end
|
188
|
+
resource.reset_column_information
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
def _remove_columns
|
193
|
+
resource.columns_hash.values.each do |actual|
|
194
|
+
remove_column actual.name if column_needs_to_be_removed? actual.name
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
def _add_columns
|
199
|
+
ideal_table.columns.each do |ideal|
|
200
|
+
place_column ideal.name if column_needs_to_be_placed? ideal.name
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
def _remove_indexes
|
205
|
+
actual_indexes.each do |actual|
|
206
|
+
remove_index actual.name if index_needs_to_be_removed? actual.name
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
def _add_indexes
|
211
|
+
ideal_indexes.each do |ideal|
|
212
|
+
next if ideal.name == primary_key # this should already have been taken care of
|
213
|
+
place_index ideal.name if index_needs_to_be_placed? ideal.name
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|
data/test/data_miner_test.rb
CHANGED
@@ -1091,22 +1091,19 @@ class AutomobileMakeFleetYear < ActiveRecord::Base
|
|
1091
1091
|
set_primary_key :name
|
1092
1092
|
|
1093
1093
|
data_miner do
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
-
t.integer 'data_miner_last_run_id'
|
1108
|
-
end
|
1109
|
-
execute 'ALTER TABLE automobile_make_fleet_years ADD PRIMARY KEY (name)'
|
1094
|
+
schema :id => false do
|
1095
|
+
string "name"
|
1096
|
+
string "make_name"
|
1097
|
+
string "fleet"
|
1098
|
+
integer "year"
|
1099
|
+
float "fuel_efficiency"
|
1100
|
+
string "fuel_efficiency_units"
|
1101
|
+
integer "volume"
|
1102
|
+
string "make_year_name"
|
1103
|
+
datetime "created_at"
|
1104
|
+
datetime "updated_at"
|
1105
|
+
integer 'data_miner_touch_count'
|
1106
|
+
integer 'data_miner_last_run_id'
|
1110
1107
|
end
|
1111
1108
|
|
1112
1109
|
# CAFE data privately emailed to Andy from Terry Anderson at the DOT/NHTSA
|
@@ -1123,12 +1120,55 @@ class AutomobileMakeFleetYear < ActiveRecord::Base
|
|
1123
1120
|
end
|
1124
1121
|
end
|
1125
1122
|
|
1123
|
+
class CensusDivisionTrois < ActiveRecord::Base
|
1124
|
+
set_primary_key :number_code
|
1125
|
+
data_miner do
|
1126
|
+
schema :options => 'ENGINE=InnoDB default charset=utf8', :id => false do
|
1127
|
+
string 'number_code'
|
1128
|
+
string 'name'
|
1129
|
+
string 'census_region_name'
|
1130
|
+
integer 'census_region_number'
|
1131
|
+
index 'census_region_name', :name => 'homefry'
|
1132
|
+
end
|
1133
|
+
end
|
1134
|
+
end
|
1135
|
+
|
1126
1136
|
# todo: have somebody properly organize these
|
1127
1137
|
class DataMinerTest < Test::Unit::TestCase
|
1128
1138
|
if ENV['ALL'] == 'true' or ENV['NEW'] == 'true'
|
1129
1139
|
end
|
1130
1140
|
|
1131
1141
|
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
1142
|
+
should "eagerly enforce a schema" do
|
1143
|
+
ActiveRecord::Base.connection.create_table 'census_division_trois', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
1144
|
+
t.string 'name'
|
1145
|
+
# t.datetime 'updated_at'
|
1146
|
+
# t.datetime 'created_at'
|
1147
|
+
t.string 'census_region_name'
|
1148
|
+
# t.integer 'census_region_number'
|
1149
|
+
# t.integer 'data_miner_touch_count'
|
1150
|
+
# t.integer 'data_miner_last_run_id'
|
1151
|
+
end
|
1152
|
+
ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_trois ADD INDEX (census_region_name)'
|
1153
|
+
CensusDivisionTrois.reset_column_information
|
1154
|
+
missing_columns = %w{ updated_at created_at census_region_number data_miner_last_run_id data_miner_touch_count }
|
1155
|
+
|
1156
|
+
# sanity check
|
1157
|
+
missing_columns.each do |column|
|
1158
|
+
assert_equal false, CensusDivisionTrois.column_names.include?(column)
|
1159
|
+
end
|
1160
|
+
assert_equal false, ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
|
1161
|
+
|
1162
|
+
3.times do
|
1163
|
+
CensusDivisionTrois.run_data_miner!
|
1164
|
+
missing_columns.each do |column|
|
1165
|
+
assert_equal true, CensusDivisionTrois.column_names.include?(column)
|
1166
|
+
end
|
1167
|
+
assert_equal true, ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
|
1168
|
+
assert_equal :string, CensusDivisionTrois.columns_hash[CensusDivisionTrois.primary_key].type
|
1169
|
+
end
|
1170
|
+
end
|
1171
|
+
|
1132
1172
|
should "allow specifying dictionaries explicitly" do
|
1133
1173
|
CensusDivisionDeux.run_data_miner!
|
1134
1174
|
assert_equal 'South Region', CensusDivisionDeux.find(5).census_region_name
|
data/test/test_helper.rb
CHANGED
@@ -15,7 +15,7 @@ ActiveRecord::Base.establish_connection(
|
|
15
15
|
)
|
16
16
|
|
17
17
|
ActiveSupport::Inflector.inflections do |inflect|
|
18
|
-
inflect.uncountable %w{ aircraft aircraft_deux census_division_deux }
|
18
|
+
inflect.uncountable %w{ aircraft aircraft_deux census_division_deux census_division_trois }
|
19
19
|
end
|
20
20
|
|
21
21
|
class Test::Unit::TestCase
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 30
|
9
|
-
version: 0.4.30
|
8
|
+
- 31
|
9
|
+
version: 0.4.31
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-19 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -183,6 +183,7 @@ files:
|
|
183
183
|
- lib/data_miner/import.rb
|
184
184
|
- lib/data_miner/process.rb
|
185
185
|
- lib/data_miner/run.rb
|
186
|
+
- lib/data_miner/schema.rb
|
186
187
|
- lib/data_miner/tap.rb
|
187
188
|
- test/data_miner_test.rb
|
188
189
|
- test/test_helper.rb
|