data_miner 0.5.7 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +8 -0
- data/CHANGELOG +7 -0
- data/Gemfile +4 -0
- data/LICENSE +1 -1
- data/Rakefile +23 -0
- data/data_miner.gemspec +35 -0
- data/lib/data_miner.rb +55 -96
- data/lib/data_miner/active_record_extensions.rb +38 -0
- data/lib/data_miner/attribute.rb +63 -58
- data/lib/data_miner/config.rb +184 -0
- data/lib/data_miner/dictionary.rb +25 -12
- data/lib/data_miner/import.rb +59 -50
- data/lib/data_miner/process.rb +24 -19
- data/lib/data_miner/run.rb +3 -3
- data/lib/data_miner/schema.rb +50 -53
- data/lib/data_miner/tap.rb +24 -24
- data/lib/data_miner/verify.rb +17 -24
- data/lib/data_miner/version.rb +3 -0
- data/test/{test_helper.rb → helper.rb} +20 -3
- data/test/{data_miner/attribute_test.rb → test_attribute.rb} +2 -2
- data/test/{data_miner_test.rb → test_old_syntax.rb} +28 -32
- data/test/{data_miner/verify_test.rb → test_verify.rb} +4 -4
- metadata +80 -101
- data/lib/data_miner/base.rb +0 -204
data/lib/data_miner/schema.rb
CHANGED
@@ -1,24 +1,28 @@
|
|
1
|
-
|
1
|
+
require 'blockenspiel'
|
2
|
+
class DataMiner
|
2
3
|
class Schema
|
3
|
-
include Blockenspiel::DSL
|
4
|
+
include ::Blockenspiel::DSL
|
4
5
|
|
5
|
-
attr_reader :
|
6
|
-
attr_reader :position_in_run
|
6
|
+
attr_reader :config
|
7
7
|
attr_reader :create_table_options
|
8
|
-
delegate :resource, :to => :base
|
9
8
|
|
10
|
-
def initialize(
|
11
|
-
@
|
12
|
-
@
|
13
|
-
@create_table_options
|
14
|
-
@create_table_options
|
15
|
-
|
16
|
-
|
17
|
-
@create_table_options[:id] = false # always
|
9
|
+
def initialize(config, create_table_options)
|
10
|
+
@config = config
|
11
|
+
@create_table_options = create_table_options.dup
|
12
|
+
@create_table_options.stringify_keys!
|
13
|
+
raise "'id' => true is not allowed in create_table_options." if @create_table_options['id'] === true
|
14
|
+
raise "'primary_key' is not allowed in create_table_options. Use set_primary_key instead." if @create_table_options.has_key?('primary_key')
|
15
|
+
@create_table_options['id'] = false # always
|
18
16
|
end
|
19
17
|
|
18
|
+
def resource
|
19
|
+
config.resource
|
20
|
+
end
|
21
|
+
|
22
|
+
# sabshere 1/25/11 what if there were multiple connections
|
23
|
+
# blockenspiel doesn't like to delegate this to #resource
|
20
24
|
def connection
|
21
|
-
ActiveRecord::Base.connection
|
25
|
+
::ActiveRecord::Base.connection
|
22
26
|
end
|
23
27
|
|
24
28
|
def table_name
|
@@ -26,11 +30,11 @@ module DataMiner
|
|
26
30
|
end
|
27
31
|
|
28
32
|
def ideal_table
|
29
|
-
@ideal_table ||= ActiveRecord::ConnectionAdapters::TableDefinition.new
|
33
|
+
@ideal_table ||= ::ActiveRecord::ConnectionAdapters::TableDefinition.new connection
|
30
34
|
end
|
31
35
|
|
32
36
|
def ideal_indexes
|
33
|
-
@ideal_indexes ||=
|
37
|
+
@ideal_indexes ||= []
|
34
38
|
end
|
35
39
|
|
36
40
|
def actual_indexes
|
@@ -42,10 +46,10 @@ module DataMiner
|
|
42
46
|
end
|
43
47
|
|
44
48
|
def inspect
|
45
|
-
|
49
|
+
%{#<DataMiner::Schema(#{resource}): #{description}>}
|
46
50
|
end
|
47
51
|
|
48
|
-
# lifted straight from activerecord-3.0.
|
52
|
+
# sabshere 1/25/11 lifted straight from activerecord-3.0.3/lib/active_record/connection_adapters/abstract/schema_definitions.rb
|
49
53
|
%w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
|
50
54
|
class_eval <<-EOV
|
51
55
|
def #{column_type}(*args) # def string(*args)
|
@@ -60,16 +64,18 @@ module DataMiner
|
|
60
64
|
ideal_table.column(*args)
|
61
65
|
end
|
62
66
|
|
63
|
-
MAX_INDEX_NAME_LENGTH =
|
67
|
+
MAX_INDEX_NAME_LENGTH = 32
|
64
68
|
def index(columns, options = {})
|
65
|
-
options.
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
69
|
+
options = options.dup
|
70
|
+
options.stringify_keys!
|
71
|
+
columns = ::Array.wrap columns
|
72
|
+
unless name = options['name']
|
73
|
+
default_name = connection.index_name(table_name, options.symbolize_keys.merge(:column => columns))
|
74
|
+
name = default_name.length < MAX_INDEX_NAME_LENGTH ? default_name : default_name[0..MAX_INDEX_NAME_LENGTH-11] + ::Zlib.crc32(default_name).to_s
|
70
75
|
end
|
71
|
-
index_unique = options.has_key?(
|
72
|
-
ideal_indexes.push ActiveRecord::ConnectionAdapters::IndexDefinition.new(table_name, name, index_unique, columns)
|
76
|
+
index_unique = options.has_key?('unique') ? options['unique'] : true
|
77
|
+
ideal_indexes.push ::ActiveRecord::ConnectionAdapters::IndexDefinition.new(table_name, name, index_unique, columns)
|
78
|
+
nil
|
73
79
|
end
|
74
80
|
|
75
81
|
def ideal_primary_key_name
|
@@ -84,7 +90,7 @@ module DataMiner
|
|
84
90
|
def index_equivalent?(a, b)
|
85
91
|
return false unless a and b
|
86
92
|
INDEX_PROPERTIES.all? do |property|
|
87
|
-
DataMiner.
|
93
|
+
::DataMiner.logger.debug "...comparing #{a.send(property).inspect}.to_s <-> #{b.send(property).inspect}.to_s"
|
88
94
|
a.send(property).to_s == b.send(property).to_s
|
89
95
|
end
|
90
96
|
end
|
@@ -131,13 +137,13 @@ module DataMiner
|
|
131
137
|
def place_column(name)
|
132
138
|
remove_column name if actual_column name
|
133
139
|
ideal = ideal_column name
|
134
|
-
DataMiner.
|
140
|
+
::DataMiner.logger.debug "ADDING COLUMN #{name}"
|
135
141
|
connection.add_column table_name, name, ideal.type.to_sym # symbol type!
|
136
142
|
resource.reset_column_information
|
137
143
|
end
|
138
144
|
|
139
145
|
def remove_column(name)
|
140
|
-
DataMiner.
|
146
|
+
::DataMiner.logger.debug "REMOVING COLUMN #{name}"
|
141
147
|
connection.remove_column table_name, name
|
142
148
|
resource.reset_column_information
|
143
149
|
end
|
@@ -145,43 +151,34 @@ module DataMiner
|
|
145
151
|
def place_index(name)
|
146
152
|
remove_index name if actual_index name
|
147
153
|
ideal = ideal_index name
|
148
|
-
DataMiner.
|
154
|
+
::DataMiner.logger.debug "ADDING INDEX #{name}"
|
149
155
|
connection.add_index table_name, ideal.columns, :name => ideal.name
|
150
156
|
resource.reset_column_information
|
151
157
|
end
|
152
158
|
|
153
159
|
def remove_index(name)
|
154
|
-
DataMiner.
|
160
|
+
::DataMiner.logger.debug "REMOVING INDEX #{name}"
|
155
161
|
connection.remove_index table_name, :name => name
|
156
162
|
resource.reset_column_information
|
157
163
|
end
|
158
164
|
|
159
|
-
def run
|
160
|
-
_add_extra_columns
|
165
|
+
def run
|
161
166
|
_create_table
|
162
167
|
_set_primary_key
|
163
168
|
_remove_columns
|
164
169
|
_add_columns
|
165
170
|
_remove_indexes
|
166
171
|
_add_indexes
|
167
|
-
|
172
|
+
nil
|
168
173
|
end
|
169
|
-
|
170
|
-
EXTRA_COLUMNS = {
|
171
|
-
:updated_at => :datetime,
|
172
|
-
:created_at => :datetime
|
173
|
-
}
|
174
|
-
def _add_extra_columns
|
175
|
-
EXTRA_COLUMNS.each do |extra_name, extra_type|
|
176
|
-
send extra_type, extra_name unless ideal_column extra_name
|
177
|
-
end
|
178
|
-
end
|
179
|
-
|
174
|
+
|
180
175
|
def _create_table
|
181
176
|
if not resource.table_exists?
|
182
|
-
|
177
|
+
create_table_options = @create_table_options.dup
|
178
|
+
create_table_options.symbolize_keys!
|
179
|
+
::DataMiner.logger.debug "CREATING TABLE #{table_name} with #{create_table_options.inspect}"
|
183
180
|
connection.create_table table_name, create_table_options do |t|
|
184
|
-
t.integer
|
181
|
+
t.integer 'data_miner_placeholder'
|
185
182
|
end
|
186
183
|
resource.reset_column_information
|
187
184
|
end
|
@@ -190,23 +187,23 @@ module DataMiner
|
|
190
187
|
# FIXME mysql only
|
191
188
|
def _set_primary_key
|
192
189
|
if ideal_primary_key_name == 'id' and not ideal_column('id')
|
193
|
-
DataMiner.
|
194
|
-
column 'id', :primary_key
|
190
|
+
::DataMiner.logger.debug "no special primary key set on #{table_name}, so using 'id'"
|
191
|
+
column 'id', :primary_key # needs to be a sym?
|
195
192
|
end
|
196
193
|
actual = actual_column actual_primary_key_name
|
197
194
|
ideal = ideal_column ideal_primary_key_name
|
198
195
|
if not column_equivalent? actual, ideal
|
199
|
-
DataMiner.
|
196
|
+
::DataMiner.logger.debug "looks like #{table_name} has a bad (or missing) primary key"
|
200
197
|
if actual
|
201
|
-
DataMiner.
|
198
|
+
::DataMiner.logger.debug "looks like primary key needs to change from #{actual_primary_key_name} to #{ideal_primary_key_name}, re-creating #{table_name} from scratch"
|
202
199
|
connection.drop_table table_name
|
203
200
|
resource.reset_column_information
|
204
201
|
_create_table
|
205
202
|
end
|
206
203
|
place_column ideal_primary_key_name
|
207
204
|
unless ideal.type.to_s == 'primary_key'
|
208
|
-
DataMiner.
|
209
|
-
if
|
205
|
+
::DataMiner.logger.debug "SETTING #{ideal_primary_key_name} AS PRIMARY KEY"
|
206
|
+
if connection.adapter_name.downcase == 'sqlite'
|
210
207
|
connection.execute "CREATE UNIQUE INDEX IDX_#{table_name}_#{ideal_primary_key_name} ON #{table_name} (#{ideal_primary_key_name} ASC)"
|
211
208
|
else
|
212
209
|
connection.execute "ALTER TABLE `#{table_name}` ADD PRIMARY KEY (`#{ideal_primary_key_name}`)"
|
data/lib/data_miner/tap.rb
CHANGED
@@ -1,52 +1,52 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require 'escape'
|
2
|
+
class DataMiner
|
3
3
|
class Tap
|
4
|
-
attr_reader :
|
5
|
-
attr_reader :position_in_run
|
4
|
+
attr_reader :config
|
6
5
|
attr_reader :description
|
7
6
|
attr_reader :source
|
8
7
|
attr_reader :options
|
9
|
-
delegate :resource, :to => :base
|
10
8
|
|
11
|
-
def initialize(
|
12
|
-
|
13
|
-
|
14
|
-
@
|
15
|
-
@position_in_run = position_in_run
|
9
|
+
def initialize(config, description, source, options = {})
|
10
|
+
@config = config
|
11
|
+
@options = options.dup
|
12
|
+
@options.stringify_keys!
|
16
13
|
@description = description
|
17
14
|
@source = source
|
18
|
-
|
15
|
+
end
|
16
|
+
|
17
|
+
def resource
|
18
|
+
config.resource
|
19
19
|
end
|
20
20
|
|
21
21
|
def inspect
|
22
|
-
|
22
|
+
%{#<DataMiner::Tap(#{resource}): #{description} (#{source})>}
|
23
23
|
end
|
24
24
|
|
25
|
-
def run
|
25
|
+
def run
|
26
26
|
[ source_table_name, resource.table_name ].each do |possible_obstacle|
|
27
|
-
if connection.table_exists?
|
27
|
+
if connection.table_exists? possible_obstacle
|
28
28
|
connection.drop_table possible_obstacle
|
29
29
|
end
|
30
30
|
end
|
31
|
-
DataMiner.backtick_with_reporting taps_pull_cmd
|
31
|
+
::DataMiner.backtick_with_reporting taps_pull_cmd
|
32
32
|
if needs_table_rename?
|
33
33
|
connection.rename_table source_table_name, resource.table_name
|
34
34
|
end
|
35
|
-
|
35
|
+
nil
|
36
36
|
end
|
37
37
|
|
38
|
-
|
39
|
-
|
38
|
+
# sabshere 1/25/11 what if there were multiple connections
|
39
|
+
# blockenspiel doesn't like to delegate this to #resource
|
40
40
|
def connection
|
41
|
-
ActiveRecord::Base.connection
|
41
|
+
::ActiveRecord::Base.connection
|
42
42
|
end
|
43
43
|
|
44
44
|
def db_config
|
45
|
-
@
|
45
|
+
@db_config ||= connection.instance_variable_get(:@config).stringify_keys.merge(options.except('source_table_name'))
|
46
46
|
end
|
47
47
|
|
48
48
|
def source_table_name
|
49
|
-
options[
|
49
|
+
options['source_table_name'] || resource.table_name
|
50
50
|
end
|
51
51
|
|
52
52
|
def needs_table_rename?
|
@@ -66,7 +66,7 @@ module DataMiner
|
|
66
66
|
|
67
67
|
# never optional
|
68
68
|
def database
|
69
|
-
db_config[
|
69
|
+
db_config['database']
|
70
70
|
end
|
71
71
|
|
72
72
|
DEFAULT_PORTS = {
|
@@ -88,7 +88,7 @@ module DataMiner
|
|
88
88
|
%w{ username password port host }.each do |x|
|
89
89
|
module_eval %{
|
90
90
|
def #{x}
|
91
|
-
db_config[
|
91
|
+
db_config['#{x}'] || DEFAULT_#{x.upcase}S[adapter]
|
92
92
|
end
|
93
93
|
}
|
94
94
|
end
|
@@ -104,7 +104,7 @@ module DataMiner
|
|
104
104
|
|
105
105
|
# taps pull mysql://root:password@localhost/taps_test http://foo:bar@data.brighterplanet.com:5000 --tables aircraft
|
106
106
|
def taps_pull_cmd
|
107
|
-
Escape.shell_command [
|
107
|
+
::Escape.shell_command [
|
108
108
|
'taps',
|
109
109
|
'pull',
|
110
110
|
"#{adapter}://#{db_locator}",
|
data/lib/data_miner/verify.rb
CHANGED
@@ -1,35 +1,28 @@
|
|
1
|
-
|
1
|
+
class DataMiner
|
2
2
|
class Verify
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
attr_reader :config
|
4
|
+
attr_reader :description
|
5
|
+
attr_reader :blk
|
6
|
+
|
7
|
+
def initialize(config, description, &blk)
|
8
|
+
@config = config
|
9
|
+
@description = description
|
10
|
+
@blk = blk
|
11
|
+
end
|
7
12
|
|
8
|
-
def
|
9
|
-
|
10
|
-
self.position_in_run = position_in_run
|
11
|
-
self.description = description
|
12
|
-
self.check = check
|
13
|
+
def resource
|
14
|
+
config.resource
|
13
15
|
end
|
14
16
|
|
15
17
|
def inspect
|
16
|
-
|
18
|
+
%{#<DataMiner::Verify(#{resource}) (#{description})>}
|
17
19
|
end
|
18
20
|
|
19
|
-
def run
|
20
|
-
|
21
|
-
|
22
|
-
rescue Exception => e # need this to catch Test::Unit assertions
|
23
|
-
raise VerificationFailed,
|
24
|
-
"#{e.inspect}: #{e.backtrace.join("\n")}"
|
25
|
-
rescue => e
|
26
|
-
raise VerificationFailed,
|
27
|
-
"#{e.inspect}: #{e.backtrace.join("\n")}"
|
28
|
-
end
|
29
|
-
unless verification
|
30
|
-
raise VerificationFailed, "Result of check was false"
|
21
|
+
def run
|
22
|
+
unless blk.call
|
23
|
+
raise VerificationFailed, "FAILED VERIFICATION: #{inspect}"
|
31
24
|
end
|
32
|
-
|
25
|
+
nil
|
33
26
|
end
|
34
27
|
end
|
35
28
|
end
|
@@ -1,12 +1,29 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
unless RUBY_VERSION >= '1.9'
|
4
|
+
gem 'fastercsv'
|
5
|
+
require 'fastercsv'
|
6
|
+
end
|
7
|
+
Bundler.setup
|
2
8
|
require 'test/unit'
|
3
9
|
require 'shoulda'
|
4
|
-
|
5
|
-
|
10
|
+
unless RUBY_VERSION >= '1.9'
|
11
|
+
require 'ruby-debug'
|
12
|
+
end
|
6
13
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
7
15
|
require 'data_miner'
|
16
|
+
class Test::Unit::TestCase
|
17
|
+
end
|
18
|
+
|
19
|
+
test_log = File.open('test.log', 'w')
|
20
|
+
test_log.sync = true
|
21
|
+
DataMiner.logger = Logger.new test_log
|
22
|
+
|
23
|
+
# because some of the test files reference it
|
24
|
+
require 'errata'
|
8
25
|
|
9
|
-
ENV['WIP'] = true if ENV['ALL'] == 'true'
|
26
|
+
ENV['WIP'] = 'true' if ENV['ALL'] == 'true'
|
10
27
|
|
11
28
|
ActiveRecord::Base.establish_connection(
|
12
29
|
'adapter' => 'mysql',
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'helper'
|
2
2
|
|
3
3
|
class TappedAirport < ActiveRecord::Base
|
4
4
|
set_primary_key :iata_code
|
@@ -609,7 +609,7 @@ class CensusDivisionTrois < ActiveRecord::Base
|
|
609
609
|
string 'census_region_name'
|
610
610
|
integer 'census_region_number'
|
611
611
|
index 'census_region_name', :name => 'homefry'
|
612
|
-
index ['number_code', 'name', 'census_region_name', 'census_region_number'
|
612
|
+
index ['number_code', 'name', 'census_region_name', 'census_region_number']
|
613
613
|
end
|
614
614
|
end
|
615
615
|
end
|
@@ -627,7 +627,7 @@ class CensusDivisionFour < ActiveRecord::Base
|
|
627
627
|
end
|
628
628
|
|
629
629
|
# todo: have somebody properly organize these
|
630
|
-
class
|
630
|
+
class TestOldSyntax < Test::Unit::TestCase
|
631
631
|
if ENV['WIP']
|
632
632
|
context 'with nullify option' do
|
633
633
|
should 'treat blank fields as null values' do
|
@@ -660,9 +660,9 @@ class DataMinerTest < Test::Unit::TestCase
|
|
660
660
|
end
|
661
661
|
end
|
662
662
|
end
|
663
|
-
assert_kind_of DataMiner::Import, AutomobileFuelType.
|
664
|
-
assert_equal 'http://example.com', AutomobileFuelType.
|
665
|
-
assert_equal 1, AutomobileFuelType.
|
663
|
+
assert_kind_of DataMiner::Import, AutomobileFuelType.data_miner_config.steps.first
|
664
|
+
assert_equal 'http://example.com', AutomobileFuelType.data_miner_config.steps.first.table.package.url
|
665
|
+
assert_equal 1, AutomobileFuelType.data_miner_config.step_counter
|
666
666
|
end
|
667
667
|
should "stop and finish if it gets a DataMiner::Finish" do
|
668
668
|
AutomobileMakeFleetYear.delete_all
|
@@ -670,8 +670,8 @@ class DataMinerTest < Test::Unit::TestCase
|
|
670
670
|
$force_finish = true
|
671
671
|
AutomobileMakeFleetYear.run_data_miner!
|
672
672
|
assert_equal 0, AutomobileMakeFleetYear.count
|
673
|
-
|
674
|
-
|
673
|
+
assert (AutomobileMakeFleetYear.data_miner_runs.count > 0)
|
674
|
+
assert AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.finished? and not run.skipped and not run.killed? }
|
675
675
|
$force_finish = false
|
676
676
|
AutomobileMakeFleetYear.run_data_miner!
|
677
677
|
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
@@ -683,8 +683,8 @@ class DataMinerTest < Test::Unit::TestCase
|
|
683
683
|
$force_skip = true
|
684
684
|
AutomobileMakeFleetYear.run_data_miner!
|
685
685
|
assert_equal 0, AutomobileMakeFleetYear.count
|
686
|
-
|
687
|
-
|
686
|
+
assert (AutomobileMakeFleetYear.data_miner_runs.count > 0)
|
687
|
+
assert AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.skipped? and not run.finished? and not run.killed? }
|
688
688
|
$force_skip = false
|
689
689
|
AutomobileMakeFleetYear.run_data_miner!
|
690
690
|
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
@@ -693,27 +693,25 @@ class DataMinerTest < Test::Unit::TestCase
|
|
693
693
|
should "eagerly enforce a schema" do
|
694
694
|
ActiveRecord::Base.connection.create_table 'census_division_trois', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
695
695
|
t.string 'name'
|
696
|
-
# t.datetime 'updated_at'
|
697
|
-
# t.datetime 'created_at'
|
698
696
|
t.string 'census_region_name'
|
699
697
|
# t.integer 'census_region_number'
|
700
698
|
end
|
701
699
|
ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_trois ADD INDEX (census_region_name)'
|
702
700
|
CensusDivisionTrois.reset_column_information
|
703
|
-
missing_columns = %w{
|
701
|
+
missing_columns = %w{ census_region_number }
|
704
702
|
|
705
703
|
# sanity check
|
706
704
|
missing_columns.each do |column|
|
707
|
-
|
705
|
+
assert_false CensusDivisionTrois.column_names.include?(column)
|
708
706
|
end
|
709
|
-
|
707
|
+
assert_false ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
|
710
708
|
|
711
709
|
3.times do
|
712
710
|
CensusDivisionTrois.run_data_miner!
|
713
711
|
missing_columns.each do |column|
|
714
|
-
|
712
|
+
assert CensusDivisionTrois.column_names.include?(column)
|
715
713
|
end
|
716
|
-
|
714
|
+
assert ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
|
717
715
|
assert_equal :string, CensusDivisionTrois.columns_hash[CensusDivisionTrois.primary_key].type
|
718
716
|
end
|
719
717
|
end
|
@@ -721,27 +719,25 @@ class DataMinerTest < Test::Unit::TestCase
|
|
721
719
|
should "let schemas work with default id primary keys" do
|
722
720
|
ActiveRecord::Base.connection.create_table 'census_division_fours', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
723
721
|
t.string 'name'
|
724
|
-
# t.datetime 'updated_at'
|
725
|
-
# t.datetime 'created_at'
|
726
722
|
t.string 'census_region_name'
|
727
723
|
# t.integer 'census_region_number'
|
728
724
|
end
|
729
725
|
ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_fours ADD INDEX (census_region_name)'
|
730
726
|
CensusDivisionFour.reset_column_information
|
731
|
-
missing_columns = %w{
|
727
|
+
missing_columns = %w{ census_region_number }
|
732
728
|
|
733
729
|
# sanity check
|
734
730
|
missing_columns.each do |column|
|
735
|
-
|
731
|
+
assert_false CensusDivisionFour.column_names.include?(column)
|
736
732
|
end
|
737
|
-
|
733
|
+
assert_false ActiveRecord::Base.connection.indexes(CensusDivisionFour.table_name).any? { |index| index.name == 'homefry' }
|
738
734
|
|
739
735
|
3.times do
|
740
736
|
CensusDivisionFour.run_data_miner!
|
741
737
|
missing_columns.each do |column|
|
742
|
-
|
738
|
+
assert CensusDivisionFour.column_names.include?(column)
|
743
739
|
end
|
744
|
-
|
740
|
+
assert ActiveRecord::Base.connection.indexes(CensusDivisionFour.table_name).any? { |index| index.name == 'homefry' }
|
745
741
|
assert_equal :integer, CensusDivisionFour.columns_hash[CensusDivisionFour.primary_key].type
|
746
742
|
end
|
747
743
|
end
|
@@ -793,29 +789,29 @@ class DataMinerTest < Test::Unit::TestCase
|
|
793
789
|
end
|
794
790
|
|
795
791
|
should "be idempotent" do
|
796
|
-
Country.
|
792
|
+
Country.data_miner_config.run
|
797
793
|
a = Country.count
|
798
|
-
Country.
|
794
|
+
Country.data_miner_config.run
|
799
795
|
b = Country.count
|
800
796
|
assert_equal a, b
|
801
797
|
|
802
|
-
CensusRegion.
|
798
|
+
CensusRegion.data_miner_config.run
|
803
799
|
a = CensusRegion.count
|
804
|
-
CensusRegion.
|
800
|
+
CensusRegion.data_miner_config.run
|
805
801
|
b = CensusRegion.count
|
806
802
|
assert_equal a, b
|
807
803
|
end
|
808
804
|
|
809
805
|
should "hash things" do
|
810
|
-
AutomobileVariant.
|
806
|
+
AutomobileVariant.data_miner_config.steps[0].run
|
811
807
|
assert AutomobileVariant.first.row_hash.present?
|
812
808
|
end
|
813
809
|
|
814
810
|
should "process a callback block instead of a method" do
|
815
811
|
AutomobileVariant.delete_all
|
816
|
-
AutomobileVariant.
|
812
|
+
AutomobileVariant.data_miner_config.steps[0].run
|
817
813
|
assert !AutomobileVariant.first.fuel_efficiency_city.present?
|
818
|
-
AutomobileVariant.
|
814
|
+
AutomobileVariant.data_miner_config.steps.last.run
|
819
815
|
assert AutomobileVariant.first.fuel_efficiency_city.present?
|
820
816
|
end
|
821
817
|
|
@@ -871,7 +867,7 @@ class DataMinerTest < Test::Unit::TestCase
|
|
871
867
|
end
|
872
868
|
end
|
873
869
|
should "mark the run as skipped if verification fails" do
|
874
|
-
AutomobileFuelType.
|
870
|
+
AutomobileFuelType.data_miner_config.instance_eval do
|
875
871
|
verify "failure" do
|
876
872
|
false
|
877
873
|
end
|