data_miner 0.5.7 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +8 -0
- data/CHANGELOG +7 -0
- data/Gemfile +4 -0
- data/LICENSE +1 -1
- data/Rakefile +23 -0
- data/data_miner.gemspec +35 -0
- data/lib/data_miner.rb +55 -96
- data/lib/data_miner/active_record_extensions.rb +38 -0
- data/lib/data_miner/attribute.rb +63 -58
- data/lib/data_miner/config.rb +184 -0
- data/lib/data_miner/dictionary.rb +25 -12
- data/lib/data_miner/import.rb +59 -50
- data/lib/data_miner/process.rb +24 -19
- data/lib/data_miner/run.rb +3 -3
- data/lib/data_miner/schema.rb +50 -53
- data/lib/data_miner/tap.rb +24 -24
- data/lib/data_miner/verify.rb +17 -24
- data/lib/data_miner/version.rb +3 -0
- data/test/{test_helper.rb → helper.rb} +20 -3
- data/test/{data_miner/attribute_test.rb → test_attribute.rb} +2 -2
- data/test/{data_miner_test.rb → test_old_syntax.rb} +28 -32
- data/test/{data_miner/verify_test.rb → test_verify.rb} +4 -4
- metadata +80 -101
- data/lib/data_miner/base.rb +0 -204
data/lib/data_miner/schema.rb
CHANGED
@@ -1,24 +1,28 @@
|
|
1
|
-
|
1
|
+
require 'blockenspiel'
|
2
|
+
class DataMiner
|
2
3
|
class Schema
|
3
|
-
include Blockenspiel::DSL
|
4
|
+
include ::Blockenspiel::DSL
|
4
5
|
|
5
|
-
attr_reader :
|
6
|
-
attr_reader :position_in_run
|
6
|
+
attr_reader :config
|
7
7
|
attr_reader :create_table_options
|
8
|
-
delegate :resource, :to => :base
|
9
8
|
|
10
|
-
def initialize(
|
11
|
-
@
|
12
|
-
@
|
13
|
-
@create_table_options
|
14
|
-
@create_table_options
|
15
|
-
|
16
|
-
|
17
|
-
@create_table_options[:id] = false # always
|
9
|
+
def initialize(config, create_table_options)
|
10
|
+
@config = config
|
11
|
+
@create_table_options = create_table_options.dup
|
12
|
+
@create_table_options.stringify_keys!
|
13
|
+
raise "'id' => true is not allowed in create_table_options." if @create_table_options['id'] === true
|
14
|
+
raise "'primary_key' is not allowed in create_table_options. Use set_primary_key instead." if @create_table_options.has_key?('primary_key')
|
15
|
+
@create_table_options['id'] = false # always
|
18
16
|
end
|
19
17
|
|
18
|
+
def resource
|
19
|
+
config.resource
|
20
|
+
end
|
21
|
+
|
22
|
+
# sabshere 1/25/11 what if there were multiple connections
|
23
|
+
# blockenspiel doesn't like to delegate this to #resource
|
20
24
|
def connection
|
21
|
-
ActiveRecord::Base.connection
|
25
|
+
::ActiveRecord::Base.connection
|
22
26
|
end
|
23
27
|
|
24
28
|
def table_name
|
@@ -26,11 +30,11 @@ module DataMiner
|
|
26
30
|
end
|
27
31
|
|
28
32
|
def ideal_table
|
29
|
-
@ideal_table ||= ActiveRecord::ConnectionAdapters::TableDefinition.new
|
33
|
+
@ideal_table ||= ::ActiveRecord::ConnectionAdapters::TableDefinition.new connection
|
30
34
|
end
|
31
35
|
|
32
36
|
def ideal_indexes
|
33
|
-
@ideal_indexes ||=
|
37
|
+
@ideal_indexes ||= []
|
34
38
|
end
|
35
39
|
|
36
40
|
def actual_indexes
|
@@ -42,10 +46,10 @@ module DataMiner
|
|
42
46
|
end
|
43
47
|
|
44
48
|
def inspect
|
45
|
-
|
49
|
+
%{#<DataMiner::Schema(#{resource}): #{description}>}
|
46
50
|
end
|
47
51
|
|
48
|
-
# lifted straight from activerecord-3.0.
|
52
|
+
# sabshere 1/25/11 lifted straight from activerecord-3.0.3/lib/active_record/connection_adapters/abstract/schema_definitions.rb
|
49
53
|
%w( string text integer float decimal datetime timestamp time date binary boolean ).each do |column_type|
|
50
54
|
class_eval <<-EOV
|
51
55
|
def #{column_type}(*args) # def string(*args)
|
@@ -60,16 +64,18 @@ module DataMiner
|
|
60
64
|
ideal_table.column(*args)
|
61
65
|
end
|
62
66
|
|
63
|
-
MAX_INDEX_NAME_LENGTH =
|
67
|
+
MAX_INDEX_NAME_LENGTH = 32
|
64
68
|
def index(columns, options = {})
|
65
|
-
options.
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
69
|
+
options = options.dup
|
70
|
+
options.stringify_keys!
|
71
|
+
columns = ::Array.wrap columns
|
72
|
+
unless name = options['name']
|
73
|
+
default_name = connection.index_name(table_name, options.symbolize_keys.merge(:column => columns))
|
74
|
+
name = default_name.length < MAX_INDEX_NAME_LENGTH ? default_name : default_name[0..MAX_INDEX_NAME_LENGTH-11] + ::Zlib.crc32(default_name).to_s
|
70
75
|
end
|
71
|
-
index_unique = options.has_key?(
|
72
|
-
ideal_indexes.push ActiveRecord::ConnectionAdapters::IndexDefinition.new(table_name, name, index_unique, columns)
|
76
|
+
index_unique = options.has_key?('unique') ? options['unique'] : true
|
77
|
+
ideal_indexes.push ::ActiveRecord::ConnectionAdapters::IndexDefinition.new(table_name, name, index_unique, columns)
|
78
|
+
nil
|
73
79
|
end
|
74
80
|
|
75
81
|
def ideal_primary_key_name
|
@@ -84,7 +90,7 @@ module DataMiner
|
|
84
90
|
def index_equivalent?(a, b)
|
85
91
|
return false unless a and b
|
86
92
|
INDEX_PROPERTIES.all? do |property|
|
87
|
-
DataMiner.
|
93
|
+
::DataMiner.logger.debug "...comparing #{a.send(property).inspect}.to_s <-> #{b.send(property).inspect}.to_s"
|
88
94
|
a.send(property).to_s == b.send(property).to_s
|
89
95
|
end
|
90
96
|
end
|
@@ -131,13 +137,13 @@ module DataMiner
|
|
131
137
|
def place_column(name)
|
132
138
|
remove_column name if actual_column name
|
133
139
|
ideal = ideal_column name
|
134
|
-
DataMiner.
|
140
|
+
::DataMiner.logger.debug "ADDING COLUMN #{name}"
|
135
141
|
connection.add_column table_name, name, ideal.type.to_sym # symbol type!
|
136
142
|
resource.reset_column_information
|
137
143
|
end
|
138
144
|
|
139
145
|
def remove_column(name)
|
140
|
-
DataMiner.
|
146
|
+
::DataMiner.logger.debug "REMOVING COLUMN #{name}"
|
141
147
|
connection.remove_column table_name, name
|
142
148
|
resource.reset_column_information
|
143
149
|
end
|
@@ -145,43 +151,34 @@ module DataMiner
|
|
145
151
|
def place_index(name)
|
146
152
|
remove_index name if actual_index name
|
147
153
|
ideal = ideal_index name
|
148
|
-
DataMiner.
|
154
|
+
::DataMiner.logger.debug "ADDING INDEX #{name}"
|
149
155
|
connection.add_index table_name, ideal.columns, :name => ideal.name
|
150
156
|
resource.reset_column_information
|
151
157
|
end
|
152
158
|
|
153
159
|
def remove_index(name)
|
154
|
-
DataMiner.
|
160
|
+
::DataMiner.logger.debug "REMOVING INDEX #{name}"
|
155
161
|
connection.remove_index table_name, :name => name
|
156
162
|
resource.reset_column_information
|
157
163
|
end
|
158
164
|
|
159
|
-
def run
|
160
|
-
_add_extra_columns
|
165
|
+
def run
|
161
166
|
_create_table
|
162
167
|
_set_primary_key
|
163
168
|
_remove_columns
|
164
169
|
_add_columns
|
165
170
|
_remove_indexes
|
166
171
|
_add_indexes
|
167
|
-
|
172
|
+
nil
|
168
173
|
end
|
169
|
-
|
170
|
-
EXTRA_COLUMNS = {
|
171
|
-
:updated_at => :datetime,
|
172
|
-
:created_at => :datetime
|
173
|
-
}
|
174
|
-
def _add_extra_columns
|
175
|
-
EXTRA_COLUMNS.each do |extra_name, extra_type|
|
176
|
-
send extra_type, extra_name unless ideal_column extra_name
|
177
|
-
end
|
178
|
-
end
|
179
|
-
|
174
|
+
|
180
175
|
def _create_table
|
181
176
|
if not resource.table_exists?
|
182
|
-
|
177
|
+
create_table_options = @create_table_options.dup
|
178
|
+
create_table_options.symbolize_keys!
|
179
|
+
::DataMiner.logger.debug "CREATING TABLE #{table_name} with #{create_table_options.inspect}"
|
183
180
|
connection.create_table table_name, create_table_options do |t|
|
184
|
-
t.integer
|
181
|
+
t.integer 'data_miner_placeholder'
|
185
182
|
end
|
186
183
|
resource.reset_column_information
|
187
184
|
end
|
@@ -190,23 +187,23 @@ module DataMiner
|
|
190
187
|
# FIXME mysql only
|
191
188
|
def _set_primary_key
|
192
189
|
if ideal_primary_key_name == 'id' and not ideal_column('id')
|
193
|
-
DataMiner.
|
194
|
-
column 'id', :primary_key
|
190
|
+
::DataMiner.logger.debug "no special primary key set on #{table_name}, so using 'id'"
|
191
|
+
column 'id', :primary_key # needs to be a sym?
|
195
192
|
end
|
196
193
|
actual = actual_column actual_primary_key_name
|
197
194
|
ideal = ideal_column ideal_primary_key_name
|
198
195
|
if not column_equivalent? actual, ideal
|
199
|
-
DataMiner.
|
196
|
+
::DataMiner.logger.debug "looks like #{table_name} has a bad (or missing) primary key"
|
200
197
|
if actual
|
201
|
-
DataMiner.
|
198
|
+
::DataMiner.logger.debug "looks like primary key needs to change from #{actual_primary_key_name} to #{ideal_primary_key_name}, re-creating #{table_name} from scratch"
|
202
199
|
connection.drop_table table_name
|
203
200
|
resource.reset_column_information
|
204
201
|
_create_table
|
205
202
|
end
|
206
203
|
place_column ideal_primary_key_name
|
207
204
|
unless ideal.type.to_s == 'primary_key'
|
208
|
-
DataMiner.
|
209
|
-
if
|
205
|
+
::DataMiner.logger.debug "SETTING #{ideal_primary_key_name} AS PRIMARY KEY"
|
206
|
+
if connection.adapter_name.downcase == 'sqlite'
|
210
207
|
connection.execute "CREATE UNIQUE INDEX IDX_#{table_name}_#{ideal_primary_key_name} ON #{table_name} (#{ideal_primary_key_name} ASC)"
|
211
208
|
else
|
212
209
|
connection.execute "ALTER TABLE `#{table_name}` ADD PRIMARY KEY (`#{ideal_primary_key_name}`)"
|
data/lib/data_miner/tap.rb
CHANGED
@@ -1,52 +1,52 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
require 'escape'
|
2
|
+
class DataMiner
|
3
3
|
class Tap
|
4
|
-
attr_reader :
|
5
|
-
attr_reader :position_in_run
|
4
|
+
attr_reader :config
|
6
5
|
attr_reader :description
|
7
6
|
attr_reader :source
|
8
7
|
attr_reader :options
|
9
|
-
delegate :resource, :to => :base
|
10
8
|
|
11
|
-
def initialize(
|
12
|
-
|
13
|
-
|
14
|
-
@
|
15
|
-
@position_in_run = position_in_run
|
9
|
+
def initialize(config, description, source, options = {})
|
10
|
+
@config = config
|
11
|
+
@options = options.dup
|
12
|
+
@options.stringify_keys!
|
16
13
|
@description = description
|
17
14
|
@source = source
|
18
|
-
|
15
|
+
end
|
16
|
+
|
17
|
+
def resource
|
18
|
+
config.resource
|
19
19
|
end
|
20
20
|
|
21
21
|
def inspect
|
22
|
-
|
22
|
+
%{#<DataMiner::Tap(#{resource}): #{description} (#{source})>}
|
23
23
|
end
|
24
24
|
|
25
|
-
def run
|
25
|
+
def run
|
26
26
|
[ source_table_name, resource.table_name ].each do |possible_obstacle|
|
27
|
-
if connection.table_exists?
|
27
|
+
if connection.table_exists? possible_obstacle
|
28
28
|
connection.drop_table possible_obstacle
|
29
29
|
end
|
30
30
|
end
|
31
|
-
DataMiner.backtick_with_reporting taps_pull_cmd
|
31
|
+
::DataMiner.backtick_with_reporting taps_pull_cmd
|
32
32
|
if needs_table_rename?
|
33
33
|
connection.rename_table source_table_name, resource.table_name
|
34
34
|
end
|
35
|
-
|
35
|
+
nil
|
36
36
|
end
|
37
37
|
|
38
|
-
|
39
|
-
|
38
|
+
# sabshere 1/25/11 what if there were multiple connections
|
39
|
+
# blockenspiel doesn't like to delegate this to #resource
|
40
40
|
def connection
|
41
|
-
ActiveRecord::Base.connection
|
41
|
+
::ActiveRecord::Base.connection
|
42
42
|
end
|
43
43
|
|
44
44
|
def db_config
|
45
|
-
@
|
45
|
+
@db_config ||= connection.instance_variable_get(:@config).stringify_keys.merge(options.except('source_table_name'))
|
46
46
|
end
|
47
47
|
|
48
48
|
def source_table_name
|
49
|
-
options[
|
49
|
+
options['source_table_name'] || resource.table_name
|
50
50
|
end
|
51
51
|
|
52
52
|
def needs_table_rename?
|
@@ -66,7 +66,7 @@ module DataMiner
|
|
66
66
|
|
67
67
|
# never optional
|
68
68
|
def database
|
69
|
-
db_config[
|
69
|
+
db_config['database']
|
70
70
|
end
|
71
71
|
|
72
72
|
DEFAULT_PORTS = {
|
@@ -88,7 +88,7 @@ module DataMiner
|
|
88
88
|
%w{ username password port host }.each do |x|
|
89
89
|
module_eval %{
|
90
90
|
def #{x}
|
91
|
-
db_config[
|
91
|
+
db_config['#{x}'] || DEFAULT_#{x.upcase}S[adapter]
|
92
92
|
end
|
93
93
|
}
|
94
94
|
end
|
@@ -104,7 +104,7 @@ module DataMiner
|
|
104
104
|
|
105
105
|
# taps pull mysql://root:password@localhost/taps_test http://foo:bar@data.brighterplanet.com:5000 --tables aircraft
|
106
106
|
def taps_pull_cmd
|
107
|
-
Escape.shell_command [
|
107
|
+
::Escape.shell_command [
|
108
108
|
'taps',
|
109
109
|
'pull',
|
110
110
|
"#{adapter}://#{db_locator}",
|
data/lib/data_miner/verify.rb
CHANGED
@@ -1,35 +1,28 @@
|
|
1
|
-
|
1
|
+
class DataMiner
|
2
2
|
class Verify
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
attr_reader :config
|
4
|
+
attr_reader :description
|
5
|
+
attr_reader :blk
|
6
|
+
|
7
|
+
def initialize(config, description, &blk)
|
8
|
+
@config = config
|
9
|
+
@description = description
|
10
|
+
@blk = blk
|
11
|
+
end
|
7
12
|
|
8
|
-
def
|
9
|
-
|
10
|
-
self.position_in_run = position_in_run
|
11
|
-
self.description = description
|
12
|
-
self.check = check
|
13
|
+
def resource
|
14
|
+
config.resource
|
13
15
|
end
|
14
16
|
|
15
17
|
def inspect
|
16
|
-
|
18
|
+
%{#<DataMiner::Verify(#{resource}) (#{description})>}
|
17
19
|
end
|
18
20
|
|
19
|
-
def run
|
20
|
-
|
21
|
-
|
22
|
-
rescue Exception => e # need this to catch Test::Unit assertions
|
23
|
-
raise VerificationFailed,
|
24
|
-
"#{e.inspect}: #{e.backtrace.join("\n")}"
|
25
|
-
rescue => e
|
26
|
-
raise VerificationFailed,
|
27
|
-
"#{e.inspect}: #{e.backtrace.join("\n")}"
|
28
|
-
end
|
29
|
-
unless verification
|
30
|
-
raise VerificationFailed, "Result of check was false"
|
21
|
+
def run
|
22
|
+
unless blk.call
|
23
|
+
raise VerificationFailed, "FAILED VERIFICATION: #{inspect}"
|
31
24
|
end
|
32
|
-
|
25
|
+
nil
|
33
26
|
end
|
34
27
|
end
|
35
28
|
end
|
@@ -1,12 +1,29 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
unless RUBY_VERSION >= '1.9'
|
4
|
+
gem 'fastercsv'
|
5
|
+
require 'fastercsv'
|
6
|
+
end
|
7
|
+
Bundler.setup
|
2
8
|
require 'test/unit'
|
3
9
|
require 'shoulda'
|
4
|
-
|
5
|
-
|
10
|
+
unless RUBY_VERSION >= '1.9'
|
11
|
+
require 'ruby-debug'
|
12
|
+
end
|
6
13
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
7
15
|
require 'data_miner'
|
16
|
+
class Test::Unit::TestCase
|
17
|
+
end
|
18
|
+
|
19
|
+
test_log = File.open('test.log', 'w')
|
20
|
+
test_log.sync = true
|
21
|
+
DataMiner.logger = Logger.new test_log
|
22
|
+
|
23
|
+
# because some of the test files reference it
|
24
|
+
require 'errata'
|
8
25
|
|
9
|
-
ENV['WIP'] = true if ENV['ALL'] == 'true'
|
26
|
+
ENV['WIP'] = 'true' if ENV['ALL'] == 'true'
|
10
27
|
|
11
28
|
ActiveRecord::Base.establish_connection(
|
12
29
|
'adapter' => 'mysql',
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'helper'
|
2
2
|
|
3
3
|
class TappedAirport < ActiveRecord::Base
|
4
4
|
set_primary_key :iata_code
|
@@ -609,7 +609,7 @@ class CensusDivisionTrois < ActiveRecord::Base
|
|
609
609
|
string 'census_region_name'
|
610
610
|
integer 'census_region_number'
|
611
611
|
index 'census_region_name', :name => 'homefry'
|
612
|
-
index ['number_code', 'name', 'census_region_name', 'census_region_number'
|
612
|
+
index ['number_code', 'name', 'census_region_name', 'census_region_number']
|
613
613
|
end
|
614
614
|
end
|
615
615
|
end
|
@@ -627,7 +627,7 @@ class CensusDivisionFour < ActiveRecord::Base
|
|
627
627
|
end
|
628
628
|
|
629
629
|
# todo: have somebody properly organize these
|
630
|
-
class
|
630
|
+
class TestOldSyntax < Test::Unit::TestCase
|
631
631
|
if ENV['WIP']
|
632
632
|
context 'with nullify option' do
|
633
633
|
should 'treat blank fields as null values' do
|
@@ -660,9 +660,9 @@ class DataMinerTest < Test::Unit::TestCase
|
|
660
660
|
end
|
661
661
|
end
|
662
662
|
end
|
663
|
-
assert_kind_of DataMiner::Import, AutomobileFuelType.
|
664
|
-
assert_equal 'http://example.com', AutomobileFuelType.
|
665
|
-
assert_equal 1, AutomobileFuelType.
|
663
|
+
assert_kind_of DataMiner::Import, AutomobileFuelType.data_miner_config.steps.first
|
664
|
+
assert_equal 'http://example.com', AutomobileFuelType.data_miner_config.steps.first.table.package.url
|
665
|
+
assert_equal 1, AutomobileFuelType.data_miner_config.step_counter
|
666
666
|
end
|
667
667
|
should "stop and finish if it gets a DataMiner::Finish" do
|
668
668
|
AutomobileMakeFleetYear.delete_all
|
@@ -670,8 +670,8 @@ class DataMinerTest < Test::Unit::TestCase
|
|
670
670
|
$force_finish = true
|
671
671
|
AutomobileMakeFleetYear.run_data_miner!
|
672
672
|
assert_equal 0, AutomobileMakeFleetYear.count
|
673
|
-
|
674
|
-
|
673
|
+
assert (AutomobileMakeFleetYear.data_miner_runs.count > 0)
|
674
|
+
assert AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.finished? and not run.skipped and not run.killed? }
|
675
675
|
$force_finish = false
|
676
676
|
AutomobileMakeFleetYear.run_data_miner!
|
677
677
|
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
@@ -683,8 +683,8 @@ class DataMinerTest < Test::Unit::TestCase
|
|
683
683
|
$force_skip = true
|
684
684
|
AutomobileMakeFleetYear.run_data_miner!
|
685
685
|
assert_equal 0, AutomobileMakeFleetYear.count
|
686
|
-
|
687
|
-
|
686
|
+
assert (AutomobileMakeFleetYear.data_miner_runs.count > 0)
|
687
|
+
assert AutomobileMakeFleetYear.data_miner_runs.all? { |run| run.skipped? and not run.finished? and not run.killed? }
|
688
688
|
$force_skip = false
|
689
689
|
AutomobileMakeFleetYear.run_data_miner!
|
690
690
|
assert AutomobileMakeFleetYear.exists?(:name => 'Alfa Romeo IP 1978')
|
@@ -693,27 +693,25 @@ class DataMinerTest < Test::Unit::TestCase
|
|
693
693
|
should "eagerly enforce a schema" do
|
694
694
|
ActiveRecord::Base.connection.create_table 'census_division_trois', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
695
695
|
t.string 'name'
|
696
|
-
# t.datetime 'updated_at'
|
697
|
-
# t.datetime 'created_at'
|
698
696
|
t.string 'census_region_name'
|
699
697
|
# t.integer 'census_region_number'
|
700
698
|
end
|
701
699
|
ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_trois ADD INDEX (census_region_name)'
|
702
700
|
CensusDivisionTrois.reset_column_information
|
703
|
-
missing_columns = %w{
|
701
|
+
missing_columns = %w{ census_region_number }
|
704
702
|
|
705
703
|
# sanity check
|
706
704
|
missing_columns.each do |column|
|
707
|
-
|
705
|
+
assert_false CensusDivisionTrois.column_names.include?(column)
|
708
706
|
end
|
709
|
-
|
707
|
+
assert_false ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
|
710
708
|
|
711
709
|
3.times do
|
712
710
|
CensusDivisionTrois.run_data_miner!
|
713
711
|
missing_columns.each do |column|
|
714
|
-
|
712
|
+
assert CensusDivisionTrois.column_names.include?(column)
|
715
713
|
end
|
716
|
-
|
714
|
+
assert ActiveRecord::Base.connection.indexes(CensusDivisionTrois.table_name).any? { |index| index.name == 'homefry' }
|
717
715
|
assert_equal :string, CensusDivisionTrois.columns_hash[CensusDivisionTrois.primary_key].type
|
718
716
|
end
|
719
717
|
end
|
@@ -721,27 +719,25 @@ class DataMinerTest < Test::Unit::TestCase
|
|
721
719
|
should "let schemas work with default id primary keys" do
|
722
720
|
ActiveRecord::Base.connection.create_table 'census_division_fours', :force => true, :options => 'ENGINE=InnoDB default charset=utf8' do |t|
|
723
721
|
t.string 'name'
|
724
|
-
# t.datetime 'updated_at'
|
725
|
-
# t.datetime 'created_at'
|
726
722
|
t.string 'census_region_name'
|
727
723
|
# t.integer 'census_region_number'
|
728
724
|
end
|
729
725
|
ActiveRecord::Base.connection.execute 'ALTER TABLE census_division_fours ADD INDEX (census_region_name)'
|
730
726
|
CensusDivisionFour.reset_column_information
|
731
|
-
missing_columns = %w{
|
727
|
+
missing_columns = %w{ census_region_number }
|
732
728
|
|
733
729
|
# sanity check
|
734
730
|
missing_columns.each do |column|
|
735
|
-
|
731
|
+
assert_false CensusDivisionFour.column_names.include?(column)
|
736
732
|
end
|
737
|
-
|
733
|
+
assert_false ActiveRecord::Base.connection.indexes(CensusDivisionFour.table_name).any? { |index| index.name == 'homefry' }
|
738
734
|
|
739
735
|
3.times do
|
740
736
|
CensusDivisionFour.run_data_miner!
|
741
737
|
missing_columns.each do |column|
|
742
|
-
|
738
|
+
assert CensusDivisionFour.column_names.include?(column)
|
743
739
|
end
|
744
|
-
|
740
|
+
assert ActiveRecord::Base.connection.indexes(CensusDivisionFour.table_name).any? { |index| index.name == 'homefry' }
|
745
741
|
assert_equal :integer, CensusDivisionFour.columns_hash[CensusDivisionFour.primary_key].type
|
746
742
|
end
|
747
743
|
end
|
@@ -793,29 +789,29 @@ class DataMinerTest < Test::Unit::TestCase
|
|
793
789
|
end
|
794
790
|
|
795
791
|
should "be idempotent" do
|
796
|
-
Country.
|
792
|
+
Country.data_miner_config.run
|
797
793
|
a = Country.count
|
798
|
-
Country.
|
794
|
+
Country.data_miner_config.run
|
799
795
|
b = Country.count
|
800
796
|
assert_equal a, b
|
801
797
|
|
802
|
-
CensusRegion.
|
798
|
+
CensusRegion.data_miner_config.run
|
803
799
|
a = CensusRegion.count
|
804
|
-
CensusRegion.
|
800
|
+
CensusRegion.data_miner_config.run
|
805
801
|
b = CensusRegion.count
|
806
802
|
assert_equal a, b
|
807
803
|
end
|
808
804
|
|
809
805
|
should "hash things" do
|
810
|
-
AutomobileVariant.
|
806
|
+
AutomobileVariant.data_miner_config.steps[0].run
|
811
807
|
assert AutomobileVariant.first.row_hash.present?
|
812
808
|
end
|
813
809
|
|
814
810
|
should "process a callback block instead of a method" do
|
815
811
|
AutomobileVariant.delete_all
|
816
|
-
AutomobileVariant.
|
812
|
+
AutomobileVariant.data_miner_config.steps[0].run
|
817
813
|
assert !AutomobileVariant.first.fuel_efficiency_city.present?
|
818
|
-
AutomobileVariant.
|
814
|
+
AutomobileVariant.data_miner_config.steps.last.run
|
819
815
|
assert AutomobileVariant.first.fuel_efficiency_city.present?
|
820
816
|
end
|
821
817
|
|
@@ -871,7 +867,7 @@ class DataMinerTest < Test::Unit::TestCase
|
|
871
867
|
end
|
872
868
|
end
|
873
869
|
should "mark the run as skipped if verification fails" do
|
874
|
-
AutomobileFuelType.
|
870
|
+
AutomobileFuelType.data_miner_config.instance_eval do
|
875
871
|
verify "failure" do
|
876
872
|
false
|
877
873
|
end
|