data_miner 0.4.14 → 0.4.15
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/data_miner.gemspec +2 -1
- data/lib/data_miner/clone.rb +88 -0
- data/lib/data_miner/configuration.rb +7 -2
- data/lib/data_miner.rb +1 -0
- data/test/data_miner_test.rb +32 -0
- data/test/test_helper.rb +4 -0
- metadata +3 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.14
|
1
|
+
0.4.15
|
data/data_miner.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.14"
|
8
|
+
s.version = "0.4.15"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
@@ -27,6 +27,7 @@ Gem::Specification.new do |s|
|
|
27
27
|
"data_miner.gemspec",
|
28
28
|
"lib/data_miner.rb",
|
29
29
|
"lib/data_miner/attribute.rb",
|
30
|
+
"lib/data_miner/clone.rb",
|
30
31
|
"lib/data_miner/configuration.rb",
|
31
32
|
"lib/data_miner/dictionary.rb",
|
32
33
|
"lib/data_miner/import.rb",
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module DataMiner
|
2
|
+
class Clone
|
3
|
+
attr_accessor :configuration
|
4
|
+
attr_accessor :position_in_run
|
5
|
+
attr_accessor :description
|
6
|
+
attr_accessor :options
|
7
|
+
delegate :resource, :to => :configuration
|
8
|
+
|
9
|
+
def initialize(configuration, position_in_run, description, options = {})
|
10
|
+
DataMiner.log_or_raise "Clone has to be the first step." unless position_in_run == 0
|
11
|
+
DataMiner.log_or_raise "Clone needs :url" unless options[:url].present?
|
12
|
+
@configuration = configuration
|
13
|
+
@position_in_run = position_in_run
|
14
|
+
@description = description
|
15
|
+
@options = options
|
16
|
+
end
|
17
|
+
|
18
|
+
def inspect
|
19
|
+
"Clone(#{resource}): #{description}"
|
20
|
+
end
|
21
|
+
|
22
|
+
def run(run)
|
23
|
+
download_sql_source
|
24
|
+
perform_sanity_check unless options[:sanity_check] == false
|
25
|
+
execute_sql_source
|
26
|
+
DataMiner.log_info "ran #{inspect}"
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
# from remote_table
|
32
|
+
def tempfile_path
|
33
|
+
return @_tempfile_path if @_tempfile_path
|
34
|
+
@_tempfile_path = Tempfile.open(options[:url].gsub(/[^a-z0-9]+/i, '_')[0,100]).path
|
35
|
+
FileUtils.rm_f @_tempfile_path
|
36
|
+
at_exit { FileUtils.rm_rf @_tempfile_path }
|
37
|
+
@_tempfile_path
|
38
|
+
end
|
39
|
+
|
40
|
+
def download_sql_source
|
41
|
+
cmd = %{
|
42
|
+
curl \
|
43
|
+
--silent \
|
44
|
+
--header "Expect: " \
|
45
|
+
--location \
|
46
|
+
"#{options[:url]}" \
|
47
|
+
--output "#{tempfile_path}"
|
48
|
+
}
|
49
|
+
`#{cmd}`
|
50
|
+
end
|
51
|
+
|
52
|
+
def perform_sanity_check
|
53
|
+
File.open(tempfile_path, 'r') do |infile|
|
54
|
+
while (line = infile.gets)
|
55
|
+
line_essence = line.gsub /[^\-\_\.a-zA-Z0-9]+/, ' '
|
56
|
+
if line_essence =~ /(INSERT\s+INTO|CREATE\s+TABLE|ALTER\s+TABLE|DROP\s+TABLE\s+[^I]|DROP\s+TABLE\s+IF\s+EXISTS)\s+([^\s]+)/i
|
57
|
+
one = $1
|
58
|
+
two = $2
|
59
|
+
unless two.split('.').last == resource.table_name
|
60
|
+
DataMiner.log_or_raise %{
|
61
|
+
|
62
|
+
Warning: clone SQL tries to #{one} on `#{two}` instead of `#{resource.table_name}`. (#{line[0,100]}...)
|
63
|
+
|
64
|
+
If you want to ignore this, use clone 'X', :url => 'Y', :sanity_check => false
|
65
|
+
|
66
|
+
If you need to set a different table name, you could say set_table_name '#{two}' in your ActiveRecord model.
|
67
|
+
}
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def execute_sql_source
|
75
|
+
mysql_config = ActiveRecord::Base.connection.instance_variable_get :@config
|
76
|
+
cmd = %{
|
77
|
+
mysql \
|
78
|
+
--batch \
|
79
|
+
#{"--host=\"#{mysql_config[:hostname]}\"" if mysql_config[:hostname].present?} \
|
80
|
+
--user="#{mysql_config[:username]}" \
|
81
|
+
--password="#{mysql_config[:password]}" \
|
82
|
+
--database="#{mysql_config[:database]}" \
|
83
|
+
--execute="SOURCE #{tempfile_path}"
|
84
|
+
}
|
85
|
+
`#{cmd}`
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
@@ -12,8 +12,13 @@ module DataMiner
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def process(method_name_or_block_description, &block)
|
15
|
-
self.runnable_counter += 1
|
16
15
|
runnables << DataMiner::Process.new(self, runnable_counter, method_name_or_block_description, &block)
|
16
|
+
self.runnable_counter += 1
|
17
|
+
end
|
18
|
+
|
19
|
+
def clone(description, options = {})
|
20
|
+
runnables << DataMiner::Clone.new(self, runnable_counter, description, options)
|
21
|
+
self.runnable_counter += 1
|
17
22
|
end
|
18
23
|
|
19
24
|
def import(*args, &block)
|
@@ -24,10 +29,10 @@ module DataMiner
|
|
24
29
|
end
|
25
30
|
options = args.last
|
26
31
|
|
27
|
-
self.runnable_counter += 1
|
28
32
|
runnable = DataMiner::Import.new self, runnable_counter, description, options
|
29
33
|
Blockenspiel.invoke block, runnable
|
30
34
|
runnables << runnable
|
35
|
+
self.runnable_counter += 1
|
31
36
|
end
|
32
37
|
|
33
38
|
# Mine data for this class.
|
data/lib/data_miner.rb
CHANGED
data/test/data_miner_test.rb
CHANGED
@@ -438,6 +438,24 @@ class Airport < ActiveRecord::Base
|
|
438
438
|
end
|
439
439
|
end
|
440
440
|
|
441
|
+
class ClonedAirport < ActiveRecord::Base
|
442
|
+
set_primary_key :iata_code
|
443
|
+
set_table_name 'airports'
|
444
|
+
|
445
|
+
data_miner do
|
446
|
+
clone 'a sanitized airports table', :url => 'http://data.brighterplanet.com/airports.sql', :sanity_check => true
|
447
|
+
end
|
448
|
+
end
|
449
|
+
|
450
|
+
class BadlyClonedAirport < ActiveRecord::Base
|
451
|
+
set_primary_key :iata_code
|
452
|
+
set_table_name 'badly_cloned_airports'
|
453
|
+
|
454
|
+
data_miner do
|
455
|
+
clone 'a sanitized airports table', :url => 'http://data.brighterplanet.com/airports.sql', :sanity_check => true
|
456
|
+
end
|
457
|
+
end
|
458
|
+
|
441
459
|
class CensusRegion < ActiveRecord::Base
|
442
460
|
set_primary_key :number
|
443
461
|
|
@@ -884,7 +902,21 @@ end
|
|
884
902
|
|
885
903
|
# todo: have somebody properly organize these
|
886
904
|
class DataMinerTest < Test::Unit::TestCase
|
905
|
+
if ENV['NEW'] == 'true'
|
906
|
+
end
|
907
|
+
|
887
908
|
if ENV['FAST'] == 'true'
|
909
|
+
should "clone airports" do
|
910
|
+
ClonedAirport.run_data_miner!
|
911
|
+
assert ClonedAirport.count > 0
|
912
|
+
end
|
913
|
+
|
914
|
+
should "raise an error when the SQL for cloning doesn't seem to match up" do
|
915
|
+
assert_raises(RuntimeError) do
|
916
|
+
BadlyClonedAirport.run_data_miner!
|
917
|
+
end
|
918
|
+
end
|
919
|
+
|
888
920
|
should "import airports" do
|
889
921
|
Airport.run_data_miner!
|
890
922
|
assert Airport.count > 0
|
data/test/test_helper.rb
CHANGED
@@ -83,6 +83,10 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
|
|
83
83
|
end
|
84
84
|
execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
|
85
85
|
|
86
|
+
create_table 'badly_cloned_airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
87
|
+
t.string 'placeholder'
|
88
|
+
end
|
89
|
+
|
86
90
|
create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
87
91
|
t.string 'iata_code'
|
88
92
|
t.string 'name'
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 14
|
9
|
-
version: 0.4.14
|
8
|
+
- 15
|
9
|
+
version: 0.4.15
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -150,6 +150,7 @@ files:
|
|
150
150
|
- data_miner.gemspec
|
151
151
|
- lib/data_miner.rb
|
152
152
|
- lib/data_miner/attribute.rb
|
153
|
+
- lib/data_miner/clone.rb
|
153
154
|
- lib/data_miner/configuration.rb
|
154
155
|
- lib/data_miner/dictionary.rb
|
155
156
|
- lib/data_miner/import.rb
|