data_miner 0.4.14 → 0.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/data_miner.gemspec +2 -1
- data/lib/data_miner/clone.rb +88 -0
- data/lib/data_miner/configuration.rb +7 -2
- data/lib/data_miner.rb +1 -0
- data/test/data_miner_test.rb +32 -0
- data/test/test_helper.rb +4 -0
- metadata +3 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.14
|
1
|
+
0.4.15
|
data/data_miner.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{data_miner}
|
8
|
-
s.version = "0.4.14"
|
8
|
+
s.version = "0.4.15"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
@@ -27,6 +27,7 @@ Gem::Specification.new do |s|
|
|
27
27
|
"data_miner.gemspec",
|
28
28
|
"lib/data_miner.rb",
|
29
29
|
"lib/data_miner/attribute.rb",
|
30
|
+
"lib/data_miner/clone.rb",
|
30
31
|
"lib/data_miner/configuration.rb",
|
31
32
|
"lib/data_miner/dictionary.rb",
|
32
33
|
"lib/data_miner/import.rb",
|
data/lib/data_miner/clone.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
module DataMiner
|
2
|
+
class Clone
|
3
|
+
attr_accessor :configuration
|
4
|
+
attr_accessor :position_in_run
|
5
|
+
attr_accessor :description
|
6
|
+
attr_accessor :options
|
7
|
+
delegate :resource, :to => :configuration
|
8
|
+
|
9
|
+
def initialize(configuration, position_in_run, description, options = {})
|
10
|
+
DataMiner.log_or_raise "Clone has to be the first step." unless position_in_run == 0
|
11
|
+
DataMiner.log_or_raise "Clone needs :url" unless options[:url].present?
|
12
|
+
@configuration = configuration
|
13
|
+
@position_in_run = position_in_run
|
14
|
+
@description = description
|
15
|
+
@options = options
|
16
|
+
end
|
17
|
+
|
18
|
+
def inspect
|
19
|
+
"Clone(#{resource}): #{description}"
|
20
|
+
end
|
21
|
+
|
22
|
+
def run(run)
|
23
|
+
download_sql_source
|
24
|
+
perform_sanity_check unless options[:sanity_check] == false
|
25
|
+
execute_sql_source
|
26
|
+
DataMiner.log_info "ran #{inspect}"
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
# from remote_table
|
32
|
+
def tempfile_path
|
33
|
+
return @_tempfile_path if @_tempfile_path
|
34
|
+
@_tempfile_path = Tempfile.open(options[:url].gsub(/[^a-z0-9]+/i, '_')[0,100]).path
|
35
|
+
FileUtils.rm_f @_tempfile_path
|
36
|
+
at_exit { FileUtils.rm_rf @_tempfile_path }
|
37
|
+
@_tempfile_path
|
38
|
+
end
|
39
|
+
|
40
|
+
def download_sql_source
|
41
|
+
cmd = %{
|
42
|
+
curl \
|
43
|
+
--silent \
|
44
|
+
--header "Expect: " \
|
45
|
+
--location \
|
46
|
+
"#{options[:url]}" \
|
47
|
+
--output "#{tempfile_path}"
|
48
|
+
}
|
49
|
+
`#{cmd}`
|
50
|
+
end
|
51
|
+
|
52
|
+
def perform_sanity_check
|
53
|
+
File.open(tempfile_path, 'r') do |infile|
|
54
|
+
while (line = infile.gets)
|
55
|
+
line_essence = line.gsub /[^\-\_\.a-zA-Z0-9]+/, ' '
|
56
|
+
if line_essence =~ /(INSERT\s+INTO|CREATE\s+TABLE|ALTER\s+TABLE|DROP\s+TABLE\s+[^I]|DROP\s+TABLE\s+IF\s+EXISTS)\s+([^\s]+)/i
|
57
|
+
one = $1
|
58
|
+
two = $2
|
59
|
+
unless two.split('.').last == resource.table_name
|
60
|
+
DataMiner.log_or_raise %{
|
61
|
+
|
62
|
+
Warning: clone SQL tries to #{one} on `#{two}` instead of `#{resource.table_name}`. (#{line[0,100]}...)
|
63
|
+
|
64
|
+
If you want to ignore this, use clone 'X', :url => 'Y', :sanity_check => false
|
65
|
+
|
66
|
+
If you need to set a different table name, you could say set_table_name '#{two}' in your ActiveRecord model.
|
67
|
+
}
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def execute_sql_source
|
75
|
+
mysql_config = ActiveRecord::Base.connection.instance_variable_get :@config
|
76
|
+
cmd = %{
|
77
|
+
mysql \
|
78
|
+
--batch \
|
79
|
+
#{"--host=\"#{mysql_config[:hostname]}\"" if mysql_config[:hostname].present?} \
|
80
|
+
--user="#{mysql_config[:username]}" \
|
81
|
+
--password="#{mysql_config[:password]}" \
|
82
|
+
--database="#{mysql_config[:database]}" \
|
83
|
+
--execute="SOURCE #{tempfile_path}"
|
84
|
+
}
|
85
|
+
`#{cmd}`
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
data/lib/data_miner/configuration.rb
CHANGED
@@ -12,8 +12,13 @@ module DataMiner
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def process(method_name_or_block_description, &block)
|
15
|
-
self.runnable_counter += 1
|
16
15
|
runnables << DataMiner::Process.new(self, runnable_counter, method_name_or_block_description, &block)
|
16
|
+
self.runnable_counter += 1
|
17
|
+
end
|
18
|
+
|
19
|
+
def clone(description, options = {})
|
20
|
+
runnables << DataMiner::Clone.new(self, runnable_counter, description, options)
|
21
|
+
self.runnable_counter += 1
|
17
22
|
end
|
18
23
|
|
19
24
|
def import(*args, &block)
|
@@ -24,10 +29,10 @@ module DataMiner
|
|
24
29
|
end
|
25
30
|
options = args.last
|
26
31
|
|
27
|
-
self.runnable_counter += 1
|
28
32
|
runnable = DataMiner::Import.new self, runnable_counter, description, options
|
29
33
|
Blockenspiel.invoke block, runnable
|
30
34
|
runnables << runnable
|
35
|
+
self.runnable_counter += 1
|
31
36
|
end
|
32
37
|
|
33
38
|
# Mine data for this class.
|
data/lib/data_miner.rb
CHANGED
data/test/data_miner_test.rb
CHANGED
@@ -438,6 +438,24 @@ class Airport < ActiveRecord::Base
|
|
438
438
|
end
|
439
439
|
end
|
440
440
|
|
441
|
+
class ClonedAirport < ActiveRecord::Base
|
442
|
+
set_primary_key :iata_code
|
443
|
+
set_table_name 'airports'
|
444
|
+
|
445
|
+
data_miner do
|
446
|
+
clone 'a sanitized airports table', :url => 'http://data.brighterplanet.com/airports.sql', :sanity_check => true
|
447
|
+
end
|
448
|
+
end
|
449
|
+
|
450
|
+
class BadlyClonedAirport < ActiveRecord::Base
|
451
|
+
set_primary_key :iata_code
|
452
|
+
set_table_name 'badly_cloned_airports'
|
453
|
+
|
454
|
+
data_miner do
|
455
|
+
clone 'a sanitized airports table', :url => 'http://data.brighterplanet.com/airports.sql', :sanity_check => true
|
456
|
+
end
|
457
|
+
end
|
458
|
+
|
441
459
|
class CensusRegion < ActiveRecord::Base
|
442
460
|
set_primary_key :number
|
443
461
|
|
@@ -884,7 +902,21 @@ end
|
|
884
902
|
|
885
903
|
# todo: have somebody properly organize these
|
886
904
|
class DataMinerTest < Test::Unit::TestCase
|
905
|
+
if ENV['NEW'] == 'true'
|
906
|
+
end
|
907
|
+
|
887
908
|
if ENV['FAST'] == 'true'
|
909
|
+
should "clone airports" do
|
910
|
+
ClonedAirport.run_data_miner!
|
911
|
+
assert ClonedAirport.count > 0
|
912
|
+
end
|
913
|
+
|
914
|
+
should "raise an error when the SQL for cloning doesn't seem to match up" do
|
915
|
+
assert_raises(RuntimeError) do
|
916
|
+
BadlyClonedAirport.run_data_miner!
|
917
|
+
end
|
918
|
+
end
|
919
|
+
|
888
920
|
should "import airports" do
|
889
921
|
Airport.run_data_miner!
|
890
922
|
assert Airport.count > 0
|
data/test/test_helper.rb
CHANGED
@@ -83,6 +83,10 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
|
|
83
83
|
end
|
84
84
|
execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
|
85
85
|
|
86
|
+
create_table 'badly_cloned_airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
87
|
+
t.string 'placeholder'
|
88
|
+
end
|
89
|
+
|
86
90
|
create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
|
87
91
|
t.string 'iata_code'
|
88
92
|
t.string 'name'
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 14
|
9
|
-
version: 0.4.14
|
8
|
+
- 15
|
9
|
+
version: 0.4.15
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -150,6 +150,7 @@ files:
|
|
150
150
|
- data_miner.gemspec
|
151
151
|
- lib/data_miner.rb
|
152
152
|
- lib/data_miner/attribute.rb
|
153
|
+
- lib/data_miner/clone.rb
|
153
154
|
- lib/data_miner/configuration.rb
|
154
155
|
- lib/data_miner/dictionary.rb
|
155
156
|
- lib/data_miner/import.rb
|