data_miner 0.4.14 → 0.4.15

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.14
1
+ 0.4.15
data/data_miner.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.14"
8
+ s.version = "0.4.15"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
@@ -27,6 +27,7 @@ Gem::Specification.new do |s|
27
27
  "data_miner.gemspec",
28
28
  "lib/data_miner.rb",
29
29
  "lib/data_miner/attribute.rb",
30
+ "lib/data_miner/clone.rb",
30
31
  "lib/data_miner/configuration.rb",
31
32
  "lib/data_miner/dictionary.rb",
32
33
  "lib/data_miner/import.rb",
@@ -0,0 +1,88 @@
1
require 'fileutils'
require 'shellwords'
require 'tempfile'

module DataMiner
  # A data_miner step that downloads a SQL file from options[:url] with curl,
  # optionally checks that its statements target the resource's table, and
  # then feeds it to the mysql command-line client using the settings of the
  # current ActiveRecord connection.
  #
  # Options read by this class:
  #   :url          - (required) where to download the SQL from
  #   :sanity_check - pass false to skip the table-name check
  class Clone
    # Statements inspected by the sanity check. Capture 1 is the statement
    # kind, capture 2 the table it targets. The optional IF [NOT] EXISTS
    # groups keep the keyword "IF" from being mistaken for a table name, and
    # let a plain "DROP TABLE foo" match (the previous pattern could not).
    TABLE_STATEMENT = /(INSERT\s+INTO|CREATE\s+TABLE(?:\s+IF\s+NOT\s+EXISTS)?|ALTER\s+TABLE|DROP\s+TABLE(?:\s+IF\s+EXISTS)?)\s+([^\s]+)/i

    attr_accessor :configuration
    attr_accessor :position_in_run
    attr_accessor :description
    attr_accessor :options

    # configuration   - the owning configuration; must respond to #resource
    # position_in_run - index of this step in the run; anything but 0 is
    #                   reported through DataMiner.log_or_raise
    # description     - human-readable label used by #inspect
    # options         - see the class comment; a missing :url is reported
    #                   through DataMiner.log_or_raise
    def initialize(configuration, position_in_run, description, options = {})
      DataMiner.log_or_raise "Clone has to be the first step." unless position_in_run == 0
      DataMiner.log_or_raise "Clone needs :url" unless options[:url].present?
      @configuration = configuration
      @position_in_run = position_in_run
      @description = description
      @options = options
    end

    # The resource (model class) this step belongs to, as reported by the
    # configuration. Plain reader so the class loads without Module#delegate.
    def resource
      configuration.resource
    end

    def inspect
      "Clone(#{resource}): #{description}"
    end

    # Downloads the SQL, sanity-checks it unless options[:sanity_check] is
    # false, executes it, and logs completion. The run argument is not used.
    def run(run)
      download_sql_source
      perform_sanity_check unless options[:sanity_check] == false
      execute_sql_source
      DataMiner.log_info "ran #{inspect}"
    end

    private

    # from remote_table
    #
    # Memoized path that the download is written to. A Tempfile is used only
    # to reserve a unique name; it is closed and unlinked right away (so no
    # file handle is left open) and the path is cleaned up again at exit.
    def tempfile_path
      return @_tempfile_path if @_tempfile_path
      placeholder = Tempfile.open(options[:url].gsub(/[^a-z0-9]+/i, '_')[0, 100])
      @_tempfile_path = placeholder.path
      placeholder.close!
      at_exit { FileUtils.rm_rf @_tempfile_path }
      @_tempfile_path
    end

    # Shell-escaped curl command line that saves options[:url] to
    # tempfile_path. Every argument is escaped so that characters in the URL
    # ($, `, ", &, spaces...) cannot be interpreted by the shell.
    def download_command
      [
        'curl',
        '--silent',
        '--header', 'Expect: ',
        '--location',
        options[:url].to_s,
        '--output', tempfile_path
      ].map { |arg| Shellwords.escape(arg) }.join(' ')
    end

    # Shell-escaped mysql command line that SOURCEs the downloaded file.
    # mysql_config is the connection's config hash. ActiveRecord stores the
    # server under :host; :hostname is still honoured as a fallback because
    # that is the key this method used to read.
    #
    # NOTE(review): the password is passed on the command line and is
    # therefore visible in the process list while mysql runs.
    def mysql_command(mysql_config)
      host = mysql_config[:host] || mysql_config[:hostname]
      args = ['mysql', '--batch']
      args << "--host=#{host}" if host.present?
      args << "--user=#{mysql_config[:username]}"
      args << "--password=#{mysql_config[:password]}"
      args << "--database=#{mysql_config[:database]}"
      args << "--execute=SOURCE #{tempfile_path}"
      args.map { |arg| Shellwords.escape(arg) }.join(' ')
    end

    # Runs cmd in a subshell and returns its stdout. A non-zero exit status
    # is reported through DataMiner.log_or_raise instead of being ignored.
    # The command itself is kept out of the message (it may hold a password).
    def run_shell(cmd, failure_message)
      output = `#{cmd}`
      status = $?
      DataMiner.log_or_raise "#{failure_message} (exit status #{status.exitstatus})" unless status.success?
      output
    end

    def download_sql_source
      run_shell download_command, "Clone could not download #{options[:url]}"
    end

    # Scans the downloaded SQL line by line and reports (through
    # DataMiner.log_or_raise) the first INSERT/CREATE/ALTER/DROP statement
    # whose table differs from resource.table_name. A "db.table" target is
    # compared by its last dot-separated segment.
    def perform_sanity_check
      File.open(tempfile_path, 'r') do |infile|
        infile.each_line do |line|
          # Collapse quoting and punctuation to single spaces so that
          # `airports` and "airports" both read as a bare word.
          line_essence = line.gsub(/[^\-\_\.a-zA-Z0-9]+/, ' ')
          match = TABLE_STATEMENT.match(line_essence)
          next unless match
          one = match[1]
          two = match[2]
          next if two.split('.').last == resource.table_name
          DataMiner.log_or_raise %{

            Warning: clone SQL tries to #{one} on `#{two}` instead of `#{resource.table_name}`. (#{line[0,100]}...)

            If you want to ignore this, use clone 'X', :url => 'Y', :sanity_check => false

            If you need to set a different table name, you could say set_table_name '#{two}' in your ActiveRecord model.
          }
        end
      end
    end

    def execute_sql_source
      # NOTE(review): reads the adapter's private @config; there is no public
      # accessor used here, so this depends on the adapter's internals.
      mysql_config = ActiveRecord::Base.connection.instance_variable_get :@config
      run_shell mysql_command(mysql_config), "Clone could not execute the SQL downloaded from #{options[:url]}"
    end
  end
end
@@ -12,8 +12,13 @@ module DataMiner
12
12
  end
13
13
 
14
14
  def process(method_name_or_block_description, &block)
15
- self.runnable_counter += 1
16
15
  runnables << DataMiner::Process.new(self, runnable_counter, method_name_or_block_description, &block)
16
+ self.runnable_counter += 1
17
+ end
18
+
19
# Builds a DataMiner::Clone step and appends it to runnables. The step
# receives the current runnable_counter as its position; the counter is
# advanced only afterwards. DataMiner::Clone reports any position other than
# 0 through DataMiner.log_or_raise, so this is meant to be the first step.
#
# NOTE(review): this method overrides Ruby's built-in Object#clone for this
# class, so code expecting the object-copying #clone gets this one instead.
def clone(description, options = {})
  step = DataMiner::Clone.new(self, runnable_counter, description, options)
  runnables << step
  self.runnable_counter += 1
end
18
23
 
19
24
  def import(*args, &block)
@@ -24,10 +29,10 @@ module DataMiner
24
29
  end
25
30
  options = args.last
26
31
 
27
- self.runnable_counter += 1
28
32
  runnable = DataMiner::Import.new self, runnable_counter, description, options
29
33
  Blockenspiel.invoke block, runnable
30
34
  runnables << runnable
35
+ self.runnable_counter += 1
31
36
  end
32
37
 
33
38
  # Mine data for this class.
data/lib/data_miner.rb CHANGED
@@ -12,6 +12,7 @@ require 'data_miner/configuration'
12
12
  require 'data_miner/dictionary'
13
13
  require 'data_miner/import'
14
14
  require 'data_miner/process'
15
+ require 'data_miner/clone'
15
16
  require 'data_miner/run'
16
17
 
17
18
  module DataMiner
@@ -438,6 +438,24 @@ class Airport < ActiveRecord::Base
438
438
  end
439
439
  end
440
440
 
441
# Test model backed by the 'airports' table whose only data_miner step is a
# clone from a remote SQL file, with the sanity check explicitly enabled.
class ClonedAirport < ActiveRecord::Base
  set_primary_key :iata_code
  set_table_name 'airports'

  data_miner do
    clone(
      'a sanitized airports table',
      :url => 'http://data.brighterplanet.com/airports.sql',
      :sanity_check => true
    )
  end
end
449
+
450
# Test model that clones the same remote airports SQL file as a model backed
# by 'airports' would, but is itself backed by the 'badly_cloned_airports'
# table, with the sanity check explicitly enabled.
# NOTE(review): presumably the downloaded SQL targets `airports`, which is
# what should make the sanity check fail for this model - confirm against the
# SQL file.
class BadlyClonedAirport < ActiveRecord::Base
  set_primary_key :iata_code
  set_table_name 'badly_cloned_airports'

  data_miner do
    clone(
      'a sanitized airports table',
      :url => 'http://data.brighterplanet.com/airports.sql',
      :sanity_check => true
    )
  end
end
458
+
441
459
  class CensusRegion < ActiveRecord::Base
442
460
  set_primary_key :number
443
461
 
@@ -884,7 +902,21 @@ end
884
902
 
885
903
  # todo: have somebody properly organize these
886
904
  class DataMinerTest < Test::Unit::TestCase
905
+ if ENV['NEW'] == 'true'
906
+ end
907
+
887
908
  if ENV['FAST'] == 'true'
909
+ should "clone airports" do
910
+ ClonedAirport.run_data_miner!
911
+ assert ClonedAirport.count > 0
912
+ end
913
+
914
+ should "raise an error when the SQL for cloning doesn't seem to match up" do
915
+ assert_raises(RuntimeError) do
916
+ BadlyClonedAirport.run_data_miner!
917
+ end
918
+ end
919
+
888
920
  should "import airports" do
889
921
  Airport.run_data_miner!
890
922
  assert Airport.count > 0
data/test/test_helper.rb CHANGED
@@ -83,6 +83,10 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
83
83
  end
84
84
  execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
85
85
 
86
+ create_table 'badly_cloned_airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
87
+ t.string 'placeholder'
88
+ end
89
+
86
90
  create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
87
91
  t.string 'iata_code'
88
92
  t.string 'name'
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 14
9
- version: 0.4.14
8
+ - 15
9
+ version: 0.4.15
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -150,6 +150,7 @@ files:
150
150
  - data_miner.gemspec
151
151
  - lib/data_miner.rb
152
152
  - lib/data_miner/attribute.rb
153
+ - lib/data_miner/clone.rb
153
154
  - lib/data_miner/configuration.rb
154
155
  - lib/data_miner/dictionary.rb
155
156
  - lib/data_miner/import.rb