data_miner 0.4.14 → 0.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.14
1
+ 0.4.15
data/data_miner.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.14"
8
+ s.version = "0.4.15"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
@@ -27,6 +27,7 @@ Gem::Specification.new do |s|
27
27
  "data_miner.gemspec",
28
28
  "lib/data_miner.rb",
29
29
  "lib/data_miner/attribute.rb",
30
+ "lib/data_miner/clone.rb",
30
31
  "lib/data_miner/configuration.rb",
31
32
  "lib/data_miner/dictionary.rb",
32
33
  "lib/data_miner/import.rb",
@@ -0,0 +1,88 @@
1
+ module DataMiner
2
+ class Clone
3
+ attr_accessor :configuration
4
+ attr_accessor :position_in_run
5
+ attr_accessor :description
6
+ attr_accessor :options
7
+ delegate :resource, :to => :configuration
8
+
9
+ def initialize(configuration, position_in_run, description, options = {})
10
+ DataMiner.log_or_raise "Clone has to be the first step." unless position_in_run == 0
11
+ DataMiner.log_or_raise "Clone needs :url" unless options[:url].present?
12
+ @configuration = configuration
13
+ @position_in_run = position_in_run
14
+ @description = description
15
+ @options = options
16
+ end
17
+
18
+ def inspect
19
+ "Clone(#{resource}): #{description}"
20
+ end
21
+
22
+ def run(run)
23
+ download_sql_source
24
+ perform_sanity_check unless options[:sanity_check] == false
25
+ execute_sql_source
26
+ DataMiner.log_info "ran #{inspect}"
27
+ end
28
+
29
+ private
30
+
31
+ # from remote_table
32
+ def tempfile_path
33
+ return @_tempfile_path if @_tempfile_path
34
+ @_tempfile_path = Tempfile.open(options[:url].gsub(/[^a-z0-9]+/i, '_')[0,100]).path
35
+ FileUtils.rm_f @_tempfile_path
36
+ at_exit { FileUtils.rm_rf @_tempfile_path }
37
+ @_tempfile_path
38
+ end
39
+
40
+ def download_sql_source
41
+ cmd = %{
42
+ curl \
43
+ --silent \
44
+ --header "Expect: " \
45
+ --location \
46
+ "#{options[:url]}" \
47
+ --output "#{tempfile_path}"
48
+ }
49
+ `#{cmd}`
50
+ end
51
+
52
+ def perform_sanity_check
53
+ File.open(tempfile_path, 'r') do |infile|
54
+ while (line = infile.gets)
55
+ line_essence = line.gsub /[^\-\_\.a-zA-Z0-9]+/, ' '
56
+ if line_essence =~ /(INSERT\s+INTO|CREATE\s+TABLE|ALTER\s+TABLE|DROP\s+TABLE\s+[^I]|DROP\s+TABLE\s+IF\s+EXISTS)\s+([^\s]+)/i
57
+ one = $1
58
+ two = $2
59
+ unless two.split('.').last == resource.table_name
60
+ DataMiner.log_or_raise %{
61
+
62
+ Warning: clone SQL tries to #{one} on `#{two}` instead of `#{resource.table_name}`. (#{line[0,100]}...)
63
+
64
+ If you want to ignore this, use clone 'X', :url => 'Y', :sanity_check => false
65
+
66
+ If you need to set a different table name, you could say set_table_name '#{two}' in your ActiveRecord model.
67
+ }
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+
74
+ def execute_sql_source
75
+ mysql_config = ActiveRecord::Base.connection.instance_variable_get :@config
76
+ cmd = %{
77
+ mysql \
78
+ --batch \
79
+ #{"--host=\"#{mysql_config[:hostname]}\"" if mysql_config[:hostname].present?} \
80
+ --user="#{mysql_config[:username]}" \
81
+ --password="#{mysql_config[:password]}" \
82
+ --database="#{mysql_config[:database]}" \
83
+ --execute="SOURCE #{tempfile_path}"
84
+ }
85
+ `#{cmd}`
86
+ end
87
+ end
88
+ end
@@ -12,8 +12,13 @@ module DataMiner
12
12
  end
13
13
 
14
14
  def process(method_name_or_block_description, &block)
15
- self.runnable_counter += 1
16
15
  runnables << DataMiner::Process.new(self, runnable_counter, method_name_or_block_description, &block)
16
+ self.runnable_counter += 1
17
+ end
18
+
19
+ def clone(description, options = {})
20
+ runnables << DataMiner::Clone.new(self, runnable_counter, description, options)
21
+ self.runnable_counter += 1
17
22
  end
18
23
 
19
24
  def import(*args, &block)
@@ -24,10 +29,10 @@ module DataMiner
24
29
  end
25
30
  options = args.last
26
31
 
27
- self.runnable_counter += 1
28
32
  runnable = DataMiner::Import.new self, runnable_counter, description, options
29
33
  Blockenspiel.invoke block, runnable
30
34
  runnables << runnable
35
+ self.runnable_counter += 1
31
36
  end
32
37
 
33
38
  # Mine data for this class.
data/lib/data_miner.rb CHANGED
@@ -12,6 +12,7 @@ require 'data_miner/configuration'
12
12
  require 'data_miner/dictionary'
13
13
  require 'data_miner/import'
14
14
  require 'data_miner/process'
15
+ require 'data_miner/clone'
15
16
  require 'data_miner/run'
16
17
 
17
18
  module DataMiner
@@ -438,6 +438,24 @@ class Airport < ActiveRecord::Base
438
438
  end
439
439
  end
440
440
 
441
+ class ClonedAirport < ActiveRecord::Base
442
+ set_primary_key :iata_code
443
+ set_table_name 'airports'
444
+
445
+ data_miner do
446
+ clone 'a sanitized airports table', :url => 'http://data.brighterplanet.com/airports.sql', :sanity_check => true
447
+ end
448
+ end
449
+
450
+ class BadlyClonedAirport < ActiveRecord::Base
451
+ set_primary_key :iata_code
452
+ set_table_name 'badly_cloned_airports'
453
+
454
+ data_miner do
455
+ clone 'a sanitized airports table', :url => 'http://data.brighterplanet.com/airports.sql', :sanity_check => true
456
+ end
457
+ end
458
+
441
459
  class CensusRegion < ActiveRecord::Base
442
460
  set_primary_key :number
443
461
 
@@ -884,7 +902,21 @@ end
884
902
 
885
903
  # todo: have somebody properly organize these
886
904
  class DataMinerTest < Test::Unit::TestCase
905
+ if ENV['NEW'] == 'true'
906
+ end
907
+
887
908
  if ENV['FAST'] == 'true'
909
+ should "clone airports" do
910
+ ClonedAirport.run_data_miner!
911
+ assert ClonedAirport.count > 0
912
+ end
913
+
914
+ should "raise an error when the SQL for cloning doesn't seem to match up" do
915
+ assert_raises(RuntimeError) do
916
+ BadlyClonedAirport.run_data_miner!
917
+ end
918
+ end
919
+
888
920
  should "import airports" do
889
921
  Airport.run_data_miner!
890
922
  assert Airport.count > 0
data/test/test_helper.rb CHANGED
@@ -83,6 +83,10 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
83
83
  end
84
84
  execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
85
85
 
86
+ create_table 'badly_cloned_airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
87
+ t.string 'placeholder'
88
+ end
89
+
86
90
  create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
87
91
  t.string 'iata_code'
88
92
  t.string 'name'
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 14
9
- version: 0.4.14
8
+ - 15
9
+ version: 0.4.15
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -150,6 +150,7 @@ files:
150
150
  - data_miner.gemspec
151
151
  - lib/data_miner.rb
152
152
  - lib/data_miner/attribute.rb
153
+ - lib/data_miner/clone.rb
153
154
  - lib/data_miner/configuration.rb
154
155
  - lib/data_miner/dictionary.rb
155
156
  - lib/data_miner/import.rb