data_miner 0.4.26 → 0.4.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -18,6 +18,7 @@ begin
18
18
  gem.add_dependency 'blockenspiel', '>=0.3.2'
19
19
  gem.add_dependency 'log4r', '>=1.1.7'
20
20
  gem.add_dependency 'errata', '>=0.2.1'
21
+ gem.add_dependency 'taps', '>=0.3.5'
21
22
  gem.add_development_dependency "loose_tight_dictionary", ">=0.0.5"
22
23
  gem.require_path = "lib"
23
24
  gem.files.include %w(lib/data_miner) unless gem.files.empty? # seems to fail once it's in the wild
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.26
1
+ 0.4.27
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{data_miner}
8
- s.version = "0.4.26"
8
+ s.version = "0.4.27"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-05-06}
12
+ s.date = %q{2010-05-07}
13
13
  s.description = %q{Mine remote data into your ActiveRecord models. You can also perform associations and convert units.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -27,12 +27,12 @@ Gem::Specification.new do |s|
27
27
  "data_miner.gemspec",
28
28
  "lib/data_miner.rb",
29
29
  "lib/data_miner/attribute.rb",
30
- "lib/data_miner/clone.rb",
31
30
  "lib/data_miner/configuration.rb",
32
31
  "lib/data_miner/dictionary.rb",
33
32
  "lib/data_miner/import.rb",
34
33
  "lib/data_miner/process.rb",
35
34
  "lib/data_miner/run.rb",
35
+ "lib/data_miner/tap.rb",
36
36
  "test/data_miner_test.rb",
37
37
  "test/test_helper.rb"
38
38
  ]
@@ -60,6 +60,7 @@ Gem::Specification.new do |s|
60
60
  s.add_runtime_dependency(%q<blockenspiel>, [">= 0.3.2"])
61
61
  s.add_runtime_dependency(%q<log4r>, [">= 1.1.7"])
62
62
  s.add_runtime_dependency(%q<errata>, [">= 0.2.1"])
63
+ s.add_runtime_dependency(%q<taps>, [">= 0.3.5"])
63
64
  s.add_development_dependency(%q<loose_tight_dictionary>, [">= 0.0.5"])
64
65
  else
65
66
  s.add_dependency(%q<remote_table>, [">= 0.2.20"])
@@ -70,6 +71,7 @@ Gem::Specification.new do |s|
70
71
  s.add_dependency(%q<blockenspiel>, [">= 0.3.2"])
71
72
  s.add_dependency(%q<log4r>, [">= 1.1.7"])
72
73
  s.add_dependency(%q<errata>, [">= 0.2.1"])
74
+ s.add_dependency(%q<taps>, [">= 0.3.5"])
73
75
  s.add_dependency(%q<loose_tight_dictionary>, [">= 0.0.5"])
74
76
  end
75
77
  else
@@ -81,6 +83,7 @@ Gem::Specification.new do |s|
81
83
  s.add_dependency(%q<blockenspiel>, [">= 0.3.2"])
82
84
  s.add_dependency(%q<log4r>, [">= 1.1.7"])
83
85
  s.add_dependency(%q<errata>, [">= 0.2.1"])
86
+ s.add_dependency(%q<taps>, [">= 0.3.5"])
84
87
  s.add_dependency(%q<loose_tight_dictionary>, [">= 0.0.5"])
85
88
  end
86
89
  end
@@ -22,8 +22,8 @@ require 'data_miner/attribute'
22
22
  require 'data_miner/configuration'
23
23
  require 'data_miner/dictionary'
24
24
  require 'data_miner/import'
25
+ require 'data_miner/tap'
25
26
  require 'data_miner/process'
26
- require 'data_miner/clone'
27
27
  require 'data_miner/run'
28
28
 
29
29
  module DataMiner
@@ -69,6 +69,24 @@ module DataMiner
69
69
  def self.resource_names
70
70
  DataMiner::Configuration.resource_names
71
71
  end
72
+
73
+ # TODO this should probably live somewhere else
74
+ def self.backtick_with_reporting(cmd, raise_on_error = true)
75
+ cmd = cmd.gsub /\s+/m, ' '
76
+ output = `#{cmd}`
77
+ if raise_on_error and not $?.success?
78
+ raise %{
79
+ From the data_miner gem...
80
+
81
+ Command failed:
82
+ #{cmd}
83
+
84
+ Output:
85
+ #{output}
86
+ }
87
+ end
88
+ end
89
+
72
90
  end
73
91
 
74
92
  ActiveRecord::Base.class_eval do
@@ -16,8 +16,8 @@ module DataMiner
16
16
  self.step_counter += 1
17
17
  end
18
18
 
19
- def clone(description, options = {})
20
- steps << DataMiner::Clone.new(self, step_counter, description, options)
19
+ def tap(description, source, options = {})
20
+ steps << DataMiner::Tap.new(self, step_counter, description, source, options)
21
21
  self.step_counter += 1
22
22
  end
23
23
 
@@ -0,0 +1,137 @@
1
+
2
+ module DataMiner
3
+ class Tap
4
+ attr_reader :configuration
5
+ attr_reader :position_in_run
6
+ attr_reader :description
7
+ attr_reader :source
8
+ attr_reader :options
9
+ delegate :resource, :to => :configuration
10
+
11
+ def initialize(configuration, position_in_run, description, source, options = {})
12
+ options.symbolize_keys!
13
+ DataMiner.log_or_raise "Tap has to be the first step." unless position_in_run == 0
14
+ @configuration = configuration
15
+ @position_in_run = position_in_run
16
+ @description = description
17
+ @source = source
18
+ @options = options
19
+ end
20
+
21
+ def inspect
22
+ "Tap(#{resource}): #{description} (#{source})"
23
+ end
24
+
25
+ def run(run)
26
+ [ source_table_name, resource.table_name ].each do |possible_obstacle|
27
+ if connection.table_exists?(possible_obstacle)
28
+ connection.drop_table possible_obstacle
29
+ end
30
+ end
31
+ DataMiner.backtick_with_reporting taps_pull_cmd
32
+ if needs_table_rename?
33
+ connection.rename_table source_table_name, resource.table_name
34
+ end
35
+ DataMiner.log_info "ran #{inspect}"
36
+ end
37
+
38
+ private
39
+
40
+ def connection
41
+ ActiveRecord::Base.connection
42
+ end
43
+
44
+ def db_config
45
+ @_db_config ||= connection.instance_variable_get(:@config).dup.merge(options.except(:source_table_name))
46
+ end
47
+
48
+ def source_table_name
49
+ options[:source_table_name] || resource.table_name
50
+ end
51
+
52
+ def needs_table_rename?
53
+ source_table_name != resource.table_name
54
+ end
55
+
56
+ def adapter
57
+ case connection.adapter_name
58
+ when /mysql/i
59
+ 'mysql'
60
+ when /postgres/i
61
+ 'postgres'
62
+ when /sqlite/i
63
+ 'sqlite'
64
+ end
65
+ end
66
+
67
+ # never optional
68
+ def database
69
+ db_config[:database]
70
+ end
71
+
72
+ DEFAULT_PORTS = {
73
+ 'mysql' => 3306,
74
+ 'postgres' => 5432
75
+ }
76
+
77
+ DEFAULT_USERNAMES = {
78
+ 'mysql' => 'root',
79
+ 'postgres' => ''
80
+ }
81
+
82
+ DEFAULT_PASSWORDS = {}
83
+ DEFAULT_PASSWORDS.default = ''
84
+
85
+ DEFAULT_HOSTS = {}
86
+ DEFAULT_HOSTS.default = 'localhost'
87
+
88
+ %w{ username password port host }.each do |x|
89
+ module_eval %{
90
+ def #{x}
91
+ db_config[:#{x}] || DEFAULT_#{x.upcase}S[adapter]
92
+ end
93
+ }
94
+ end
95
+
96
+ def db_locator
97
+ case adapter
98
+ when 'mysql', 'postgres'
99
+ "#{username}:#{password}@#{host}:#{port}/#{database}"
100
+ when 'sqlite'
101
+ database
102
+ end
103
+ end
104
+
105
+ # taps pull mysql://root:password@localhost/taps_test http://foo:bar@data.brighterplanet.com:5000 --tables aircraft
106
+ def taps_pull_cmd
107
+ "taps pull #{adapter}://#{db_locator} #{source} --tables #{source_table_name}"
108
+ end
109
+
110
+ # 2.3.5 mysql
111
+ # * <tt>:host</tt> - Defaults to "localhost".
112
+ # * <tt>:port</tt> - Defaults to 3306.
113
+ # * <tt>:socket</tt> - Defaults to "/tmp/mysql.sock".
114
+ # * <tt>:username</tt> - Defaults to "root"
115
+ # * <tt>:password</tt> - Defaults to nothing.
116
+ # * <tt>:database</tt> - The name of the database. No default, must be provided.
117
+ # * <tt>:encoding</tt> - (Optional) Sets the client encoding by executing "SET NAMES <encoding>" after connection.
118
+ # * <tt>:reconnect</tt> - Defaults to false (See MySQL documentation: http://dev.mysql.com/doc/refman/5.0/en/auto-reconnect.html).
119
+ # * <tt>:sslca</tt> - Necessary to use MySQL with an SSL connection.
120
+ # * <tt>:sslkey</tt> - Necessary to use MySQL with an SSL connection.
121
+ # * <tt>:sslcert</tt> - Necessary to use MySQL with an SSL connection.
122
+ # * <tt>:sslcapath</tt> - Necessary to use MySQL with an SSL connection.
123
+ # * <tt>:sslcipher</tt> - Necessary to use MySQL with an SSL connection.
124
+ # 2.3.5 postgresql
125
+ # * <tt>:host</tt> - Defaults to "localhost".
126
+ # * <tt>:port</tt> - Defaults to 5432.
127
+ # * <tt>:username</tt> - Defaults to nothing.
128
+ # * <tt>:password</tt> - Defaults to nothing.
129
+ # * <tt>:database</tt> - The name of the database. No default, must be provided.
130
+ # * <tt>:schema_search_path</tt> - An optional schema search path for the connection given as a string of comma-separated schema names. This is backward-compatible with the <tt>:schema_order</tt> option.
131
+ # * <tt>:encoding</tt> - An optional client encoding that is used in a <tt>SET client_encoding TO <encoding></tt> call on the connection.
132
+ # * <tt>:min_messages</tt> - An optional client min messages that is used in a <tt>SET client_min_messages TO <min_messages></tt> call on the connection.
133
+ # * <tt>:allow_concurrency</tt> - If true, use async query methods so Ruby threads don't deadlock; otherwise, use blocking query methods.
134
+ # 2.3.5 sqlite[3]
135
+ # * <tt>:database</tt> - Path to the database file.
136
+ end
137
+ end
@@ -441,21 +441,12 @@ class Airport < ActiveRecord::Base
441
441
  end
442
442
  end
443
443
 
444
- class ClonedAirport < ActiveRecord::Base
444
+ class TappedAirport < ActiveRecord::Base
445
445
  set_primary_key :iata_code
446
- set_table_name 'airports'
447
446
 
448
447
  data_miner do
449
- clone 'a sanitized airports table', :url => 'http://data.brighterplanet.com/airports.sql', :sanity_check => true
450
- end
451
- end
452
-
453
- class BadlyClonedAirport < ActiveRecord::Base
454
- set_primary_key :iata_code
455
- set_table_name 'badly_cloned_airports'
456
-
457
- data_miner do
458
- clone 'a sanitized airports table', :url => 'http://data.brighterplanet.com/airports.sql', :sanity_check => true
448
+ tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@data.brighterplanet.com:5001", :source_table_name => 'airports'
449
+ # tap "Brighter Planet's sanitized airports table", "http://carbon:neutral@localhost:5000", :source_table_name => 'airports'
459
450
  end
460
451
  end
461
452
 
@@ -1063,7 +1054,7 @@ class AircraftDeux < ActiveRecord::Base
1063
1054
  %w{ D }.each do |letter|
1064
1055
  import("ICAO codes starting with letter #{letter} used by the FAA",
1065
1056
  :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-#{letter}.htm",
1066
- :encoding => 'US-ASCII',
1057
+ :encoding => 'windows-1252',
1067
1058
  :errata => 'http://spreadsheets.google.com/pub?key=tObVAGyqOkCBtGid0tJUZrw',
1068
1059
  :row_xpath => '//table/tr[2]/td/table/tr',
1069
1060
  :column_xpath => 'td') do
@@ -1094,23 +1085,17 @@ class DataMinerTest < Test::Unit::TestCase
1094
1085
  assert CrosscallingCensusDivision.exists? :name => 'Mountain Division', :number => 8, :census_region_number => 4, :census_region_name => 'West Region'
1095
1086
  assert CrosscallingCensusRegion.exists? :name => 'West Region', :number => 4
1096
1087
  end
1097
-
1098
- should "clone airports" do
1099
- ClonedAirport.run_data_miner!
1100
- assert ClonedAirport.count > 0
1101
- end
1102
-
1103
- should "raise an error when the SQL for cloning doesn't seem to match up" do
1104
- assert_raises(RuntimeError) do
1105
- BadlyClonedAirport.run_data_miner!
1106
- end
1107
- end
1108
-
1088
+
1109
1089
  should "import airports" do
1110
1090
  Airport.run_data_miner!
1111
1091
  assert Airport.count > 0
1112
1092
  end
1113
1093
 
1094
+ should "tap airports" do
1095
+ TappedAirport.run_data_miner!
1096
+ assert TappedAirport.count > 0
1097
+ end
1098
+
1114
1099
  should "pull in census divisions using a data.brighterplanet.com dictionary" do
1115
1100
  CensusDivision.run_data_miner!
1116
1101
  assert CensusDivision.count > 0
@@ -86,9 +86,9 @@ ActiveRecord::Schema.define(:version => 20090819143429) do
86
86
  t.integer 'data_miner_last_run_id'
87
87
  end
88
88
  execute 'ALTER TABLE t100_flight_segments ADD PRIMARY KEY (row_hash);'
89
-
90
- create_table 'badly_cloned_airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
91
- t.string 'placeholder'
89
+
90
+ create_table 'tapped_airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
91
+ t.string 'i_am_just_here_to_get_in_the_way'
92
92
  end
93
93
 
94
94
  create_table 'airports', :force => true, :options => 'ENGINE=InnoDB default charset=utf8', :id => false do |t|
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 26
9
- version: 0.4.26
8
+ - 27
9
+ version: 0.4.27
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-06 00:00:00 -04:00
18
+ date: 2010-05-07 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -131,9 +131,23 @@ dependencies:
131
131
  type: :runtime
132
132
  version_requirements: *id008
133
133
  - !ruby/object:Gem::Dependency
134
- name: loose_tight_dictionary
134
+ name: taps
135
135
  prerelease: false
136
136
  requirement: &id009 !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ segments:
141
+ - 0
142
+ - 3
143
+ - 5
144
+ version: 0.3.5
145
+ type: :runtime
146
+ version_requirements: *id009
147
+ - !ruby/object:Gem::Dependency
148
+ name: loose_tight_dictionary
149
+ prerelease: false
150
+ requirement: &id010 !ruby/object:Gem::Requirement
137
151
  requirements:
138
152
  - - ">="
139
153
  - !ruby/object:Gem::Version
@@ -143,7 +157,7 @@ dependencies:
143
157
  - 5
144
158
  version: 0.0.5
145
159
  type: :development
146
- version_requirements: *id009
160
+ version_requirements: *id010
147
161
  description: Mine remote data into your ActiveRecord models. You can also perform associations and convert units.
148
162
  email: seamus@abshere.net
149
163
  executables: []
@@ -164,12 +178,12 @@ files:
164
178
  - data_miner.gemspec
165
179
  - lib/data_miner.rb
166
180
  - lib/data_miner/attribute.rb
167
- - lib/data_miner/clone.rb
168
181
  - lib/data_miner/configuration.rb
169
182
  - lib/data_miner/dictionary.rb
170
183
  - lib/data_miner/import.rb
171
184
  - lib/data_miner/process.rb
172
185
  - lib/data_miner/run.rb
186
+ - lib/data_miner/tap.rb
173
187
  - test/data_miner_test.rb
174
188
  - test/test_helper.rb
175
189
  has_rdoc: true
@@ -1,86 +0,0 @@
1
- module DataMiner
2
- class Clone
3
- attr_accessor :configuration
4
- attr_accessor :position_in_run
5
- attr_accessor :description
6
- attr_accessor :options
7
- delegate :resource, :to => :configuration
8
-
9
- def initialize(configuration, position_in_run, description, options = {})
10
- DataMiner.log_or_raise "Clone has to be the first step." unless position_in_run == 0
11
- DataMiner.log_or_raise "Clone needs :url" unless options[:url].present?
12
- @configuration = configuration
13
- @position_in_run = position_in_run
14
- @description = description
15
- @options = options
16
- end
17
-
18
- def inspect
19
- "Clone(#{resource}): #{description}"
20
- end
21
-
22
- def run(run)
23
- download_sql_source
24
- perform_sanity_check unless options[:sanity_check] == false
25
- execute_sql_source
26
- DataMiner.log_info "ran #{inspect}"
27
- end
28
-
29
- private
30
-
31
- def tempfile_path
32
- return @_tempfile_path if @_tempfile_path
33
- @_tempfile_path = File.join Dir.tmpdir, rand.to_s
34
- at_exit { FileUtils.rm_f @_tempfile_path }
35
- @_tempfile_path
36
- end
37
-
38
- def download_sql_source
39
- cmd = %{
40
- curl \
41
- --silent \
42
- --header "Expect: " \
43
- --location \
44
- "#{options[:url]}" \
45
- --output "#{tempfile_path}"
46
- }
47
- `#{cmd}`
48
- end
49
-
50
- def perform_sanity_check
51
- File.open(tempfile_path, 'r') do |infile|
52
- while (line = infile.gets)
53
- line_essence = line.gsub /[^\-\_\.a-zA-Z0-9]+/, ' '
54
- if line_essence =~ /(INSERT\s+INTO|CREATE\s+TABLE|ALTER\s+TABLE|DROP\s+TABLE\s+[^I]|DROP\s+TABLE\s+IF\s+EXISTS)\s+([^\s]+)/i
55
- one = $1
56
- two = $2
57
- unless two.split('.').last == resource.table_name
58
- DataMiner.log_or_raise %{
59
-
60
- Warning: clone SQL tries to #{one} on `#{two}` instead of `#{resource.table_name}`. (#{line[0,100]}...)
61
-
62
- If you want to ignore this, use clone 'X', :url => 'Y', :sanity_check => false
63
-
64
- If you need to set a different table name, you could say set_table_name '#{two}' in your ActiveRecord model.
65
- }
66
- end
67
- end
68
- end
69
- end
70
- end
71
-
72
- def execute_sql_source
73
- mysql_config = ActiveRecord::Base.connection.instance_variable_get :@config
74
- cmd = %{
75
- mysql \
76
- --batch \
77
- #{"--host=\"#{mysql_config[:hostname]}\"" if mysql_config[:hostname].present?} \
78
- --user="#{mysql_config[:username]}" \
79
- --password="#{mysql_config[:password]}" \
80
- --database="#{mysql_config[:database]}" \
81
- --execute="SOURCE #{tempfile_path}"
82
- }
83
- `#{cmd}`
84
- end
85
- end
86
- end