impaler 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +36 -0
- data/Rakefile +1 -0
- data/impaler.gemspec +27 -0
- data/lib/impaler.rb +42 -0
- data/lib/impaler/manager.rb +164 -0
- data/lib/impaler/version.rb +3 -0
- data/spec/impaler_connected_spec.rb +211 -0
- data/spec/impaler_spec.rb +91 -0
- metadata +151 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 LivingSocial Inc
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Impaler
|
2
|
+
|
3
|
+
Impaler combines the best of Impala and Hive. Queries are run on Impala first; if a query fails there, Impaler falls back to running it in Hive.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'impaler'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install impaler
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
Basic Usage
|
22
|
+
|
23
|
+
require 'impaler'
|
24
|
+
c = Impaler.connect(['impala_server:21000'], ['hivethrift_server:10000'])
|
25
|
+
c.query("select count(*) from my_table") # This will run in Impala
|
26
|
+
c.query("select name, collect_set(foo) from my_table") # This will run in Hive (after a quick error on Impala)
|
27
|
+
c.query("select count(*) from my_table", Impaler::HIVE_ONLY) # This is forced to run on Hive
|
28
|
+
|
29
|
+
## Contributing
|
30
|
+
|
31
|
+
1. Fork it
|
32
|
+
1. Create your feature branch (`git checkout -b my-new-feature`)
|
33
|
+
1. Test your changes in both connected and unconnected modes (`rspec` and `IMPALA_SERVER=server:21000 HIVETHRIFT_SERVER=server:10000 TEST_TABLE=my_test_table TEST_TABLE_COLUMN=some_test_column rspec`)
|
34
|
+
1. Commit your changes (`git commit -am 'Add some feature'`)
|
35
|
+
1. Push to the branch (`git push origin my-new-feature`)
|
36
|
+
1. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/impaler.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'impaler/version'
|
5
|
+
|
6
|
+
# Gem definition for impaler. The version comes from Impaler::VERSION and the
# file lists are derived from whatever git currently tracks.
Gem::Specification.new do |gem|
  # Identity
  gem.name        = "impaler"
  gem.version     = Impaler::VERSION
  gem.authors     = ["John Meagher", "Trent Albright"]
  gem.email       = ["john.meagher@gmail.com", "trent.albright@gmail.com"]
  gem.description = "Wrapper around Impala and Hive gems"
  gem.summary     = "Run in Impala when possible and fall back to Hive when needed"
  gem.homepage    = "https://github.com/livingsocial/impaler"
  gem.license     = "MIT"

  # Packaged files: everything tracked by git; executables are the basenames
  # of files under bin/, test files are those under test/, spec/ or features/.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_dependency "impala"
  gem.add_dependency "rbhive"

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec"
end
|
data/lib/impaler.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require "logger"

require "impaler/version"
require "impaler/manager"
|
4
|
+
|
5
|
+
module Impaler

  # Base class for the errors defined by Impaler.
  class ImpalerError < StandardError; end

  # Raised when no server was configured or no connection could be opened.
  class ConnectionError < ImpalerError; end

  # Raised when a query could not be attempted on any connection.
  class QueryError < ImpalerError; end

  # Logger used when the caller does not supply one: it writes to STDOUT and
  # has its level set to WARN.
  class ImpalerDefaultLogger < Logger
    def initialize
      super(STDOUT)
      self.level = Logger::WARN
    end
  end

  DEFAULT_LOGGER = ImpalerDefaultLogger.new

  # Connect to the servers and optionally execute a block of code with the
  # resulting manager.
  #
  # @param impala_servers [String, Array<String>, nil] "host:port" of the
  #   impala server, or an array of "host:port" entries to pick one from
  # @param hivethrift_servers [String, Array<String>, nil] "host:port" of the
  #   hive thrift server (v1), or an array of "host:port" entries to pick one from
  # @param logger [Logger] logger handed to the manager; defaults to
  #   Impaler::DEFAULT_LOGGER
  # @yieldparam manager [Impaler::Manager] the open manager; it is closed once
  #   the block finishes, even if the block raises
  # @return [Impaler::Manager, Object] the open manager, or, if a block is
  #   passed, the return value of the block
  def self.connect(impala_servers, hivethrift_servers, logger=Impaler::DEFAULT_LOGGER)
    manager = Manager.new(impala_servers, hivethrift_servers, logger)
    return manager unless block_given?

    begin
      yield manager
    ensure
      # Always release the connections opened for the block form.
      manager.close
    end
  end

end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
require "rbhive"
|
2
|
+
require "impala"
|
3
|
+
|
4
|
+
module Impaler

  # Query routing modes accepted by Manager#query.
  IMPALA_THEN_HIVE = 1 # try impala first, then hive if impala raised
  HIVE_ONLY = 2        # never send the query to impala
  IMPALA_ONLY = 3      # never send the query to hive

  # Holds one impala connection and/or one hive thrift connection and routes
  # queries between them.
  class Manager

    # Picks one server from each configured list and opens the connections.
    #
    # @param impala_servers [String, Array<String>, nil] "host:port" or a list
    #   of "host:port" entries; nil or an empty list means "no impala"
    # @param hivethrift_servers [String, Array<String>, nil] same, for hive thrift
    # @param logger [#debug, #warn] receives connection and query messages
    # @raise [Impaler::ConnectionError] when neither side has a usable server
    def initialize(impala_servers, hivethrift_servers, logger=Impaler::DEFAULT_LOGGER)
      @impala_servers = server_list(impala_servers)
      @hivethrift_servers = server_list(hivethrift_servers)

      if @impala_servers.nil? && @hivethrift_servers.nil?
        raise Impaler::ConnectionError.new("No impala or hive servers were specified, at least one is required")
      end

      # Ports are kept as the strings found after the ":" separator.
      @impala_host, @impala_port = pick_server(@impala_servers)
      @hivethrift_host, @hivethrift_port = pick_server(@hivethrift_servers)

      @logger = logger
      open
    end

    # Opens a connection to every backend that has both a host and a port.
    #
    # @raise [Impaler::ConnectionError] when no backend had a host and a port
    def open
      connected = false

      if @impala_host && @impala_port
        @logger.debug "Impala connection #{@impala_host}:#{@impala_port}"
        @impala_connection = Impala.connect(@impala_host, @impala_port)
        @impala_connection.open
        @impala_connection.refresh
        connected = true
      else
        @impala_connection = nil
      end

      if @hivethrift_host && @hivethrift_port
        @logger.debug "Hivethrift connection #{@hivethrift_host}:#{@hivethrift_port}"
        @hivethrift_connection = RBHive::Connection.new(@hivethrift_host, @hivethrift_port)
        @hivethrift_connection.open
        connected = true
      else
        @hivethrift_connection = nil
      end

      raise Impaler::ConnectionError.new("All connections failed") unless connected
    end

    # Closes and forgets whichever connections are currently held. Queries
    # issued afterwards raise Impaler::QueryError.
    def close
      unless @impala_connection.nil?
        @impala_connection.close
        @impala_connection = nil
      end

      unless @hivethrift_connection.nil?
        @hivethrift_connection.close
        @hivethrift_connection = nil
      end
    end


    # ###########################################################################
    # General use methods

    # Runs +sql+ on impala and/or hive according to +query_mode+.
    #
    # @param sql [String] the statement to run
    # @param query_mode [Integer] one of IMPALA_THEN_HIVE, HIVE_ONLY, IMPALA_ONLY
    # @return [Object] whatever the backend that succeeded returned
    # @raise [StandardError] the most recent backend error when every attempt failed
    # @raise [Impaler::QueryError] when no backend was eligible to run the query
    def query(sql, query_mode = Impaler::IMPALA_THEN_HIVE)
      ret = nil
      error = nil
      success = false

      unless query_mode == Impaler::HIVE_ONLY || @impala_connection.nil?
        begin
          @logger.debug "Trying query in impala"
          ret = @impala_connection.query(sql)
          @logger.debug "Successful query in impala"
          success = true
        rescue StandardError => e
          # Remember the failure; hive may still be able to answer.
          error = e
          @logger.warn "Impala error: #{e}"
        end
      end

      unless @hivethrift_connection.nil? || success || query_mode == Impaler::IMPALA_ONLY
        begin
          @logger.debug "Trying query in hive"
          ret = @hivethrift_connection.fetch(sql)
          @logger.debug "Successful query in hive"
          success = true
        rescue StandardError => e
          error = e
          @logger.warn "Hive error: #{e}"
        end
      end

      unless success
        raise error unless error.nil?
        raise Impaler::QueryError.new("Query did not run due to no connections being available")
      end

      ret
    end

    # Forwards a key/value setting to hive. Does nothing when there is no
    # hive connection.
    def set(key, value)
      # Only run on hive since that's the only one that supports set for now
      @hivethrift_connection.set(key, value) unless @hivethrift_connection.nil?
    end


    # ###########################################################################
    # Helper query methods

    # Counts the rows of +tablename+. The name is interpolated into the SQL
    # as-is, so it must come from a trusted source.
    #
    # @return [Object] the :c value of the first result row
    def row_count(tablename)
      query("SELECT COUNT(1) c FROM #{tablename}").first[:c]
    end


    # ###########################################################################
    # Metadata methods

    # Describes +tablename+ as a hash of column name to column type, both
    # symbols. Rows keyed by :name/:type or by :col_name/:data_type are accepted.
    #
    # @return [Hash{Symbol => Symbol}]
    def columns(tablename)
      desc = {}
      query("describe #{tablename}").each do |col|
        cname = col[:name] || col[:col_name]
        ctype = col[:type] || col[:data_type]
        desc[cname.intern] = ctype.intern
      end
      desc
    end

    # Lists table names, optionally restricted to a SHOW TABLES pattern.
    # Rows keyed by :name or by :tab_name are accepted.
    #
    # @param pattern [String, nil] quoted into the statement when given
    # @return [Array] one entry per result row
    def tables(pattern=nil)
      q = "SHOW TABLES"
      q += " '#{pattern}'" unless pattern.nil?
      query(q).map { |table| table[:name] || table[:tab_name] }
    end

    private

    # Normalizes a single entry or a list of entries into a non-empty Array
    # without nils, or nil when nothing usable was given.
    def server_list(servers)
      list = (servers.is_a?(Array) ? servers : [servers]).compact
      list.empty? ? nil : list
    end

    # Picks a random "host:port" entry and splits it into [host, port];
    # returns [nil, nil] when there is no list to pick from.
    def pick_server(servers)
      return [nil, nil] if servers.nil?

      host, port = servers.sample.split(":")
      [host, port]
    end

  end

end
|
@@ -0,0 +1,211 @@
|
|
1
|
+
require 'impaler'
|
2
|
+
|
3
|
+
# These tests cover the Impaler interaction with a real impala and hive server.
|
4
|
+
# To run these define environment variables for IMPALA_SERVER and HIVETHRIFT_SERVER
|
5
|
+
# in the format "server:port".
|
6
|
+
|
7
|
+
# Connection targets, each in "server:port" form (nil when the variable is unset).
IMPALA_SERVER = ENV['IMPALA_SERVER']
HIVETHRIFT_SERVER = ENV['HIVETHRIFT_SERVER']
IMPALA_SERVERS = [IMPALA_SERVER]
HIVETHRIFT_SERVERS = [HIVETHRIFT_SERVER]

# Table and column the connected examples query.
TEST_TABLE = ENV['TEST_TABLE']
TEST_TABLE_COLUMN = ENV['TEST_TABLE_COLUMN']

# The connected examples only run when all four settings are present.
run_tests = [TEST_TABLE, TEST_TABLE_COLUMN, IMPALA_SERVER, HIVETHRIFT_SERVER].none? { |setting| setting.nil? }

# Any value of SKIP_SLOW, even an empty one, turns the slow examples off.
SKIP_SLOW = ENV.key?('SKIP_SLOW')

# Manager connected to both impala and hive.
def connect
  Impaler.connect(IMPALA_SERVERS, HIVETHRIFT_SERVERS)
end

# Manager connected to impala only.
def connect_impala
  Impaler.connect(IMPALA_SERVERS, nil)
end

# Manager connected to hive thrift only.
def connect_hivethrift
  Impaler.connect(nil, HIVETHRIFT_SERVERS)
end
|
32
|
+
|
33
|
+
|
34
|
+
# Examples that talk to real impala and hive servers. The whole group is
# filtered out unless run_tests is true; the single "Connected skip" example
# carries the opposite filter so it can print setup instructions instead.
describe Impaler, :if => run_tests do
  it("Connected skip", :if => !run_tests) do
    puts "Skipping connected tests for Impaler, set the environment variables IMPALA_SERVER, HIVETHRIFT_SERVER, TEST_TABLE, and TEST_TABLE_COLUMN to enable these"
    puts "IMPALA_SERVER and HIVETHRIFT_SERVER are in the format 'server:port'"
    puts "TEST_TABLE should be a fairly small table for quick tests and TEST_TABLE_COLUMN will be used for some simple test queries where Impala is known to not support the query"
    puts "Optionally set the environment variable SKIP_SLOW=true to skip the hive invocations which are a bit slow"
  end

  describe "connect" do
    it "connects without error" do
      expect { Impaler::connect(IMPALA_SERVERS, HIVETHRIFT_SERVERS) }.not_to raise_error
      expect { Impaler::connect(IMPALA_SERVERS, nil) }.not_to raise_error
      expect { Impaler::connect(nil, HIVETHRIFT_SERVERS) }.not_to raise_error
    end

    it "connects with single value server entries without error" do
      expect { Impaler::connect(IMPALA_SERVER, HIVETHRIFT_SERVER) }.not_to raise_error
      expect { Impaler::connect(IMPALA_SERVER, nil) }.not_to raise_error
      expect { Impaler::connect(nil, HIVETHRIFT_SERVER) }.not_to raise_error
    end
  end

  describe "close" do
    it "opens and closes without errors" do
      expect { connect.close }.not_to raise_error
      expect { connect_impala.close }.not_to raise_error
      expect { connect_hivethrift.close }.not_to raise_error
    end

    it "fails after a close" do
      expect {
        c = connect
        c.close
        c.query "select count(*) from #{TEST_TABLE}"
      }.to raise_error(Impaler::QueryError)
    end
  end

  describe "simple query" do
    it "supports a count(*) query" do
      c = connect
      count_sql = "select count(*) c from #{TEST_TABLE}"

      # The first answer is the baseline every other mode must agree with.
      count = c.query(count_sql).first[:c]
      expect(c.query(count_sql).first[:c]).to eq(count)
      expect(c.query(count_sql, Impaler::IMPALA_ONLY).first[:c]).to eq(count)
      expect(c.query(count_sql, Impaler::IMPALA_THEN_HIVE).first[:c]).to eq(count)
      unless SKIP_SLOW
        expect(c.query(count_sql, Impaler::HIVE_ONLY).first[:c]).to eq(count)
      end
    end

    it "fails with garbage queries" do
      c = connect
      garbage = "select sdffdsa from lkjasdfjkhadf"
      expect { c.query garbage, Impaler::IMPALA_ONLY }.to raise_error(Impala::Protocol::Beeswax::BeeswaxException)
      expect { c.query garbage, Impaler::HIVE_ONLY }.to raise_error(HiveServerException)
      expect { c.query garbage }.to raise_error(HiveServerException)
    end
  end

  describe "unsupported impala queries", :unless => SKIP_SLOW do
    it "fails when run with impala only" do
      c = connect
      expect { c.query "select collect_set(#{TEST_TABLE_COLUMN}) from #{TEST_TABLE}", Impaler::IMPALA_ONLY }.to raise_error(Impala::Protocol::Beeswax::BeeswaxException)
    end

    it "falls back to hive if impala generates an error" do
      c = connect
      expect { c.query "select collect_set(#{TEST_TABLE_COLUMN}) from #{TEST_TABLE}" }.not_to raise_error
    end
  end

  describe "handles down servers" do
    it "handles having no impala server" do
      c = connect_hivethrift
      count_sql = "select count(*) c from #{TEST_TABLE}"
      expect { c.query count_sql, Impaler::IMPALA_ONLY }.to raise_error(Impaler::QueryError)
      unless SKIP_SLOW
        expect { c.query count_sql, Impaler::HIVE_ONLY }.not_to raise_error
        expect { c.query count_sql, Impaler::IMPALA_THEN_HIVE }.not_to raise_error
      end
    end

    it "handles having no hive server" do
      c = connect_impala
      count_sql = "select count(*) c from #{TEST_TABLE}"
      expect { c.query count_sql, Impaler::HIVE_ONLY }.to raise_error(Impaler::QueryError)
      expect { c.query count_sql, Impaler::IMPALA_ONLY }.not_to raise_error
      expect { c.query count_sql, Impaler::IMPALA_THEN_HIVE }.not_to raise_error
    end
  end

  describe "query consistency", :unless => SKIP_SLOW do
    it "queries return the same regardless of connection type" do
      q = "select * from #{TEST_TABLE} limit 5"
      base = connect.query(q)

      expect(connect_impala.query(q)).to eq(base)
      expect(connect_hivethrift.query(q)).to eq(base)
    end
  end

  describe "columns method works" do
    it "columns returns the same regardless of connection type" do
      base = connect.columns(TEST_TABLE)

      expect(connect_impala.columns(TEST_TABLE)).to eq(base)
      expect(connect_hivethrift.columns(TEST_TABLE)).to eq(base)
    end
  end

  describe "row_count method works", :unless => SKIP_SLOW do
    it "row_count returns the same regardless of connection type" do
      base = connect.row_count(TEST_TABLE)

      expect(connect_impala.row_count(TEST_TABLE)).to eq(base)
      expect(connect_hivethrift.row_count(TEST_TABLE)).to eq(base)
    end
  end

  describe "tables method works" do
    it "appears to work correctly in a basic mode" do
      expect(connect.tables).to include(TEST_TABLE)
    end

    it "works with patterns" do
      # First two characters of the table name followed by a wildcard.
      pattern = "#{TEST_TABLE[0,2]}*"

      expect(connect.tables(pattern)).to include(TEST_TABLE)
      expect(connect_impala.tables(pattern)).to include(TEST_TABLE)
      expect(connect_hivethrift.tables(pattern)).to include(TEST_TABLE)
    end

    it "tables returns the same regardless of connection type" do
      base = connect.tables(TEST_TABLE)

      expect(connect_impala.tables(TEST_TABLE)).to eq(base)
      expect(connect_hivethrift.tables(TEST_TABLE)).to eq(base)
    end

    it "tables returns the same with no args regardless of connection type" do
      base = connect.tables

      expect(connect_impala.tables).to eq(base)
      expect(connect_hivethrift.tables).to eq(base)
    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'impaler'
|
2
|
+
|
3
|
+
# These tests cover the internals of the Impaler library in isolation as much as possible
|
4
|
+
|
5
|
+
# Unit-level examples: the impala and hive connection classes are stubbed so
# that no server is contacted.
describe Impaler do
  before(:each) do
    RBHive::Connection.any_instance.stub(:open).and_return(true)
    Impala::Connection.any_instance.stub(:open).and_return(true)
    Impala::Connection.any_instance.stub(:refresh).and_return(true)
  end

  describe "connect" do
    it "fails with no servers" do
      expect { Impaler::connect(nil, nil) }.to raise_error(Impaler::ConnectionError)
    end

    it "handles valid server strings" do
      impala_server = "impala"
      impala_port = "123"
      hivethrift_server = "hivethrift"
      hivethrift_port = "456"

      # Checks the host/port the manager parsed for whichever sides are requested.
      verify = lambda do |manager, check_impala, check_hivethrift|
        if check_impala
          expect(manager.instance_variable_get(:@impala_host)).to eq(impala_server)
          expect(manager.instance_variable_get(:@impala_port)).to eq(impala_port)
        end
        if check_hivethrift
          expect(manager.instance_variable_get(:@hivethrift_host)).to eq(hivethrift_server)
          expect(manager.instance_variable_get(:@hivethrift_port)).to eq(hivethrift_port)
        end
      end

      impala = "#{impala_server}:#{impala_port}"
      hivethrift = "#{hivethrift_server}:#{hivethrift_port}"

      verify.call(Impaler.connect(impala, nil), true, false)
      verify.call(Impaler.connect(nil, hivethrift), false, true)
      verify.call(Impaler.connect(impala, hivethrift), true, true)
    end
  end

  describe "handles errors" do
    let(:conn) { Impaler.connect("impala:999","hive:999") }
    let(:impala_conn) { Impaler.connect("impala:999",nil) }
    let(:hive_conn) { Impaler.connect(nil,"hive:999") }
    let(:q) { "select count(*) from foo" }
    let(:q_return) { [{:c=>1}] }

    it "handles the no error state and uses impala" do
      Impala::Connection.any_instance.stub(:query).and_return(q_return)
      RBHive::Connection.any_instance.stub(:fetch).and_raise(StandardError)
      expect(conn.query(q)).to eq(q_return)
    end

    it "handles an impala error and falls back to hive" do
      Impala::Connection.any_instance.stub(:query).and_raise(StandardError)
      RBHive::Connection.any_instance.stub(:fetch).and_return(q_return)
      expect(conn.query(q)).to eq(q_return)
    end

    it "failure of both throws an error" do
      Impala::Connection.any_instance.stub(:query).and_raise(StandardError)
      RBHive::Connection.any_instance.stub(:fetch).and_raise(StandardError)
      expect { conn.query(q) }.to raise_error(StandardError)
    end

    it "impala only throws an error if impala errs" do
      Impala::Connection.any_instance.stub(:query).and_raise(StandardError)
      RBHive::Connection.any_instance.stub(:fetch).and_return(q_return)
      expect { impala_conn.query(q) }.to raise_error(StandardError)
    end

    it "skipping impala works" do
      Impala::Connection.any_instance.stub(:query).and_return([])
      RBHive::Connection.any_instance.stub(:fetch).and_return(q_return)
      expect(conn.query(q, Impaler::HIVE_ONLY)).to eq(q_return)
    end

    it "skipping hive works" do
      Impala::Connection.any_instance.stub(:query).and_raise(StandardError)
      RBHive::Connection.any_instance.stub(:fetch).and_return(q_return)
      expect { conn.query(q, Impaler::IMPALA_ONLY) }.to raise_error(StandardError)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: impaler
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- John Meagher
|
14
|
+
- Trent Albright
|
15
|
+
autorequire:
|
16
|
+
bindir: bin
|
17
|
+
cert_chain: []
|
18
|
+
|
19
|
+
date: 2013-09-09 00:00:00 -04:00
|
20
|
+
default_executable:
|
21
|
+
dependencies:
|
22
|
+
- !ruby/object:Gem::Dependency
|
23
|
+
name: impala
|
24
|
+
prerelease: false
|
25
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ">="
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
hash: 3
|
31
|
+
segments:
|
32
|
+
- 0
|
33
|
+
version: "0"
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rbhive
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 3
|
45
|
+
segments:
|
46
|
+
- 0
|
47
|
+
version: "0"
|
48
|
+
type: :runtime
|
49
|
+
version_requirements: *id002
|
50
|
+
- !ruby/object:Gem::Dependency
|
51
|
+
name: bundler
|
52
|
+
prerelease: false
|
53
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ~>
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
hash: 9
|
59
|
+
segments:
|
60
|
+
- 1
|
61
|
+
- 3
|
62
|
+
version: "1.3"
|
63
|
+
type: :development
|
64
|
+
version_requirements: *id003
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
name: rake
|
67
|
+
prerelease: false
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
hash: 3
|
74
|
+
segments:
|
75
|
+
- 0
|
76
|
+
version: "0"
|
77
|
+
type: :development
|
78
|
+
version_requirements: *id004
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
name: rspec
|
81
|
+
prerelease: false
|
82
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 3
|
88
|
+
segments:
|
89
|
+
- 0
|
90
|
+
version: "0"
|
91
|
+
type: :development
|
92
|
+
version_requirements: *id005
|
93
|
+
description: Wrapper around Impala and Hive gems
|
94
|
+
email:
|
95
|
+
- john.meagher@gmail.com
|
96
|
+
- trent.albright@gmail.com
|
97
|
+
executables: []
|
98
|
+
|
99
|
+
extensions: []
|
100
|
+
|
101
|
+
extra_rdoc_files: []
|
102
|
+
|
103
|
+
files:
|
104
|
+
- .gitignore
|
105
|
+
- Gemfile
|
106
|
+
- LICENSE.txt
|
107
|
+
- README.md
|
108
|
+
- Rakefile
|
109
|
+
- impaler.gemspec
|
110
|
+
- lib/impaler.rb
|
111
|
+
- lib/impaler/manager.rb
|
112
|
+
- lib/impaler/version.rb
|
113
|
+
- spec/impaler_connected_spec.rb
|
114
|
+
- spec/impaler_spec.rb
|
115
|
+
has_rdoc: true
|
116
|
+
homepage: https://github.com/livingsocial/impaler
|
117
|
+
licenses:
|
118
|
+
- MIT
|
119
|
+
post_install_message:
|
120
|
+
rdoc_options: []
|
121
|
+
|
122
|
+
require_paths:
|
123
|
+
- lib
|
124
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
125
|
+
none: false
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
hash: 3
|
130
|
+
segments:
|
131
|
+
- 0
|
132
|
+
version: "0"
|
133
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
|
+
none: false
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
hash: 3
|
139
|
+
segments:
|
140
|
+
- 0
|
141
|
+
version: "0"
|
142
|
+
requirements: []
|
143
|
+
|
144
|
+
rubyforge_project:
|
145
|
+
rubygems_version: 1.6.2
|
146
|
+
signing_key:
|
147
|
+
specification_version: 3
|
148
|
+
summary: Run in Impala when possible and fall back to Hive when needed
|
149
|
+
test_files:
|
150
|
+
- spec/impaler_connected_spec.rb
|
151
|
+
- spec/impaler_spec.rb
|