impaler 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +36 -0
- data/Rakefile +1 -0
- data/impaler.gemspec +27 -0
- data/lib/impaler.rb +42 -0
- data/lib/impaler/manager.rb +164 -0
- data/lib/impaler/version.rb +3 -0
- data/spec/impaler_connected_spec.rb +211 -0
- data/spec/impaler_spec.rb +91 -0
- metadata +151 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 LivingSocial Inc
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Impaler
|
2
|
+
|
3
|
+
Impaler combines the best of Impala and Hive. Queries are run on Impala first; if a query fails there, Impaler falls back to running it in Hive.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'impaler'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install impaler
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
Basic Usage
|
22
|
+
|
23
|
+
require 'impaler'
|
24
|
+
c = Impaler.connect(['impala_server:21000'], ['hivethrift_server:10000'])
|
25
|
+
c.query("select count(*) from my_table") # This will run in Impala
|
26
|
+
c.query("select name, collect_set(foo) from my_table") # This will run in Hive (after a quick error on Impala)
|
27
|
+
c.query("select count(*) from my_table", Impaler::HIVE_ONLY) # This is forced to run on Hive
|
28
|
+
|
29
|
+
## Contributing
|
30
|
+
|
31
|
+
1. Fork it
|
32
|
+
1. Create your feature branch (`git checkout -b my-new-feature`)
|
33
|
+
1. Test your changes in both connected and unconnected modes (`rspec` and `IMPALA_SERVER=server:21000 HIVETHRIFT_SERVER=server:10000 TEST_TABLE=my_test_table TEST_TABLE_COLUMN=some_test_column rspec`)
|
34
|
+
1. Commit your changes (`git commit -am 'Add some feature'`)
|
35
|
+
1. Push to the branch (`git push origin my-new-feature`)
|
36
|
+
1. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/impaler.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'impaler/version'
|
5
|
+
|
6
|
+
# Gem packaging definition for impaler.
Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name          = "impaler"
  gem.version       = Impaler::VERSION
  gem.authors       = ["John Meagher", "Trent Albright"]
  gem.email         = ["john.meagher@gmail.com", "trent.albright@gmail.com"]
  gem.description   = "Wrapper around Impala and Hive gems"
  gem.summary       = "Run in Impala when possible and fall back to Hive when needed"
  gem.homepage      = "https://github.com/livingsocial/impaler"
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list, so it must be assigned first.
  tracked_files = `git ls-files`.split($/)
  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies: the two backend client libraries.
  gem.add_dependency "impala"
  gem.add_dependency "rbhive"

  # Development-only tooling.
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec"
end
|
data/lib/impaler.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require "logger"

require "impaler/version"
require "impaler/manager"
|
4
|
+
|
5
|
+
module Impaler

  # Base class for every error raised by Impaler itself.
  class ImpalerError < StandardError; end
  # Raised when no usable server was specified or no connection could be made.
  class ConnectionError < ImpalerError; end
  # Raised when a query could not be run on any available connection.
  class QueryError < ImpalerError; end

  # Logger used when the caller does not supply one: writes to STDOUT and
  # only emits messages at WARN level or above.
  class ImpalerDefaultLogger < Logger
    def initialize
      super(STDOUT)
      self.level = Logger::WARN
    end
  end
  DEFAULT_LOGGER = ImpalerDefaultLogger.new

  # Connect to the servers and optionally execute a block of code
  # with the resulting manager.
  #
  # @param impala_servers [String, Array<String>, nil] host:port for the
  #   impala server, or an array of host:port strings to pick from
  # @param hivethrift_servers [String, Array<String>, nil] host:port for the
  #   hive thrift server (v1), or an array of host:port strings to pick from
  # @param logger [Logger] receives debug and warn messages
  # @yieldparam manager [Manager] the open manager. It is closed once the
  #   block finishes, even if the block raises
  # @return [Manager, Object] the open manager, or, if a block is
  #   passed, the return value of the block
  def self.connect(impala_servers, hivethrift_servers, logger=Impaler::DEFAULT_LOGGER)
    manager = Manager.new(impala_servers, hivethrift_servers, logger)
    return manager unless block_given?

    begin
      yield manager
    ensure
      manager.close
    end
  end
end
|
@@ -0,0 +1,164 @@
|
|
1
|
+
require "rbhive"
|
2
|
+
require "impala"
|
3
|
+
|
4
|
+
module Impaler

  # Query routing modes accepted by Manager#query.
  IMPALA_THEN_HIVE = 1 # try Impala first, use Hive if Impala raises
  HIVE_ONLY = 2        # skip Impala entirely
  IMPALA_ONLY = 3      # never fall back to Hive

  # Owns at most one Impala connection and one Hive Thrift connection and
  # routes each query to them according to the requested query mode.
  class Manager

    # Picks one server per backend and opens the connections.
    #
    # @param impala_servers [String, Array<String>, nil] "host:port" or a
    #   list of them; one entry is chosen at random
    # @param hivethrift_servers [String, Array<String>, nil] same, for Hive
    # @param logger [#debug, #warn] receives progress and error messages
    # @raise [Impaler::ConnectionError] if no server is given for either backend
    def initialize(impala_servers, hivethrift_servers, logger=Impaler::DEFAULT_LOGGER)
      @impala_servers = server_list(impala_servers)
      @hivethrift_servers = server_list(hivethrift_servers)

      # nil and an empty list both mean "no server configured".
      if @impala_servers.empty? && @hivethrift_servers.empty?
        raise Impaler::ConnectionError.new("No impaler or hive servers were specified, at least one is required")
      end

      @impala_host, @impala_port = pick_server(@impala_servers)
      @hivethrift_host, @hivethrift_port = pick_server(@hivethrift_servers)

      @logger = logger
      open
    end

    # Opens a connection to every backend that has both a host and a port.
    # Errors raised by the underlying drivers propagate to the caller; any
    # connection that was already opened is closed first so it is not leaked.
    #
    # @raise [Impaler::ConnectionError] if no backend had a usable host and port
    def open
      connected = false
      begin
        if !@impala_host.nil? && !@impala_port.nil?
          @logger.debug "Impala connection #{@impala_host}:#{@impala_port}"
          @impala_connection = Impala.connect(@impala_host, @impala_port)
          @impala_connection.open
          @impala_connection.refresh
          connected = true
        else
          @impala_connection = nil
        end

        if !@hivethrift_host.nil? && !@hivethrift_port.nil?
          @logger.debug "Hivethrift connection #{@hivethrift_host}:#{@hivethrift_port}"
          @hivethrift_connection = RBHive::Connection.new(@hivethrift_host, @hivethrift_port)
          @hivethrift_connection.open
          connected = true
        else
          @hivethrift_connection = nil
        end
      rescue StandardError => e
        # The caller never receives a manager when this is reached from
        # initialize, so it could not clean up a half-open pair itself.
        begin
          close
        rescue StandardError => close_error
          @logger.warn "Error closing connections after failed open: #{close_error}"
        end
        raise e
      end

      unless connected
        raise Impaler::ConnectionError.new("All connections failed")
      end
    end

    # Closes whichever connections are open and forgets them. The Hive
    # connection is closed even if closing the Impala connection raises.
    def close
      begin
        @impala_connection.close unless @impala_connection.nil?
      ensure
        @impala_connection = nil
        hive = @hivethrift_connection
        @hivethrift_connection = nil
        hive.close unless hive.nil?
      end
      nil
    end


    # ###########################################################################
    # General use methods

    # Runs a query on Impala and/or Hive depending on query_mode.
    #
    # @param sql [String] the statement to run
    # @param query_mode [Integer] IMPALA_THEN_HIVE, HIVE_ONLY or IMPALA_ONLY
    # @return whatever the backend that ran the query returned
    # @raise [StandardError] the most recent backend error if every attempted
    #   backend failed
    # @raise [Impaler::QueryError] if no backend was attempted at all
    def query(sql, query_mode = Impaler::IMPALA_THEN_HIVE)
      ret = nil
      error = nil
      success = false

      if query_mode != Impaler::HIVE_ONLY && !@impala_connection.nil?
        begin
          @logger.debug "Trying query in impala"
          ret = @impala_connection.query(sql)
          @logger.debug "Successful query in impala"
          success = true
        rescue StandardError => e
          error = e
          @logger.warn "Impala error: #{e}"
        end
      end

      if !success && query_mode != Impaler::IMPALA_ONLY && !@hivethrift_connection.nil?
        begin
          @logger.debug "Trying query in hive"
          ret = @hivethrift_connection.fetch(sql)
          @logger.debug "Successful query in hive"
          success = true
        rescue StandardError => e
          error = e
          @logger.warn "Hive error: #{e}"
        end
      end

      raise error if !success && !error.nil?
      unless success
        raise Impaler::QueryError.new("Query did not run due to no connections being available")
      end
      ret
    end

    # Sets a session option. Only run on hive since that's the only one that
    # supports set for now; does nothing when there is no Hive connection.
    def set(key, value)
      @hivethrift_connection.set(key, value) unless @hivethrift_connection.nil?
    end


    # ###########################################################################
    # Helper query methods

    # Returns the :c column of the first row of a COUNT(1) over tablename.
    # tablename is interpolated into the SQL as-is.
    def row_count(tablename)
      query("SELECT COUNT(1) c FROM #{tablename}").first[:c]
    end


    # ###########################################################################
    # Metadata methods

    # Describes a table as a hash of column name => column type, both symbols.
    # Rows are read via :name/:type with :col_name/:data_type as the fallback
    # keys (presumably the two backends label these columns differently).
    def columns(tablename)
      desc = {}
      query("describe #{tablename}").each do |col|
        cname = col[:name] || col[:col_name]
        ctype = col[:type] || col[:data_type]
        desc[cname.intern] = ctype.intern
      end
      desc
    end

    # Lists table names, optionally restricted by a SHOW TABLES pattern.
    # The pattern is placed between single quotes without any escaping.
    def tables(pattern=nil)
      q = "SHOW TABLES" + (pattern.nil? ? "" : " '#{pattern}'")
      query(q).map { |table| table[:name] || table[:tab_name] }
    end

    private

    # Normalizes nil, a single "host:port" string or an array-like into an Array.
    def server_list(servers)
      return [] if servers.nil?
      servers.respond_to?(:to_ary) ? servers.to_ary : [servers]
    end

    # Picks a random "host:port" entry and returns [host, port] as strings,
    # or [nil, nil] when the list is empty. The index is drawn with rand so
    # this works on every Ruby version (Array#choice exists only in 1.8.7).
    def pick_server(servers)
      return [nil, nil] if servers.empty?
      host, port = servers[rand(servers.size)].split(":")
      [host, port]
    end

  end


end
|
@@ -0,0 +1,211 @@
|
|
1
|
+
require 'impaler'
|
2
|
+
|
3
|
+
# These tests cover the Impaler interaction with a real impala and hive server.
|
4
|
+
# To run these define environment variables for IMPALA_SERVER and HIVETHRIFT_SERVER
|
5
|
+
# in the format "server:port".
|
6
|
+
|
7
|
+
IMPALA_SERVER=ENV['IMPALA_SERVER']
|
8
|
+
HIVETHRIFT_SERVER=ENV['HIVETHRIFT_SERVER']
|
9
|
+
has_servers=!IMPALA_SERVER.nil? && !HIVETHRIFT_SERVER.nil?
|
10
|
+
IMPALA_SERVERS=[IMPALA_SERVER]
|
11
|
+
HIVETHRIFT_SERVERS=[HIVETHRIFT_SERVER]
|
12
|
+
|
13
|
+
TEST_TABLE=ENV['TEST_TABLE']
|
14
|
+
TEST_TABLE_COLUMN=ENV['TEST_TABLE_COLUMN']
|
15
|
+
has_tables=!TEST_TABLE.nil? && !TEST_TABLE_COLUMN.nil?
|
16
|
+
|
17
|
+
run_tests=has_tables && has_servers
|
18
|
+
|
19
|
+
SKIP_SLOW=!ENV['SKIP_SLOW'].nil?
|
20
|
+
|
21
|
+
# Spec helper: calls Impaler.connect with both the IMPALA_SERVERS and
# HIVETHRIFT_SERVERS lists and returns its result.
def connect
|
22
|
+
Impaler.connect(IMPALA_SERVERS, HIVETHRIFT_SERVERS)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Spec helper: calls Impaler.connect with IMPALA_SERVERS only, passing nil
# for the Hive Thrift servers.
def connect_impala
|
26
|
+
Impaler.connect(IMPALA_SERVERS, nil)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Spec helper: calls Impaler.connect with HIVETHRIFT_SERVERS only, passing
# nil for the Impala servers.
def connect_hivethrift
|
30
|
+
Impaler.connect(nil, HIVETHRIFT_SERVERS)
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
describe Impaler, :if => run_tests do
|
35
|
+
it("Connected skip", :if=>!run_tests) {
|
36
|
+
puts "Skipping connected tests for Impaler, set the environment variables IMPALA_SERVER, HIVETHRIFT_SERVER, TEST_TABLE, and TEST_TABLE_COLUMN to enable these"
|
37
|
+
puts "IMPALA_SERVER and HIVETHRIFT_SERVER are in the format 'server:port'"
|
38
|
+
puts "TEST_TABLE should be a fairly small table for quick tests and TEST_TABLE_COLUMN will be used for some simple test queries where Impala is known to not support the query"
|
39
|
+
puts "Optionally set the environment varialbe SKIP_SLOW=true to skip the hive invocations which are a bit slow"
|
40
|
+
}
|
41
|
+
|
42
|
+
|
43
|
+
describe "connect" do
|
44
|
+
it "connects without error" do
|
45
|
+
expect { Impaler::connect(IMPALA_SERVERS, HIVETHRIFT_SERVERS) }.not_to raise_error
|
46
|
+
expect { Impaler::connect(IMPALA_SERVERS, nil) }.not_to raise_error
|
47
|
+
expect { Impaler::connect(nil, HIVETHRIFT_SERVERS) }.not_to raise_error
|
48
|
+
end
|
49
|
+
|
50
|
+
it "connects with single value server entries without error" do
|
51
|
+
expect { Impaler::connect(IMPALA_SERVER, HIVETHRIFT_SERVER) }.not_to raise_error
|
52
|
+
expect { Impaler::connect(IMPALA_SERVER, nil) }.not_to raise_error
|
53
|
+
expect { Impaler::connect(nil, HIVETHRIFT_SERVER) }.not_to raise_error
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
describe "close" do
|
58
|
+
it "opens and closes without errors" do
|
59
|
+
expect { connect.close }.not_to raise_error
|
60
|
+
expect { connect_impala.close }.not_to raise_error
|
61
|
+
expect { connect_hivethrift.close }.not_to raise_error
|
62
|
+
end
|
63
|
+
|
64
|
+
it "fails after a close" do
|
65
|
+
expect {
|
66
|
+
c=connect
|
67
|
+
c.close
|
68
|
+
c.query "select count(*) from #{TEST_TABLE}"
|
69
|
+
}.to raise_error(Impaler::QueryError)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
describe "simple query" do
|
74
|
+
it "supports a count(*) query" do
|
75
|
+
c = connect
|
76
|
+
count = (c.query "select count(*) c from #{TEST_TABLE}").first[:c]
|
77
|
+
(c.query "select count(*) c from #{TEST_TABLE}").first[:c].should eq(count)
|
78
|
+
(c.query "select count(*) c from #{TEST_TABLE}", Impaler::IMPALA_ONLY).first[:c].should eq(count)
|
79
|
+
(c.query "select count(*) c from #{TEST_TABLE}", Impaler::IMPALA_THEN_HIVE).first[:c].should eq(count)
|
80
|
+
if !SKIP_SLOW
|
81
|
+
(c.query "select count(*) c from #{TEST_TABLE}", Impaler::HIVE_ONLY).first[:c].should eq(count)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
it "fails with garbage queries" do
|
86
|
+
c = connect
|
87
|
+
expect { c.query "select sdffdsa from lkjasdfjkhadf", Impaler::IMPALA_ONLY }.to raise_error(Impala::Protocol::Beeswax::BeeswaxException)
|
88
|
+
expect { c.query "select sdffdsa from lkjasdfjkhadf", Impaler::HIVE_ONLY }.to raise_error(HiveServerException)
|
89
|
+
expect { c.query "select sdffdsa from lkjasdfjkhadf" }.to raise_error(HiveServerException)
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
|
94
|
+
describe "unsupported impala queries", :unless => SKIP_SLOW do
|
95
|
+
|
96
|
+
it "fails when run with impala only" do
|
97
|
+
c = connect
|
98
|
+
expect { c.query "select collect_set(#{TEST_TABLE_COLUMN}) from #{TEST_TABLE}", Impaler::IMPALA_ONLY }.to raise_error(Impala::Protocol::Beeswax::BeeswaxException)
|
99
|
+
end
|
100
|
+
|
101
|
+
it "falls back to hive if impala generates an error" do
|
102
|
+
c = connect
|
103
|
+
expect { c.query "select collect_set(#{TEST_TABLE_COLUMN}) from #{TEST_TABLE}" }.not_to raise_error
|
104
|
+
end
|
105
|
+
|
106
|
+
end
|
107
|
+
|
108
|
+
describe "handles down servers" do
|
109
|
+
it "handles having no impala server" do
|
110
|
+
c = connect_hivethrift
|
111
|
+
expect { c.query "select count(*) c from #{TEST_TABLE}", Impaler::IMPALA_ONLY }.to raise_error(Impaler::QueryError)
|
112
|
+
if !SKIP_SLOW
|
113
|
+
expect { c.query "select count(*) c from #{TEST_TABLE}", Impaler::HIVE_ONLY }.not_to raise_error
|
114
|
+
expect { c.query "select count(*) c from #{TEST_TABLE}", Impaler::IMPALA_THEN_HIVE }.not_to raise_error
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
it "handles having no hive server" do
|
119
|
+
c = connect_impala
|
120
|
+
expect { c.query "select count(*) c from #{TEST_TABLE}", Impaler::HIVE_ONLY }.to raise_error(Impaler::QueryError)
|
121
|
+
expect { c.query "select count(*) c from #{TEST_TABLE}", Impaler::IMPALA_ONLY }.not_to raise_error
|
122
|
+
expect { c.query "select count(*) c from #{TEST_TABLE}", Impaler::IMPALA_THEN_HIVE }.not_to raise_error
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
describe "query consistency", :unless => SKIP_SLOW do
|
127
|
+
|
128
|
+
it "queries return the same regardless of connection type" do
|
129
|
+
q="select * from #{TEST_TABLE} limit 5"
|
130
|
+
c = connect
|
131
|
+
base = c.query(q)
|
132
|
+
|
133
|
+
t=connect_impala.query(q)
|
134
|
+
expect(t).to eq(base)
|
135
|
+
|
136
|
+
t=connect_hivethrift.query(q)
|
137
|
+
expect(t).to eq(base)
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
describe "columns method works" do
|
142
|
+
|
143
|
+
it "columns returns the same regardless of connection type" do
|
144
|
+
c = connect
|
145
|
+
base = c.columns(TEST_TABLE)
|
146
|
+
|
147
|
+
t=connect_impala.columns(TEST_TABLE)
|
148
|
+
expect(t).to eq(base)
|
149
|
+
|
150
|
+
t=connect_hivethrift.columns(TEST_TABLE)
|
151
|
+
expect(t).to eq(base)
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
describe "row_count method works", :unless => SKIP_SLOW do
|
156
|
+
|
157
|
+
it "row_count returns the same regardless of connection type" do
|
158
|
+
c = connect
|
159
|
+
base = c.row_count(TEST_TABLE)
|
160
|
+
|
161
|
+
t=connect_impala.row_count(TEST_TABLE)
|
162
|
+
expect(t).to eq(base)
|
163
|
+
|
164
|
+
t=connect_hivethrift.row_count(TEST_TABLE)
|
165
|
+
expect(t).to eq(base)
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
|
170
|
+
describe "tables method works" do
|
171
|
+
|
172
|
+
it "appears to work correctly in a basic mode" do
|
173
|
+
t=connect.tables
|
174
|
+
expect(t).to include(TEST_TABLE)
|
175
|
+
end
|
176
|
+
|
177
|
+
it "works with patterns" do
|
178
|
+
t=connect.tables("#{TEST_TABLE[0,2]}*")
|
179
|
+
expect(t).to include(TEST_TABLE)
|
180
|
+
|
181
|
+
t=connect_impala.tables("#{TEST_TABLE[0,2]}*")
|
182
|
+
expect(t).to include(TEST_TABLE)
|
183
|
+
|
184
|
+
t=connect_hivethrift.tables("#{TEST_TABLE[0,2]}*")
|
185
|
+
expect(t).to include(TEST_TABLE)
|
186
|
+
end
|
187
|
+
|
188
|
+
it "tables returns the same regardless of connection type" do
|
189
|
+
c = connect
|
190
|
+
base = c.tables(TEST_TABLE)
|
191
|
+
|
192
|
+
t=connect_impala.tables(TEST_TABLE)
|
193
|
+
expect(t).to eq(base)
|
194
|
+
|
195
|
+
t=connect_hivethrift.tables(TEST_TABLE)
|
196
|
+
expect(t).to eq(base)
|
197
|
+
end
|
198
|
+
|
199
|
+
it "tables returns the same with no args regardless of connection type" do
|
200
|
+
c = connect
|
201
|
+
base = c.tables
|
202
|
+
|
203
|
+
t=connect_impala.tables
|
204
|
+
expect(t).to eq(base)
|
205
|
+
|
206
|
+
t=connect_hivethrift.tables
|
207
|
+
expect(t).to eq(base)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'impaler'
|
2
|
+
|
3
|
+
# These tests cover the internals of the Impaler library in isolation as much as possible
|
4
|
+
|
5
|
+
describe Impaler do
|
6
|
+
before(:each) do
|
7
|
+
RBHive::Connection.any_instance.stub(:open).and_return(true)
|
8
|
+
Impala::Connection.any_instance.stub(:open).and_return(true)
|
9
|
+
Impala::Connection.any_instance.stub(:refresh).and_return(true)
|
10
|
+
end
|
11
|
+
|
12
|
+
describe "connect" do
|
13
|
+
it "fails with no servers" do
|
14
|
+
expect { Impaler::connect(nil, nil) }.to raise_error(Impaler::ConnectionError)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "handles valid server strings" do
|
18
|
+
|
19
|
+
impala_server = "impala"
|
20
|
+
impala_port = "123"
|
21
|
+
hivethrift_server = "hivethrift"
|
22
|
+
hivethrift_port = "456"
|
23
|
+
|
24
|
+
verify = lambda{|conn, check_impala, check_hivethrift|
|
25
|
+
if check_impala
|
26
|
+
conn.instance_variable_get(:@impala_host).should eq(impala_server)
|
27
|
+
conn.instance_variable_get(:@impala_port).should eq(impala_port)
|
28
|
+
end
|
29
|
+
if check_hivethrift
|
30
|
+
conn.instance_variable_get(:@hivethrift_host).should eq(hivethrift_server)
|
31
|
+
conn.instance_variable_get(:@hivethrift_port).should eq(hivethrift_port)
|
32
|
+
end
|
33
|
+
}
|
34
|
+
|
35
|
+
#Impaler::Manager.any_instance.stub(:open).and_return(true)
|
36
|
+
impala = "#{impala_server}:#{impala_port}"
|
37
|
+
hivethrift = "#{hivethrift_server}:#{hivethrift_port}"
|
38
|
+
|
39
|
+
verify.call( Impaler.connect( impala, nil ), true, false)
|
40
|
+
verify.call( Impaler.connect( nil, hivethrift ), false, true)
|
41
|
+
verify.call( Impaler.connect( impala, hivethrift ), true, true)
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
describe "handles errors" do
|
47
|
+
let(:conn) { Impaler.connect("impala:999","hive:999") }
|
48
|
+
let(:impala_conn) { Impaler.connect("impala:999",nil) }
|
49
|
+
let(:hive_conn) { Impaler.connect(nil,"hive:999") }
|
50
|
+
let(:q) { "select count(*) from foo" }
|
51
|
+
let(:q_return) { [{:c=>1}] }
|
52
|
+
|
53
|
+
it "handles the no error state and uses impala" do
|
54
|
+
Impala::Connection.any_instance.stub(:query).and_return(q_return)
|
55
|
+
RBHive::Connection.any_instance.stub(:fetch).and_raise(StandardError)
|
56
|
+
conn.query(q).should eq(q_return)
|
57
|
+
end
|
58
|
+
|
59
|
+
it "handles an impala error and falls back to hive" do
|
60
|
+
Impala::Connection.any_instance.stub(:query).and_raise(StandardError)
|
61
|
+
RBHive::Connection.any_instance.stub(:fetch).and_return(q_return)
|
62
|
+
conn.query(q).should eq(q_return)
|
63
|
+
end
|
64
|
+
|
65
|
+
it "failure of both throws an error" do
|
66
|
+
Impala::Connection.any_instance.stub(:query).and_raise(StandardError)
|
67
|
+
RBHive::Connection.any_instance.stub(:fetch).and_raise(StandardError)
|
68
|
+
expect { conn.query(q) }.to raise_error(StandardError)
|
69
|
+
end
|
70
|
+
|
71
|
+
it "impala only throws an error if impala errs" do
|
72
|
+
Impala::Connection.any_instance.stub(:query).and_raise(StandardError)
|
73
|
+
RBHive::Connection.any_instance.stub(:fetch).and_return(q_return)
|
74
|
+
expect { impala_conn.query(q) }.to raise_error(StandardError)
|
75
|
+
end
|
76
|
+
|
77
|
+
it "skipping impala works" do
|
78
|
+
Impala::Connection.any_instance.stub(:query).and_return([])
|
79
|
+
RBHive::Connection.any_instance.stub(:fetch).and_return(q_return)
|
80
|
+
conn.query(q, Impaler::HIVE_ONLY).should eq(q_return)
|
81
|
+
end
|
82
|
+
|
83
|
+
it "skipping hive works" do
|
84
|
+
Impala::Connection.any_instance.stub(:query).and_raise(StandardError)
|
85
|
+
RBHive::Connection.any_instance.stub(:fetch).and_return(q_return)
|
86
|
+
expect { conn.query(q, Impaler::IMPALA_ONLY) }.to raise_error(StandardError)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
|
91
|
+
end
|
metadata
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: impaler
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- John Meagher
|
14
|
+
- Trent Albright
|
15
|
+
autorequire:
|
16
|
+
bindir: bin
|
17
|
+
cert_chain: []
|
18
|
+
|
19
|
+
date: 2013-09-09 00:00:00 -04:00
|
20
|
+
default_executable:
|
21
|
+
dependencies:
|
22
|
+
- !ruby/object:Gem::Dependency
|
23
|
+
name: impala
|
24
|
+
prerelease: false
|
25
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ">="
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
hash: 3
|
31
|
+
segments:
|
32
|
+
- 0
|
33
|
+
version: "0"
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rbhive
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 3
|
45
|
+
segments:
|
46
|
+
- 0
|
47
|
+
version: "0"
|
48
|
+
type: :runtime
|
49
|
+
version_requirements: *id002
|
50
|
+
- !ruby/object:Gem::Dependency
|
51
|
+
name: bundler
|
52
|
+
prerelease: false
|
53
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ~>
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
hash: 9
|
59
|
+
segments:
|
60
|
+
- 1
|
61
|
+
- 3
|
62
|
+
version: "1.3"
|
63
|
+
type: :development
|
64
|
+
version_requirements: *id003
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
name: rake
|
67
|
+
prerelease: false
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
hash: 3
|
74
|
+
segments:
|
75
|
+
- 0
|
76
|
+
version: "0"
|
77
|
+
type: :development
|
78
|
+
version_requirements: *id004
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
name: rspec
|
81
|
+
prerelease: false
|
82
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 3
|
88
|
+
segments:
|
89
|
+
- 0
|
90
|
+
version: "0"
|
91
|
+
type: :development
|
92
|
+
version_requirements: *id005
|
93
|
+
description: Wrapper around Impala and Hive gems
|
94
|
+
email:
|
95
|
+
- john.meagher@gmail.com
|
96
|
+
- trent.albright@gmail.com
|
97
|
+
executables: []
|
98
|
+
|
99
|
+
extensions: []
|
100
|
+
|
101
|
+
extra_rdoc_files: []
|
102
|
+
|
103
|
+
files:
|
104
|
+
- .gitignore
|
105
|
+
- Gemfile
|
106
|
+
- LICENSE.txt
|
107
|
+
- README.md
|
108
|
+
- Rakefile
|
109
|
+
- impaler.gemspec
|
110
|
+
- lib/impaler.rb
|
111
|
+
- lib/impaler/manager.rb
|
112
|
+
- lib/impaler/version.rb
|
113
|
+
- spec/impaler_connected_spec.rb
|
114
|
+
- spec/impaler_spec.rb
|
115
|
+
has_rdoc: true
|
116
|
+
homepage: https://github.com/livingsocial/impaler
|
117
|
+
licenses:
|
118
|
+
- MIT
|
119
|
+
post_install_message:
|
120
|
+
rdoc_options: []
|
121
|
+
|
122
|
+
require_paths:
|
123
|
+
- lib
|
124
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
125
|
+
none: false
|
126
|
+
requirements:
|
127
|
+
- - ">="
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
hash: 3
|
130
|
+
segments:
|
131
|
+
- 0
|
132
|
+
version: "0"
|
133
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
|
+
none: false
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
hash: 3
|
139
|
+
segments:
|
140
|
+
- 0
|
141
|
+
version: "0"
|
142
|
+
requirements: []
|
143
|
+
|
144
|
+
rubyforge_project:
|
145
|
+
rubygems_version: 1.6.2
|
146
|
+
signing_key:
|
147
|
+
specification_version: 3
|
148
|
+
summary: Run in Impala when possible and fall back to Hive when needed
|
149
|
+
test_files:
|
150
|
+
- spec/impaler_connected_spec.rb
|
151
|
+
- spec/impaler_spec.rb
|