impala 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +6 -6
- data/impala.gemspec +1 -2
- data/lib/impala.rb +8 -0
- data/lib/impala/connection.rb +20 -0
- data/lib/impala/cursor.rb +29 -3
- data/lib/impala/protocol.rb +2 -0
- data/lib/impala/version.rb +1 -1
- data/test/test_impala.rb +51 -21
- data/test/test_impala_connected.rb +47 -0
- metadata +9 -23
data/Rakefile
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
+
require "rake/testtask"
|
2
3
|
|
3
4
|
task :default => [:test]
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
10
|
-
exit(ret)
|
6
|
+
Rake::TestTask.new do |t|
|
7
|
+
t.libs.push "lib"
|
8
|
+
t.test_files = FileList['test/test_*.rb']
|
9
|
+
t.verbose = true
|
11
10
|
end
|
12
11
|
|
13
12
|
THRIFT_FILES = FileList['./thrift/*.thrift']
|
14
13
|
GENNED_FILES = FileList['./lib/impala/protocol/*']
|
15
14
|
|
15
|
+
# eden only works on 1.8, unfortunately =(
|
16
16
|
task :gen do
|
17
17
|
THRIFT_FILES.each do |f|
|
18
18
|
sh "thrift -out lib/impala/protocol --gen rb #{f}"
|
data/impala.gemspec
CHANGED
@@ -18,8 +18,7 @@ Gem::Specification.new do |gem|
|
|
18
18
|
gem.add_development_dependency('eden')
|
19
19
|
gem.add_development_dependency('pry')
|
20
20
|
|
21
|
-
gem.add_development_dependency('
|
22
|
-
gem.add_development_dependency('shoulda')
|
21
|
+
gem.add_development_dependency('minitest', '~> 4.5')
|
23
22
|
gem.add_development_dependency('mocha')
|
24
23
|
|
25
24
|
gem.add_development_dependency('yard')
|
data/lib/impala.rb
CHANGED
@@ -21,6 +21,14 @@ module Impala
|
|
21
21
|
class CursorError < StandardError; end
|
22
22
|
class ParsingError < StandardError; end
|
23
23
|
|
24
|
+
# Connect to an Impala server. If a block is given, it will close the
|
25
|
+
# connection after calling the block with the connection.
|
26
|
+
# @param [String] host the hostname or IP address of the Impala server
|
27
|
+
# @param [int] port the port that the Impala server is listening on
|
28
|
+
# @yieldparam [Connection] conn the open connection. Will be closed once the block
|
29
|
+
# finishes
|
30
|
+
# @return [Connection] the open connection, or, if a block is
|
31
|
+
# passed, the return value of the block
|
24
32
|
def self.connect(host=DEFAULT_HOST, port=DEFAULT_PORT)
|
25
33
|
connection = Connection.new(host, port)
|
26
34
|
|
data/lib/impala/connection.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
module Impala
|
2
|
+
# This object represents a connection to an Impala server. It can be used to
|
3
|
+
# perform queries on the database.
|
2
4
|
class Connection
|
3
5
|
SLEEP_INTERVAL = 0.1
|
4
6
|
|
7
|
+
# Don't instantiate Connections directly; instead, use {Impala.connect}.
|
5
8
|
def initialize(host, port)
|
6
9
|
@host = host
|
7
10
|
@port = port
|
@@ -9,6 +12,11 @@ module Impala
|
|
9
12
|
open
|
10
13
|
end
|
11
14
|
|
15
|
+
def inspect
|
16
|
+
"#<#{self.class} #{@host}:#{@port}#{open? ? '' : ' (DISCONNECTED)'}>"
|
17
|
+
end
|
18
|
+
|
19
|
+
# Open the connection if it's currently closed.
|
12
20
|
def open
|
13
21
|
return if @connected
|
14
22
|
|
@@ -22,19 +30,31 @@ module Impala
|
|
22
30
|
@connected = true
|
23
31
|
end
|
24
32
|
|
33
|
+
# Close this connection. It can still be reopened with {#open}.
|
25
34
|
def close
|
35
|
+
return unless @connected
|
36
|
+
|
26
37
|
@transport.close
|
27
38
|
@connected = false
|
28
39
|
end
|
29
40
|
|
41
|
+
# Returns true if the connection is currently open.
|
30
42
|
def open?
|
31
43
|
@connected
|
32
44
|
end
|
33
45
|
|
46
|
+
# Perform a query and return all the results. This will
|
47
|
+
# load the entire result set into memory, so if you're dealing with lots
|
48
|
+
# of rows, {#execute} may work better.
|
49
|
+
# @param [String] raw_query the query you want to run
|
50
|
+
# @return [Array<Hash>] an array of hashes, one for each row.
|
34
51
|
def query(raw_query)
|
35
52
|
execute(raw_query).fetch_all
|
36
53
|
end
|
37
54
|
|
55
|
+
# Perform a query and return a cursor for iterating over the results.
|
56
|
+
# @param [String] raw_query the query you want to run
|
57
|
+
# @return [Cursor] a cursor for the result rows
|
38
58
|
def execute(raw_query)
|
39
59
|
raise ConnectionError.new("Connection closed") unless open?
|
40
60
|
|
data/lib/impala/cursor.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
module Impala
|
2
|
+
# Cursors are used to iterate over result sets without loading them all
|
3
|
+
# into memory at once. This can be useful if you're dealing with lots of
|
4
|
+
# rows. It implements Enumerable, so you can use each/select/map/etc.
|
2
5
|
class Cursor
|
3
6
|
include Enumerable
|
4
7
|
|
@@ -11,7 +14,11 @@ module Impala
|
|
11
14
|
@row_buffer = []
|
12
15
|
|
13
16
|
@done = false
|
14
|
-
@
|
17
|
+
@open = true
|
18
|
+
end
|
19
|
+
|
20
|
+
def inspect
|
21
|
+
"#<#{self.class}#{open? ? '' : ' (CLOSED)'}>"
|
15
22
|
end
|
16
23
|
|
17
24
|
def each
|
@@ -20,8 +27,12 @@ module Impala
|
|
20
27
|
end
|
21
28
|
end
|
22
29
|
|
30
|
+
# Returns the next available row as a hash, or nil if there are none left.
|
31
|
+
# @return [Hash, nil] the next available row, or nil if there are none
|
32
|
+
# left
|
33
|
+
# @see #fetch_all
|
23
34
|
def fetch_row
|
24
|
-
raise CursorError.new("Cursor has expired or been closed")
|
35
|
+
raise CursorError.new("Cursor has expired or been closed") unless @open
|
25
36
|
|
26
37
|
if @row_buffer.empty?
|
27
38
|
if @done
|
@@ -34,15 +45,30 @@ module Impala
|
|
34
45
|
@row_buffer.shift
|
35
46
|
end
|
36
47
|
|
48
|
+
# Returns all the remaining rows in the result set.
|
49
|
+
# @return [Array<Hash>] the remaining rows in the result set
|
50
|
+
# @see #fetch_row
|
37
51
|
def fetch_all
|
38
52
|
self.to_a
|
39
53
|
end
|
40
54
|
|
55
|
+
# Close the cursor on the remote server. Once a cursor is closed, you
|
56
|
+
# can no longer fetch any rows from it.
|
41
57
|
def close
|
42
|
-
@
|
58
|
+
@open = false
|
43
59
|
@service.close(@handle)
|
44
60
|
end
|
45
61
|
|
62
|
+
# Returns true if the cursor is still open.
|
63
|
+
def open?
|
64
|
+
@open
|
65
|
+
end
|
66
|
+
|
67
|
+
# Returns true if there are any more rows to fetch.
|
68
|
+
def has_more?
|
69
|
+
!@done || !@row_buffer.empty?
|
70
|
+
end
|
71
|
+
|
46
72
|
private
|
47
73
|
|
48
74
|
def fetch_more
|
data/lib/impala/protocol.rb
CHANGED
data/lib/impala/version.rb
CHANGED
data/test/test_impala.rb
CHANGED
@@ -1,34 +1,64 @@
|
|
1
1
|
require 'impala'
|
2
|
-
require '
|
3
|
-
require 'shoulda'
|
2
|
+
require 'minitest/autorun'
|
4
3
|
require 'mocha'
|
5
4
|
|
6
|
-
|
7
|
-
|
5
|
+
describe 'Impala.connect' do
|
6
|
+
before do
|
7
|
+
Thrift::Socket.expects(:new).with('host', 12345)
|
8
|
+
Thrift::BufferedTransport.expects(:new).once.returns(stub(:open => nil))
|
9
|
+
Thrift::BinaryProtocol.expects(:new).once
|
10
|
+
Impala::Protocol::ImpalaService::Client.expects(:new).once
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'should return an open connection when passed a block' do
|
14
|
+
connection = Impala.connect('host', 12345)
|
15
|
+
assert_equal(Impala::Connection, connection.class)
|
16
|
+
assert_equal(true, connection.open?)
|
17
|
+
end
|
8
18
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
19
|
+
it 'should return the results of the query when given a block with a query, and then close tho connection' do
|
20
|
+
Impala::Connection.any_instance.stubs(:query => 'result')
|
21
|
+
Impala::Connection.any_instance.expects(:close).once
|
22
|
+
|
23
|
+
ret = Impala.connect('host', 12345) do |conn|
|
24
|
+
conn.query('query')
|
15
25
|
end
|
16
26
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
27
|
+
assert_equal('result', ret)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe Impala::Connection do
|
32
|
+
describe '#sanitize_query' do
|
33
|
+
before do
|
34
|
+
Impala::Connection.any_instance.stubs(:open)
|
35
|
+
@connection = Impala::Connection.new('test', 1234)
|
21
36
|
end
|
22
37
|
|
23
|
-
|
24
|
-
|
25
|
-
|
38
|
+
it 'should downcase the command but nothing else' do
|
39
|
+
query = 'SELECT blah FROM Blah'
|
40
|
+
assert_equal('select blah FROM Blah', @connection.send(:sanitize_query, query))
|
41
|
+
end
|
26
42
|
|
27
|
-
|
28
|
-
|
29
|
-
|
43
|
+
it 'should reject empty or invalid queries' do
|
44
|
+
assert_raises(Impala::InvalidQueryError) { @connection.send(:sanitize_query, '')}
|
45
|
+
assert_raises(Impala::InvalidQueryError) { @connection.send(:sanitize_query, 'HERRO herro herro')}
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe '#wait_for_result' do
|
50
|
+
before do
|
51
|
+
Impala::Connection.any_instance.stubs(:open)
|
52
|
+
@connection = Impala::Connection.new('test', 1234)
|
53
|
+
@service = stub(:get_state => nil)
|
54
|
+
@connection.instance_variable_set('@service', @service)
|
55
|
+
end
|
30
56
|
|
31
|
-
|
57
|
+
it 'should close the handle if an exception is raised, and then re-raise' do
|
58
|
+
handle = stub()
|
59
|
+
@service.expects(:close).with(handle).once
|
60
|
+
@service.expects(:get_state).raises(StandardError)
|
61
|
+
assert_raises(StandardError) { @connection.send(:wait_for_result, handle) }
|
32
62
|
end
|
33
63
|
end
|
34
64
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'impala'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'mocha'
|
4
|
+
|
5
|
+
# these are tests that require an available Impala server. To run them,
|
6
|
+
# declare an IMPALA_SERVER env var, e.g. `IMPALA_SERVER=localhost:21000 rake`
|
7
|
+
IMPALA_SERVER = ENV['IMPALA_SERVER']
|
8
|
+
|
9
|
+
def connect
|
10
|
+
parts = IMPALA_SERVER.split(':')
|
11
|
+
if parts.length != 2 || parts.any? { |p| p.empty? }
|
12
|
+
raise "Invalid IMPALA_SERVER: #{IMPALA_SERVER}"
|
13
|
+
end
|
14
|
+
|
15
|
+
host, port = parts
|
16
|
+
Impala.connect(host, port)
|
17
|
+
end
|
18
|
+
|
19
|
+
describe 'connected tests' do
|
20
|
+
before do
|
21
|
+
skip unless IMPALA_SERVER
|
22
|
+
@connection = connect
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'can successfully connect' do
|
26
|
+
assert_instance_of(Impala::Connection, @connection)
|
27
|
+
assert(@connection.open?, "the connection should be open")
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'can run a basic query' do
|
31
|
+
ret = @connection.query('SELECT 1 AS a')
|
32
|
+
assert_equal([{:a=>1}], ret, "the result should be a list of hashes")
|
33
|
+
end
|
34
|
+
|
35
|
+
# TODO: this test sucks because there's no way to get multiple records
|
36
|
+
# with a literal select. perhaps there should be importable test data?
|
37
|
+
it 'can get a cursor and fetch one row at a time' do
|
38
|
+
cursor = @connection.execute('SELECT 1 AS a')
|
39
|
+
assert_instance_of(Impala::Cursor, cursor, "the result should be a cursor")
|
40
|
+
|
41
|
+
row = cursor.fetch_row
|
42
|
+
assert_equal({:a=>1}, row, "the row should be a hash")
|
43
|
+
|
44
|
+
assert_equal(false, cursor.has_more?, "has_more? should be false")
|
45
|
+
assert_nil(cursor.fetch_row, "subsequent calls to fetch_row should be nil")
|
46
|
+
end
|
47
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: impala
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: thrift
|
@@ -76,37 +76,21 @@ dependencies:
|
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
|
-
name:
|
79
|
+
name: minitest
|
80
80
|
requirement: !ruby/object:Gem::Requirement
|
81
81
|
none: false
|
82
82
|
requirements:
|
83
|
-
- -
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: '0'
|
86
|
-
type: :development
|
87
|
-
prerelease: false
|
88
|
-
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
|
-
requirements:
|
91
|
-
- - ! '>='
|
92
|
-
- !ruby/object:Gem::Version
|
93
|
-
version: '0'
|
94
|
-
- !ruby/object:Gem::Dependency
|
95
|
-
name: shoulda
|
96
|
-
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
|
-
requirements:
|
99
|
-
- - ! '>='
|
83
|
+
- - ~>
|
100
84
|
- !ruby/object:Gem::Version
|
101
|
-
version: '
|
85
|
+
version: '4.5'
|
102
86
|
type: :development
|
103
87
|
prerelease: false
|
104
88
|
version_requirements: !ruby/object:Gem::Requirement
|
105
89
|
none: false
|
106
90
|
requirements:
|
107
|
-
- -
|
91
|
+
- - ~>
|
108
92
|
- !ruby/object:Gem::Version
|
109
|
-
version: '
|
93
|
+
version: '4.5'
|
110
94
|
- !ruby/object:Gem::Dependency
|
111
95
|
name: mocha
|
112
96
|
requirement: !ruby/object:Gem::Requirement
|
@@ -242,6 +226,7 @@ files:
|
|
242
226
|
- lib/impala/protocol/types_types.rb
|
243
227
|
- lib/impala/version.rb
|
244
228
|
- test/test_impala.rb
|
229
|
+
- test/test_impala_connected.rb
|
245
230
|
- thrift/Data.thrift
|
246
231
|
- thrift/DataSinks.thrift
|
247
232
|
- thrift/Descriptors.thrift
|
@@ -290,4 +275,5 @@ specification_version: 3
|
|
290
275
|
summary: A ruby client for Cloudera's Impala
|
291
276
|
test_files:
|
292
277
|
- test/test_impala.rb
|
278
|
+
- test/test_impala_connected.rb
|
293
279
|
has_rdoc:
|