impala 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +6 -6
- data/impala.gemspec +1 -2
- data/lib/impala.rb +8 -0
- data/lib/impala/connection.rb +20 -0
- data/lib/impala/cursor.rb +29 -3
- data/lib/impala/protocol.rb +2 -0
- data/lib/impala/version.rb +1 -1
- data/test/test_impala.rb +51 -21
- data/test/test_impala_connected.rb +47 -0
- metadata +9 -23
data/Rakefile
CHANGED
@@ -1,18 +1,18 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
+
require "rake/testtask"
|
2
3
|
|
3
4
|
task :default => [:test]
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
10
|
-
exit(ret)
|
6
|
+
Rake::TestTask.new do |t|
|
7
|
+
t.libs.push "lib"
|
8
|
+
t.test_files = FileList['test/test_*.rb']
|
9
|
+
t.verbose = true
|
11
10
|
end
|
12
11
|
|
13
12
|
THRIFT_FILES = FileList['./thrift/*.thrift']
|
14
13
|
GENNED_FILES = FileList['./lib/impala/protocol/*']
|
15
14
|
|
15
|
+
# eden only works on 1.8, unfortunately =(
|
16
16
|
task :gen do
|
17
17
|
THRIFT_FILES.each do |f|
|
18
18
|
sh "thrift -out lib/impala/protocol --gen rb #{f}"
|
data/impala.gemspec
CHANGED
@@ -18,8 +18,7 @@ Gem::Specification.new do |gem|
|
|
18
18
|
gem.add_development_dependency('eden')
|
19
19
|
gem.add_development_dependency('pry')
|
20
20
|
|
21
|
-
gem.add_development_dependency('
|
22
|
-
gem.add_development_dependency('shoulda')
|
21
|
+
gem.add_development_dependency('minitest', '~> 4.5')
|
23
22
|
gem.add_development_dependency('mocha')
|
24
23
|
|
25
24
|
gem.add_development_dependency('yard')
|
data/lib/impala.rb
CHANGED
@@ -21,6 +21,14 @@ module Impala
|
|
21
21
|
class CursorError < StandardError; end
|
22
22
|
class ParsingError < StandardError; end
|
23
23
|
|
24
|
+
# Connect to an Impala server. If a block is given, it will close the
|
25
|
+
# connection after calling the block with the connection.
|
26
|
+
# @param [String] host the hostname or IP address of the Impala server
|
27
|
+
# @param [int] port the port that the Impala server is listening on
|
28
|
+
# @yieldparam [Connection] conn the open connection. Will be closed once the block
|
29
|
+
# finishes
|
30
|
+
# @return [Connection] the open connection, or, if a block is
|
31
|
+
# passed, the return value of the block
|
24
32
|
def self.connect(host=DEFAULT_HOST, port=DEFAULT_PORT)
|
25
33
|
connection = Connection.new(host, port)
|
26
34
|
|
data/lib/impala/connection.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
module Impala
|
2
|
+
# This object represents a connection to an Impala server. It can be used to
|
3
|
+
# perform queries on the database.
|
2
4
|
class Connection
|
3
5
|
SLEEP_INTERVAL = 0.1
|
4
6
|
|
7
|
+
# Don't instantiate Connections directly; instead, use {Impala.connect}.
|
5
8
|
def initialize(host, port)
|
6
9
|
@host = host
|
7
10
|
@port = port
|
@@ -9,6 +12,11 @@ module Impala
|
|
9
12
|
open
|
10
13
|
end
|
11
14
|
|
15
|
+
def inspect
|
16
|
+
"#<#{self.class} #{@host}:#{@port}#{open? ? '' : ' (DISCONNECTED)'}>"
|
17
|
+
end
|
18
|
+
|
19
|
+
# Open the connection if it's currently closed.
|
12
20
|
def open
|
13
21
|
return if @connected
|
14
22
|
|
@@ -22,19 +30,31 @@ module Impala
|
|
22
30
|
@connected = true
|
23
31
|
end
|
24
32
|
|
33
|
+
# Close this connection. It can still be reopened with {#open}.
|
25
34
|
def close
|
35
|
+
return unless @connected
|
36
|
+
|
26
37
|
@transport.close
|
27
38
|
@connected = false
|
28
39
|
end
|
29
40
|
|
41
|
+
# Returns true if the connection is currently open.
|
30
42
|
def open?
|
31
43
|
@connected
|
32
44
|
end
|
33
45
|
|
46
|
+
# Perform a query and return all the results. This will
|
47
|
+
# load the entire result set into memory, so if you're dealing with lots
|
48
|
+
# of rows, {#execute} may work better.
|
49
|
+
# @param [String] raw_query the query you want to run
|
50
|
+
# @return [Array<Hash>] an array of hashes, one for each row.
|
34
51
|
def query(raw_query)
|
35
52
|
execute(raw_query).fetch_all
|
36
53
|
end
|
37
54
|
|
55
|
+
# Perform a query and return a cursor for iterating over the results.
|
56
|
+
# @param [String] raw_query the query you want to run
|
57
|
+
# @return [Cursor] a cursor for the result rows
|
38
58
|
def execute(raw_query)
|
39
59
|
raise ConnectionError.new("Connection closed") unless open?
|
40
60
|
|
data/lib/impala/cursor.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
module Impala
|
2
|
+
# Cursors are used to iterate over result sets without loading them all
|
3
|
+
# into memory at once. This can be useful if you're dealing with lots of
|
4
|
+
# rows. It implements Enumerable, so you can use each/select/map/etc.
|
2
5
|
class Cursor
|
3
6
|
include Enumerable
|
4
7
|
|
@@ -11,7 +14,11 @@ module Impala
|
|
11
14
|
@row_buffer = []
|
12
15
|
|
13
16
|
@done = false
|
14
|
-
@
|
17
|
+
@open = true
|
18
|
+
end
|
19
|
+
|
20
|
+
def inspect
|
21
|
+
"#<#{self.class}#{open? ? '' : ' (CLOSED)'}>"
|
15
22
|
end
|
16
23
|
|
17
24
|
def each
|
@@ -20,8 +27,12 @@ module Impala
|
|
20
27
|
end
|
21
28
|
end
|
22
29
|
|
30
|
+
# Returns the next available row as a hash, or nil if there are none left.
|
31
|
+
# @return [Hash, nil] the next available row, or nil if there are none
|
32
|
+
# left
|
33
|
+
# @see #fetch_all
|
23
34
|
def fetch_row
|
24
|
-
raise CursorError.new("Cursor has expired or been closed")
|
35
|
+
raise CursorError.new("Cursor has expired or been closed") unless @open
|
25
36
|
|
26
37
|
if @row_buffer.empty?
|
27
38
|
if @done
|
@@ -34,15 +45,30 @@ module Impala
|
|
34
45
|
@row_buffer.shift
|
35
46
|
end
|
36
47
|
|
48
|
+
# Returns all the remaining rows in the result set.
|
49
|
+
# @return [Array<Hash>] the remaining rows in the result set
|
50
|
+
# @see #fetch_row
|
37
51
|
def fetch_all
|
38
52
|
self.to_a
|
39
53
|
end
|
40
54
|
|
55
|
+
# Close the cursor on the remote server. Once a cursor is closed, you
|
56
|
+
# can no longer fetch any rows from it.
|
41
57
|
def close
|
42
|
-
@
|
58
|
+
@open = false
|
43
59
|
@service.close(@handle)
|
44
60
|
end
|
45
61
|
|
62
|
+
# Returns true if the cursor is still open.
|
63
|
+
def open?
|
64
|
+
@open
|
65
|
+
end
|
66
|
+
|
67
|
+
# Returns true if there are any more rows to fetch.
|
68
|
+
def has_more?
|
69
|
+
!@done || !@row_buffer.empty?
|
70
|
+
end
|
71
|
+
|
46
72
|
private
|
47
73
|
|
48
74
|
def fetch_more
|
data/lib/impala/protocol.rb
CHANGED
data/lib/impala/version.rb
CHANGED
data/test/test_impala.rb
CHANGED
@@ -1,34 +1,64 @@
|
|
1
1
|
require 'impala'
|
2
|
-
require '
|
3
|
-
require 'shoulda'
|
2
|
+
require 'minitest/autorun'
|
4
3
|
require 'mocha'
|
5
4
|
|
6
|
-
|
7
|
-
|
5
|
+
describe 'Impala.connect' do
|
6
|
+
before do
|
7
|
+
Thrift::Socket.expects(:new).with('host', 12345)
|
8
|
+
Thrift::BufferedTransport.expects(:new).once.returns(stub(:open => nil))
|
9
|
+
Thrift::BinaryProtocol.expects(:new).once
|
10
|
+
Impala::Protocol::ImpalaService::Client.expects(:new).once
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'should return an open connection when passed a block' do
|
14
|
+
connection = Impala.connect('host', 12345)
|
15
|
+
assert_equal(Impala::Connection, connection.class)
|
16
|
+
assert_equal(true, connection.open?)
|
17
|
+
end
|
8
18
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
19
|
+
it 'should return the results of the query when given a block with a query, and then close tho connection' do
|
20
|
+
Impala::Connection.any_instance.stubs(:query => 'result')
|
21
|
+
Impala::Connection.any_instance.expects(:close).once
|
22
|
+
|
23
|
+
ret = Impala.connect('host', 12345) do |conn|
|
24
|
+
conn.query('query')
|
15
25
|
end
|
16
26
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
27
|
+
assert_equal('result', ret)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe Impala::Connection do
|
32
|
+
describe '#sanitize_query' do
|
33
|
+
before do
|
34
|
+
Impala::Connection.any_instance.stubs(:open)
|
35
|
+
@connection = Impala::Connection.new('test', 1234)
|
21
36
|
end
|
22
37
|
|
23
|
-
|
24
|
-
|
25
|
-
|
38
|
+
it 'should downcase the command but nothing else' do
|
39
|
+
query = 'SELECT blah FROM Blah'
|
40
|
+
assert_equal('select blah FROM Blah', @connection.send(:sanitize_query, query))
|
41
|
+
end
|
26
42
|
|
27
|
-
|
28
|
-
|
29
|
-
|
43
|
+
it 'should reject empty or invalid queries' do
|
44
|
+
assert_raises(Impala::InvalidQueryError) { @connection.send(:sanitize_query, '')}
|
45
|
+
assert_raises(Impala::InvalidQueryError) { @connection.send(:sanitize_query, 'HERRO herro herro')}
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe '#wait_for_result' do
|
50
|
+
before do
|
51
|
+
Impala::Connection.any_instance.stubs(:open)
|
52
|
+
@connection = Impala::Connection.new('test', 1234)
|
53
|
+
@service = stub(:get_state => nil)
|
54
|
+
@connection.instance_variable_set('@service', @service)
|
55
|
+
end
|
30
56
|
|
31
|
-
|
57
|
+
it 'should close the handle if an exception is raised, and then re-raise' do
|
58
|
+
handle = stub()
|
59
|
+
@service.expects(:close).with(handle).once
|
60
|
+
@service.expects(:get_state).raises(StandardError)
|
61
|
+
assert_raises(StandardError) { @connection.send(:wait_for_result, handle) }
|
32
62
|
end
|
33
63
|
end
|
34
64
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'impala'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'mocha'
|
4
|
+
|
5
|
+
# these are tests that require an available Impala server. To run them,
|
6
|
+
# declare an IMPALA_SERVER env var, e.g. `IMPALA_SERVER=localhost:21000 rake`
|
7
|
+
IMPALA_SERVER = ENV['IMPALA_SERVER']
|
8
|
+
|
9
|
+
def connect
|
10
|
+
parts = IMPALA_SERVER.split(':')
|
11
|
+
if parts.length != 2 || parts.any? { |p| p.empty? }
|
12
|
+
raise "Invalid IMPALA_SERVER: #{IMPALA_SERVER}"
|
13
|
+
end
|
14
|
+
|
15
|
+
host, port = parts
|
16
|
+
Impala.connect(host, port)
|
17
|
+
end
|
18
|
+
|
19
|
+
describe 'connected tests' do
|
20
|
+
before do
|
21
|
+
skip unless IMPALA_SERVER
|
22
|
+
@connection = connect
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'can successfully connect' do
|
26
|
+
assert_instance_of(Impala::Connection, @connection)
|
27
|
+
assert(@connection.open?, "the connection should be open")
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'can run a basic query' do
|
31
|
+
ret = @connection.query('SELECT 1 AS a')
|
32
|
+
assert_equal([{:a=>1}], ret, "the result should be a list of hashes")
|
33
|
+
end
|
34
|
+
|
35
|
+
# TODO: this test sucks because there's no way to get multiple records
|
36
|
+
# with a literal select. perhaps there should be importable test data?
|
37
|
+
it 'can get a cursor and fetch one row at a time' do
|
38
|
+
cursor = @connection.execute('SELECT 1 AS a')
|
39
|
+
assert_instance_of(Impala::Cursor, cursor, "the result should be a cursor")
|
40
|
+
|
41
|
+
row = cursor.fetch_row
|
42
|
+
assert_equal({:a=>1}, row, "the row should be a hash")
|
43
|
+
|
44
|
+
assert_equal(false, cursor.has_more?, "has_more? should be false")
|
45
|
+
assert_nil(cursor.fetch_row, "subsequent calls to fetch_row should be nil")
|
46
|
+
end
|
47
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: impala
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: thrift
|
@@ -76,37 +76,21 @@ dependencies:
|
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
|
-
name:
|
79
|
+
name: minitest
|
80
80
|
requirement: !ruby/object:Gem::Requirement
|
81
81
|
none: false
|
82
82
|
requirements:
|
83
|
-
- -
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: '0'
|
86
|
-
type: :development
|
87
|
-
prerelease: false
|
88
|
-
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
|
-
requirements:
|
91
|
-
- - ! '>='
|
92
|
-
- !ruby/object:Gem::Version
|
93
|
-
version: '0'
|
94
|
-
- !ruby/object:Gem::Dependency
|
95
|
-
name: shoulda
|
96
|
-
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
|
-
requirements:
|
99
|
-
- - ! '>='
|
83
|
+
- - ~>
|
100
84
|
- !ruby/object:Gem::Version
|
101
|
-
version: '
|
85
|
+
version: '4.5'
|
102
86
|
type: :development
|
103
87
|
prerelease: false
|
104
88
|
version_requirements: !ruby/object:Gem::Requirement
|
105
89
|
none: false
|
106
90
|
requirements:
|
107
|
-
- -
|
91
|
+
- - ~>
|
108
92
|
- !ruby/object:Gem::Version
|
109
|
-
version: '
|
93
|
+
version: '4.5'
|
110
94
|
- !ruby/object:Gem::Dependency
|
111
95
|
name: mocha
|
112
96
|
requirement: !ruby/object:Gem::Requirement
|
@@ -242,6 +226,7 @@ files:
|
|
242
226
|
- lib/impala/protocol/types_types.rb
|
243
227
|
- lib/impala/version.rb
|
244
228
|
- test/test_impala.rb
|
229
|
+
- test/test_impala_connected.rb
|
245
230
|
- thrift/Data.thrift
|
246
231
|
- thrift/DataSinks.thrift
|
247
232
|
- thrift/Descriptors.thrift
|
@@ -290,4 +275,5 @@ specification_version: 3
|
|
290
275
|
summary: A ruby client for Cloudera's Impala
|
291
276
|
test_files:
|
292
277
|
- test/test_impala.rb
|
278
|
+
- test/test_impala_connected.rb
|
293
279
|
has_rdoc:
|