impala 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +1 -1
- data/impala.gemspec +2 -0
- data/lib/impala.rb +1 -1
- data/lib/impala/connection.rb +25 -21
- data/lib/impala/version.rb +1 -1
- data/test/test_impala.rb +42 -3
- data/test/test_impala_connected.rb +118 -87
- metadata +34 -2
data/README.md
CHANGED
data/impala.gemspec
CHANGED
@@ -13,6 +13,8 @@ Gem::Specification.new do |gem|
|
|
13
13
|
gem.homepage = "https://github.com/colinmarc/impala-ruby"
|
14
14
|
|
15
15
|
gem.add_dependency('thrift', '~> 0.9.1')
|
16
|
+
gem.add_dependency('rack')
|
17
|
+
gem.add_dependency('thin')
|
16
18
|
|
17
19
|
gem.add_development_dependency('rake')
|
18
20
|
gem.add_development_dependency('pry')
|
data/lib/impala.rb
CHANGED
@@ -14,7 +14,7 @@ require 'impala/cursor'
|
|
14
14
|
require 'impala/connection'
|
15
15
|
|
16
16
|
module Impala
|
17
|
-
KNOWN_COMMANDS = ['select', 'insert', 'show', 'describe', 'use', 'explain', 'create', 'drop']
|
17
|
+
KNOWN_COMMANDS = ['select', 'insert', 'show', 'describe', 'use', 'explain', 'create', 'drop', 'invalidate', 'with']
|
18
18
|
DEFAULT_HOST = 'localhost'
|
19
19
|
DEFAULT_PORT = 21000
|
20
20
|
class InvalidQueryError < StandardError; end
|
data/lib/impala/connection.rb
CHANGED
@@ -2,7 +2,7 @@ module Impala
|
|
2
2
|
# This object represents a connection to an Impala server. It can be used to
|
3
3
|
# perform queries on the database.
|
4
4
|
class Connection
|
5
|
-
|
5
|
+
LOG_CONTEXT_ID = "impala-ruby"
|
6
6
|
|
7
7
|
# Don't instantiate Connections directly; instead, use {Impala.connect}.
|
8
8
|
def initialize(host, port)
|
@@ -43,7 +43,7 @@ module Impala
|
|
43
43
|
@connected
|
44
44
|
end
|
45
45
|
|
46
|
-
# Refresh the metadata store
|
46
|
+
# Refresh the metadata store.
|
47
47
|
def refresh
|
48
48
|
raise ConnectionError.new("Connection closed") unless open?
|
49
49
|
@service.ResetCatalog
|
@@ -53,21 +53,27 @@ module Impala
|
|
53
53
|
# load the entire result set into memory, so if you're dealing with lots
|
54
54
|
# of rows, {#execute} may work better.
|
55
55
|
# @param [String] query the query you want to run
|
56
|
+
# @param [Hash] query_options the options to set user and configuration
|
57
|
+
# except for :user, see TImpalaQueryOptions in ImpalaService.thrift
|
58
|
+
# @option query_options [String] :user the user runs the query
|
56
59
|
# @return [Array<Hash>] an array of hashes, one for each row.
|
57
|
-
def query(raw_query)
|
58
|
-
execute(raw_query).fetch_all
|
60
|
+
def query(raw_query, query_options = {})
|
61
|
+
execute(raw_query, query_options).fetch_all
|
59
62
|
end
|
60
63
|
|
61
64
|
# Perform a query and return a cursor for iterating over the results.
|
62
65
|
# @param [String] query the query you want to run
|
66
|
+
# @param [Hash] query_options the options to set user and configuration
|
67
|
+
# except for :user, see TImpalaQueryOptions in ImpalaService.thrift
|
68
|
+
# @option query_options [String] :user the user runs the query
|
63
69
|
# @return [Cursor] a cursor for the result rows
|
64
|
-
def execute(raw_query)
|
70
|
+
def execute(raw_query, query_options = {})
|
65
71
|
raise ConnectionError.new("Connection closed") unless open?
|
66
72
|
|
67
73
|
query = sanitize_query(raw_query)
|
68
|
-
handle = send_query(query)
|
74
|
+
handle = send_query(query, query_options)
|
69
75
|
|
70
|
-
|
76
|
+
check_result(handle)
|
71
77
|
Cursor.new(handle, @service)
|
72
78
|
end
|
73
79
|
|
@@ -85,25 +91,23 @@ module Impala
|
|
85
91
|
([command] + words[1..-1]).join(' ')
|
86
92
|
end
|
87
93
|
|
88
|
-
def send_query(sanitized_query)
|
94
|
+
def send_query(sanitized_query, query_options)
|
89
95
|
query = Protocol::Beeswax::Query.new
|
90
96
|
query.query = sanitized_query
|
91
97
|
|
92
|
-
|
98
|
+
query.hadoop_user = query_options.delete(:user) if query_options[:user]
|
99
|
+
query.configuration = query_options.map do |key, value|
|
100
|
+
"#{key.upcase}=#{value}"
|
101
|
+
end
|
102
|
+
|
103
|
+
@service.executeAndWait(query, LOG_CONTEXT_ID)
|
93
104
|
end
|
94
105
|
|
95
|
-
def
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
break
|
101
|
-
elsif state == Protocol::Beeswax::QueryState::EXCEPTION
|
102
|
-
close_handle(handle)
|
103
|
-
raise ConnectionError.new("The query was aborted")
|
104
|
-
end
|
105
|
-
|
106
|
-
sleep(SLEEP_INTERVAL)
|
106
|
+
def check_result(handle)
|
107
|
+
state = @service.get_state(handle)
|
108
|
+
if state == Protocol::Beeswax::QueryState::EXCEPTION
|
109
|
+
close_handle(handle)
|
110
|
+
raise ConnectionError.new("The query was aborted")
|
107
111
|
end
|
108
112
|
rescue
|
109
113
|
close_handle(handle)
|
data/lib/impala/version.rb
CHANGED
data/test/test_impala.rb
CHANGED
@@ -46,7 +46,7 @@ describe Impala::Connection do
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
-
describe '#
|
49
|
+
describe '#check_result' do
|
50
50
|
before do
|
51
51
|
Impala::Connection.any_instance.stubs(:open)
|
52
52
|
@connection = Impala::Connection.new('test', 1234)
|
@@ -58,7 +58,46 @@ describe Impala::Connection do
|
|
58
58
|
handle = stub()
|
59
59
|
@service.expects(:close).with(handle).once
|
60
60
|
@service.expects(:get_state).raises(StandardError)
|
61
|
-
assert_raises(StandardError) { @connection.send(:
|
61
|
+
assert_raises(StandardError) { @connection.send(:check_result, handle) }
|
62
62
|
end
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
|
+
describe '#execute' do
|
66
|
+
before do
|
67
|
+
Impala::Connection.any_instance.stubs(:open)
|
68
|
+
Impala::Cursor.stubs(:new)
|
69
|
+
@connection = Impala::Connection.new('test', 1234)
|
70
|
+
@connection.stubs(:open? => true, :sanitize_query => 'sanitized_query', :check_result => nil)
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'should call Protocol::ImpalaService::Client#executeAndWait with the sanitized query' do
|
74
|
+
query = Impala::Protocol::Beeswax::Query.new
|
75
|
+
query.query = 'sanitized_query'
|
76
|
+
query.configuration = []
|
77
|
+
|
78
|
+
@service = stub()
|
79
|
+
@service.expects(:executeAndWait).with(query, Impala::Connection::LOG_CONTEXT_ID).once
|
80
|
+
@connection.instance_variable_set('@service', @service)
|
81
|
+
|
82
|
+
@connection.execute('query')
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'should call Protocol::ImpalaService::Client#executeAndWait with the hadoop_user and configuration if passed as parameter' do
|
86
|
+
query = Impala::Protocol::Beeswax::Query.new
|
87
|
+
query.query = 'sanitized_query'
|
88
|
+
query.hadoop_user = 'impala'
|
89
|
+
query.configuration = %w|NUM_SCANNER_THREADS=8 MEM_LIMIT=3221225472|
|
90
|
+
|
91
|
+
@service = stub()
|
92
|
+
@service.expects(:executeAndWait).with(query, Impala::Connection::LOG_CONTEXT_ID).once
|
93
|
+
@connection.instance_variable_set('@service', @service)
|
94
|
+
|
95
|
+
opt = {
|
96
|
+
:user => 'impala',
|
97
|
+
:num_scanner_threads => 8,
|
98
|
+
:mem_limit => 3221225472
|
99
|
+
}
|
100
|
+
@connection.execute('query', opt)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
@@ -16,140 +16,171 @@ def connect
|
|
16
16
|
Impala.connect(host, port)
|
17
17
|
end
|
18
18
|
|
19
|
-
describe '
|
19
|
+
describe 'connected tests' do
|
20
20
|
before do
|
21
21
|
skip unless IMPALA_SERVER
|
22
22
|
@connection = connect
|
23
23
|
end
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
describe 'basic tests' do
|
26
|
+
it 'can connect' do
|
27
|
+
assert_instance_of(Impala::Connection, @connection)
|
28
|
+
assert(@connection.open?, "the connection should be open")
|
29
|
+
end
|
29
30
|
|
30
|
-
|
31
|
-
|
32
|
-
|
31
|
+
it 'can refresh the catalog' do
|
32
|
+
@connection.refresh
|
33
|
+
end
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
35
|
+
it 'can refresh metadata' do
|
36
|
+
@connection.query('invalidate metadata')
|
37
|
+
end
|
38
38
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
it 'can run a basic query' do
|
40
|
+
ret = @connection.query('SELECT "foo" AS foo')
|
41
|
+
assert_equal([{:foo=>'foo'}], ret, "the result should be a list of hashes")
|
42
|
+
end
|
43
43
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
it 'can run a basic query with some query options as specified user' do
|
45
|
+
ret = @connection.query('SELECT "foo" AS foo',
|
46
|
+
:user => 'someoneelse',
|
47
|
+
:mem_limit => 1234567890,
|
48
|
+
:max_scan_range_length => 1024 * 1024 * 1024)
|
49
|
+
assert_equal([{:foo=>'foo'}], ret, "the result should be a list of hashes")
|
50
|
+
end
|
48
51
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
52
|
+
it 'can handle boolean values' do
|
53
|
+
ret = @connection.query('SELECT TRUE AS foo')
|
54
|
+
assert_equal([{:foo=>true}], ret, "the result should be a bool")
|
55
|
+
end
|
53
56
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
57
|
+
it 'can handle double values' do
|
58
|
+
ret = @connection.query('SELECT 1.23 AS foo')
|
59
|
+
assert_equal([{:foo=>1.23}], ret, "the result should be a float")
|
60
|
+
end
|
58
61
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
end
|
62
|
+
it 'can handle float values' do
|
63
|
+
ret = @connection.query('SELECT CAST(1.23 AS float) as foo')
|
64
|
+
assert_instance_of(Float, ret.first[:foo], "the result should be a float")
|
65
|
+
end
|
63
66
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
end
|
67
|
+
it 'can handle timestamp values' do
|
68
|
+
ret = @connection.query('SELECT NOW() AS foo')
|
69
|
+
assert_instance_of(Time, ret.first[:foo], "the result should be a timestamp")
|
70
|
+
end
|
69
71
|
|
70
|
-
|
71
|
-
|
72
|
-
|
72
|
+
it 'can handle null values' do
|
73
|
+
ret = @connection.query('SELECT NULL AS nothing')
|
74
|
+
assert_equal(nil, ret.first[:nothing], "the result should be nil")
|
75
|
+
end
|
73
76
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
+
it 'can successfully refresh the metadata store' do
|
78
|
+
ret = @connection.refresh
|
79
|
+
end
|
77
80
|
end
|
78
81
|
|
79
|
-
describe '
|
82
|
+
describe 'with a test database' do
|
80
83
|
before do
|
81
|
-
@
|
82
|
-
@connection.query("CREATE
|
84
|
+
@database = '_impala_ruby_test'
|
85
|
+
@connection.query("CREATE DATABASE IF NOT EXISTS #{@database}")
|
83
86
|
end
|
84
87
|
|
85
88
|
after do
|
86
|
-
@connection.query("DROP
|
89
|
+
@connection.query("DROP DATABASE IF EXISTS #{@database}") if @connection
|
87
90
|
end
|
88
91
|
|
89
|
-
it '
|
90
|
-
|
91
|
-
|
92
|
+
it 'can use the database' do
|
93
|
+
@connection.query("USE #{@database}")
|
94
|
+
@connection.query("USE default")
|
92
95
|
end
|
93
96
|
|
94
|
-
|
95
|
-
cursor = @connection.execute("SELECT * FROM #{@table}")
|
96
|
-
assert_equal(false, cursor.has_more?, "has_more? should be false")
|
97
|
-
assert_nil(cursor.fetch_row, "calls to fetch_row should be nil")
|
98
|
-
end
|
99
|
-
|
100
|
-
describe 'with data' do
|
97
|
+
describe 'and a test table' do
|
101
98
|
before do
|
102
|
-
@
|
103
|
-
@connection.query("
|
104
|
-
@connection.query("INSERT INTO #{@table} (i) SELECT 1")
|
99
|
+
@table = "#{@database}.foobar"
|
100
|
+
@connection.query("CREATE TABLE #{@table} (i INT)")
|
105
101
|
end
|
106
102
|
|
107
|
-
|
108
|
-
@connection.query("
|
103
|
+
after do
|
104
|
+
@connection.query("DROP TABLE #{@table}") if @connection
|
109
105
|
end
|
110
106
|
|
111
|
-
it '
|
107
|
+
it 'deals with empty tables correctly when using #query' do
|
112
108
|
res = @connection.query("SELECT * FROM #{@table}")
|
113
|
-
assert_equal([
|
109
|
+
assert_equal([], res, "the result set should be empty")
|
114
110
|
end
|
115
111
|
|
116
|
-
it '
|
112
|
+
it 'deals with empty tables correctly when using a cursor' do
|
117
113
|
cursor = @connection.execute("SELECT * FROM #{@table}")
|
118
|
-
|
114
|
+
assert_equal(false, cursor.has_more?, "has_more? should be false")
|
115
|
+
assert_nil(cursor.fetch_row, "calls to fetch_row should be nil")
|
116
|
+
end
|
119
117
|
|
120
|
-
|
121
|
-
|
122
|
-
|
118
|
+
describe 'with data' do
|
119
|
+
before do
|
120
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT 1")
|
121
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT 1")
|
122
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT 1")
|
123
123
|
end
|
124
124
|
|
125
|
-
|
126
|
-
|
127
|
-
|
125
|
+
it 'can handle the keywoard "with"' do
|
126
|
+
res = @connection.query("with bar as (select * from #{@table}) select * from bar")
|
127
|
+
assert_equal([{:i => 1}, {:i => 1}, {:i => 1}], res)
|
128
|
+
end
|
128
129
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
count = @connection.query("SELECT COUNT(*) as n from #{@table}")[0][:n]
|
133
|
-
assert(count > Impala::Cursor::BUFFER_SIZE) # otherwise the test is pointless
|
130
|
+
it 'can insert into the table' do
|
131
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT 2")
|
132
|
+
end
|
134
133
|
|
135
|
-
|
136
|
-
|
134
|
+
it 'can select from the table using #query' do
|
135
|
+
res = @connection.query("SELECT * FROM #{@table}")
|
136
|
+
assert_equal([{:i => 1}, {:i => 1}, {:i => 1}], res)
|
137
|
+
end
|
138
|
+
|
139
|
+
it 'can create a cursor and fetch one row at a time' do
|
140
|
+
cursor = @connection.execute("SELECT * FROM #{@table}")
|
141
|
+
assert_instance_of(Impala::Cursor, cursor, "the result should be a cursor")
|
142
|
+
|
143
|
+
3.times do
|
144
|
+
row = cursor.fetch_row
|
145
|
+
assert_equal({:i=>1}, row, "the row should be a hash with the correct result")
|
146
|
+
end
|
137
147
|
|
138
|
-
|
139
|
-
|
140
|
-
|
148
|
+
assert_equal(false, cursor.has_more?, "has_more? should be false")
|
149
|
+
assert_nil(cursor.fetch_row, "subsequent calls to fetch_row should be nil")
|
150
|
+
end
|
151
|
+
|
152
|
+
it 'can use a cursor to deal with lots of data' do
|
153
|
+
10.times { @connection.query("INSERT INTO #{@table} SELECT * FROM #{@table}") }
|
154
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT 1")
|
155
|
+
count = @connection.query("SELECT COUNT(*) as n from #{@table}")[0][:n]
|
156
|
+
assert(count > Impala::Cursor::BUFFER_SIZE) # otherwise the test is pointless
|
141
157
|
|
142
|
-
|
143
|
-
|
158
|
+
cursor = @connection.execute("SELECT * FROM #{@table}")
|
159
|
+
assert_instance_of(Impala::Cursor, cursor, "the result should be a cursor")
|
144
160
|
|
145
|
-
|
161
|
+
# fetch one to fill the buffer
|
146
162
|
row = cursor.fetch_row
|
147
163
|
assert_equal({:i=>1}, row, "the row should be a hash with the correct result")
|
164
|
+
|
165
|
+
buffer_size = cursor.instance_variable_get('@row_buffer').size
|
166
|
+
assert_equal(Impala::Cursor::BUFFER_SIZE - 1, buffer_size, "it should only buffer #{Impala::Cursor::BUFFER_SIZE} rows into memory")
|
167
|
+
|
168
|
+
(count - 1).times do
|
169
|
+
row = cursor.fetch_row
|
170
|
+
assert_equal({:i=>1}, row, "the row should be a hash with the correct result")
|
171
|
+
end
|
172
|
+
|
173
|
+
assert_equal(false, cursor.has_more?, "has_more? should be false")
|
174
|
+
assert_nil(cursor.fetch_row, "subsequent calls to fetch_row should be nil")
|
148
175
|
end
|
149
176
|
|
150
|
-
|
151
|
-
|
177
|
+
it 'can handle interspersed NULL values' do
|
178
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT NULL")
|
179
|
+
res = @connection.query("SELECT * FROM #{@table} ORDER BY i DESC LIMIT 4")
|
180
|
+
assert_equal([{:i => 1}, {:i => 1}, {:i => 1}, {:i => nil}], res)
|
181
|
+
end
|
152
182
|
end
|
183
|
+
|
153
184
|
end
|
154
185
|
end
|
155
186
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: impala
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-11-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: thrift
|
@@ -27,6 +27,38 @@ dependencies:
|
|
27
27
|
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 0.9.1
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rack
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: thin
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
30
62
|
- !ruby/object:Gem::Dependency
|
31
63
|
name: rake
|
32
64
|
requirement: !ruby/object:Gem::Requirement
|