impala 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +1 -1
- data/impala.gemspec +2 -0
- data/lib/impala.rb +1 -1
- data/lib/impala/connection.rb +25 -21
- data/lib/impala/version.rb +1 -1
- data/test/test_impala.rb +42 -3
- data/test/test_impala_connected.rb +118 -87
- metadata +34 -2
data/README.md
CHANGED
data/impala.gemspec
CHANGED
@@ -13,6 +13,8 @@ Gem::Specification.new do |gem|
|
|
13
13
|
gem.homepage = "https://github.com/colinmarc/impala-ruby"
|
14
14
|
|
15
15
|
gem.add_dependency('thrift', '~> 0.9.1')
|
16
|
+
gem.add_dependency('rack')
|
17
|
+
gem.add_dependency('thin')
|
16
18
|
|
17
19
|
gem.add_development_dependency('rake')
|
18
20
|
gem.add_development_dependency('pry')
|
data/lib/impala.rb
CHANGED
@@ -14,7 +14,7 @@ require 'impala/cursor'
|
|
14
14
|
require 'impala/connection'
|
15
15
|
|
16
16
|
module Impala
|
17
|
-
KNOWN_COMMANDS = ['select', 'insert', 'show', 'describe', 'use', 'explain', 'create', 'drop']
|
17
|
+
KNOWN_COMMANDS = ['select', 'insert', 'show', 'describe', 'use', 'explain', 'create', 'drop', 'invalidate', 'with']
|
18
18
|
DEFAULT_HOST = 'localhost'
|
19
19
|
DEFAULT_PORT = 21000
|
20
20
|
class InvalidQueryError < StandardError; end
|
data/lib/impala/connection.rb
CHANGED
@@ -2,7 +2,7 @@ module Impala
|
|
2
2
|
# This object represents a connection to an Impala server. It can be used to
|
3
3
|
# perform queries on the database.
|
4
4
|
class Connection
|
5
|
-
|
5
|
+
LOG_CONTEXT_ID = "impala-ruby"
|
6
6
|
|
7
7
|
# Don't instantiate Connections directly; instead, use {Impala.connect}.
|
8
8
|
def initialize(host, port)
|
@@ -43,7 +43,7 @@ module Impala
|
|
43
43
|
@connected
|
44
44
|
end
|
45
45
|
|
46
|
-
# Refresh the metadata store
|
46
|
+
# Refresh the metadata store.
|
47
47
|
def refresh
|
48
48
|
raise ConnectionError.new("Connection closed") unless open?
|
49
49
|
@service.ResetCatalog
|
@@ -53,21 +53,27 @@ module Impala
|
|
53
53
|
# load the entire result set into memory, so if you're dealing with lots
|
54
54
|
# of rows, {#execute} may work better.
|
55
55
|
# @param [String] raw_query the query you want to run
|
56
|
+
# @param [Hash] query_options the options to set user and configuration
|
57
|
+
# (every key other than :user is sent as a query configuration entry; see TImpalaQueryOptions in ImpalaService.thrift)
|
58
|
+
# @option query_options [String] :user the user to run the query as
|
56
59
|
# @return [Array<Hash>] an array of hashes, one for each row.
|
57
|
-
def query(raw_query)
|
58
|
-
execute(raw_query).fetch_all
|
60
|
+
def query(raw_query, query_options = {})
|
61
|
+
execute(raw_query, query_options).fetch_all
|
59
62
|
end
|
60
63
|
|
61
64
|
# Perform a query and return a cursor for iterating over the results.
|
62
65
|
# @param [String] raw_query the query you want to run
|
66
|
+
# @param [Hash] query_options the options to set user and configuration
|
67
|
+
# (every key other than :user is sent as a query configuration entry; see TImpalaQueryOptions in ImpalaService.thrift)
|
68
|
+
# @option query_options [String] :user the user to run the query as
|
63
69
|
# @return [Cursor] a cursor for the result rows
|
64
|
-
def execute(raw_query)
|
70
|
+
def execute(raw_query, query_options = {})
|
65
71
|
raise ConnectionError.new("Connection closed") unless open?
|
66
72
|
|
67
73
|
query = sanitize_query(raw_query)
|
68
|
-
handle = send_query(query)
|
74
|
+
handle = send_query(query, query_options)
|
69
75
|
|
70
|
-
|
76
|
+
check_result(handle)
|
71
77
|
Cursor.new(handle, @service)
|
72
78
|
end
|
73
79
|
|
@@ -85,25 +91,23 @@ module Impala
|
|
85
91
|
([command] + words[1..-1]).join(' ')
|
86
92
|
end
|
87
93
|
|
88
|
-
def send_query(sanitized_query)
|
94
|
+
def send_query(sanitized_query, query_options)
|
89
95
|
query = Protocol::Beeswax::Query.new
|
90
96
|
query.query = sanitized_query
|
91
97
|
|
92
|
-
|
98
|
+
query.hadoop_user = query_options.delete(:user) if query_options[:user]
|
99
|
+
query.configuration = query_options.map do |key, value|
|
100
|
+
"#{key.upcase}=#{value}"
|
101
|
+
end
|
102
|
+
|
103
|
+
@service.executeAndWait(query, LOG_CONTEXT_ID)
|
93
104
|
end
|
94
105
|
|
95
|
-
def
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
break
|
101
|
-
elsif state == Protocol::Beeswax::QueryState::EXCEPTION
|
102
|
-
close_handle(handle)
|
103
|
-
raise ConnectionError.new("The query was aborted")
|
104
|
-
end
|
105
|
-
|
106
|
-
sleep(SLEEP_INTERVAL)
|
106
|
+
def check_result(handle)
|
107
|
+
state = @service.get_state(handle)
|
108
|
+
if state == Protocol::Beeswax::QueryState::EXCEPTION
|
109
|
+
close_handle(handle)
|
110
|
+
raise ConnectionError.new("The query was aborted")
|
107
111
|
end
|
108
112
|
rescue
|
109
113
|
close_handle(handle)
|
data/lib/impala/version.rb
CHANGED
data/test/test_impala.rb
CHANGED
@@ -46,7 +46,7 @@ describe Impala::Connection do
|
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
|
-
describe '#
|
49
|
+
describe '#check_result' do
|
50
50
|
before do
|
51
51
|
Impala::Connection.any_instance.stubs(:open)
|
52
52
|
@connection = Impala::Connection.new('test', 1234)
|
@@ -58,7 +58,46 @@ describe Impala::Connection do
|
|
58
58
|
handle = stub()
|
59
59
|
@service.expects(:close).with(handle).once
|
60
60
|
@service.expects(:get_state).raises(StandardError)
|
61
|
-
assert_raises(StandardError) { @connection.send(:
|
61
|
+
assert_raises(StandardError) { @connection.send(:check_result, handle) }
|
62
62
|
end
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
|
+
describe '#execute' do
|
66
|
+
before do
|
67
|
+
Impala::Connection.any_instance.stubs(:open)
|
68
|
+
Impala::Cursor.stubs(:new)
|
69
|
+
@connection = Impala::Connection.new('test', 1234)
|
70
|
+
@connection.stubs(:open? => true, :sanitize_query => 'sanitized_query', :check_result => nil)
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'should call Protocol::ImpalaService::Client#executeAndWait with the sanitized query' do
|
74
|
+
query = Impala::Protocol::Beeswax::Query.new
|
75
|
+
query.query = 'sanitized_query'
|
76
|
+
query.configuration = []
|
77
|
+
|
78
|
+
@service = stub()
|
79
|
+
@service.expects(:executeAndWait).with(query, Impala::Connection::LOG_CONTEXT_ID).once
|
80
|
+
@connection.instance_variable_set('@service', @service)
|
81
|
+
|
82
|
+
@connection.execute('query')
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'should call Protocol::ImpalaService::Client#executeAndWait with the hadoop_user and configuration if passed as parameter' do
|
86
|
+
query = Impala::Protocol::Beeswax::Query.new
|
87
|
+
query.query = 'sanitized_query'
|
88
|
+
query.hadoop_user = 'impala'
|
89
|
+
query.configuration = %w|NUM_SCANNER_THREADS=8 MEM_LIMIT=3221225472|
|
90
|
+
|
91
|
+
@service = stub()
|
92
|
+
@service.expects(:executeAndWait).with(query, Impala::Connection::LOG_CONTEXT_ID).once
|
93
|
+
@connection.instance_variable_set('@service', @service)
|
94
|
+
|
95
|
+
opt = {
|
96
|
+
:user => 'impala',
|
97
|
+
:num_scanner_threads => 8,
|
98
|
+
:mem_limit => 3221225472
|
99
|
+
}
|
100
|
+
@connection.execute('query', opt)
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
@@ -16,140 +16,171 @@ def connect
|
|
16
16
|
Impala.connect(host, port)
|
17
17
|
end
|
18
18
|
|
19
|
-
describe '
|
19
|
+
describe 'connected tests' do
|
20
20
|
before do
|
21
21
|
skip unless IMPALA_SERVER
|
22
22
|
@connection = connect
|
23
23
|
end
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
describe 'basic tests' do
|
26
|
+
it 'can connect' do
|
27
|
+
assert_instance_of(Impala::Connection, @connection)
|
28
|
+
assert(@connection.open?, "the connection should be open")
|
29
|
+
end
|
29
30
|
|
30
|
-
|
31
|
-
|
32
|
-
|
31
|
+
it 'can refresh the catalog' do
|
32
|
+
@connection.refresh
|
33
|
+
end
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
35
|
+
it 'can refresh metadata' do
|
36
|
+
@connection.query('invalidate metadata')
|
37
|
+
end
|
38
38
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
it 'can run a basic query' do
|
40
|
+
ret = @connection.query('SELECT "foo" AS foo')
|
41
|
+
assert_equal([{:foo=>'foo'}], ret, "the result should be a list of hashes")
|
42
|
+
end
|
43
43
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
it 'can run a basic query with some query options as specified user' do
|
45
|
+
ret = @connection.query('SELECT "foo" AS foo',
|
46
|
+
:user => 'someoneelse',
|
47
|
+
:mem_limit => 1234567890,
|
48
|
+
:max_scan_range_length => 1024 * 1024 * 1024)
|
49
|
+
assert_equal([{:foo=>'foo'}], ret, "the result should be a list of hashes")
|
50
|
+
end
|
48
51
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
52
|
+
it 'can handle boolean values' do
|
53
|
+
ret = @connection.query('SELECT TRUE AS foo')
|
54
|
+
assert_equal([{:foo=>true}], ret, "the result should be a bool")
|
55
|
+
end
|
53
56
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
57
|
+
it 'can handle double values' do
|
58
|
+
ret = @connection.query('SELECT 1.23 AS foo')
|
59
|
+
assert_equal([{:foo=>1.23}], ret, "the result should be a float")
|
60
|
+
end
|
58
61
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
end
|
62
|
+
it 'can handle float values' do
|
63
|
+
ret = @connection.query('SELECT CAST(1.23 AS float) as foo')
|
64
|
+
assert_instance_of(Float, ret.first[:foo], "the result should be a float")
|
65
|
+
end
|
63
66
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
end
|
67
|
+
it 'can handle timestamp values' do
|
68
|
+
ret = @connection.query('SELECT NOW() AS foo')
|
69
|
+
assert_instance_of(Time, ret.first[:foo], "the result should be a timestamp")
|
70
|
+
end
|
69
71
|
|
70
|
-
|
71
|
-
|
72
|
-
|
72
|
+
it 'can handle null values' do
|
73
|
+
ret = @connection.query('SELECT NULL AS nothing')
|
74
|
+
assert_equal(nil, ret.first[:nothing], "the result should be nil")
|
75
|
+
end
|
73
76
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
+
it 'can successfully refresh the metadata store' do
|
78
|
+
ret = @connection.refresh
|
79
|
+
end
|
77
80
|
end
|
78
81
|
|
79
|
-
describe '
|
82
|
+
describe 'with a test database' do
|
80
83
|
before do
|
81
|
-
@
|
82
|
-
@connection.query("CREATE
|
84
|
+
@database = '_impala_ruby_test'
|
85
|
+
@connection.query("CREATE DATABASE IF NOT EXISTS #{@database}")
|
83
86
|
end
|
84
87
|
|
85
88
|
after do
|
86
|
-
@connection.query("DROP
|
89
|
+
@connection.query("DROP DATABASE IF EXISTS #{@database}") if @connection
|
87
90
|
end
|
88
91
|
|
89
|
-
it '
|
90
|
-
|
91
|
-
|
92
|
+
it 'can use the database' do
|
93
|
+
@connection.query("USE #{@database}")
|
94
|
+
@connection.query("USE default")
|
92
95
|
end
|
93
96
|
|
94
|
-
|
95
|
-
cursor = @connection.execute("SELECT * FROM #{@table}")
|
96
|
-
assert_equal(false, cursor.has_more?, "has_more? should be false")
|
97
|
-
assert_nil(cursor.fetch_row, "calls to fetch_row should be nil")
|
98
|
-
end
|
99
|
-
|
100
|
-
describe 'with data' do
|
97
|
+
describe 'and a test table' do
|
101
98
|
before do
|
102
|
-
@
|
103
|
-
@connection.query("
|
104
|
-
@connection.query("INSERT INTO #{@table} (i) SELECT 1")
|
99
|
+
@table = "#{@database}.foobar"
|
100
|
+
@connection.query("CREATE TABLE #{@table} (i INT)")
|
105
101
|
end
|
106
102
|
|
107
|
-
|
108
|
-
@connection.query("
|
103
|
+
after do
|
104
|
+
@connection.query("DROP TABLE #{@table}") if @connection
|
109
105
|
end
|
110
106
|
|
111
|
-
it '
|
107
|
+
it 'deals with empty tables correctly when using #query' do
|
112
108
|
res = @connection.query("SELECT * FROM #{@table}")
|
113
|
-
assert_equal([
|
109
|
+
assert_equal([], res, "the result set should be empty")
|
114
110
|
end
|
115
111
|
|
116
|
-
it '
|
112
|
+
it 'deals with empty tables correctly when using a cursor' do
|
117
113
|
cursor = @connection.execute("SELECT * FROM #{@table}")
|
118
|
-
|
114
|
+
assert_equal(false, cursor.has_more?, "has_more? should be false")
|
115
|
+
assert_nil(cursor.fetch_row, "calls to fetch_row should be nil")
|
116
|
+
end
|
119
117
|
|
120
|
-
|
121
|
-
|
122
|
-
|
118
|
+
describe 'with data' do
|
119
|
+
before do
|
120
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT 1")
|
121
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT 1")
|
122
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT 1")
|
123
123
|
end
|
124
124
|
|
125
|
-
|
126
|
-
|
127
|
-
|
125
|
+
it 'can handle the keywoard "with"' do
|
126
|
+
res = @connection.query("with bar as (select * from #{@table}) select * from bar")
|
127
|
+
assert_equal([{:i => 1}, {:i => 1}, {:i => 1}], res)
|
128
|
+
end
|
128
129
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
count = @connection.query("SELECT COUNT(*) as n from #{@table}")[0][:n]
|
133
|
-
assert(count > Impala::Cursor::BUFFER_SIZE) # otherwise the test is pointless
|
130
|
+
it 'can insert into the table' do
|
131
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT 2")
|
132
|
+
end
|
134
133
|
|
135
|
-
|
136
|
-
|
134
|
+
it 'can select from the table using #query' do
|
135
|
+
res = @connection.query("SELECT * FROM #{@table}")
|
136
|
+
assert_equal([{:i => 1}, {:i => 1}, {:i => 1}], res)
|
137
|
+
end
|
138
|
+
|
139
|
+
it 'can create a cursor and fetch one row at a time' do
|
140
|
+
cursor = @connection.execute("SELECT * FROM #{@table}")
|
141
|
+
assert_instance_of(Impala::Cursor, cursor, "the result should be a cursor")
|
142
|
+
|
143
|
+
3.times do
|
144
|
+
row = cursor.fetch_row
|
145
|
+
assert_equal({:i=>1}, row, "the row should be a hash with the correct result")
|
146
|
+
end
|
137
147
|
|
138
|
-
|
139
|
-
|
140
|
-
|
148
|
+
assert_equal(false, cursor.has_more?, "has_more? should be false")
|
149
|
+
assert_nil(cursor.fetch_row, "subsequent calls to fetch_row should be nil")
|
150
|
+
end
|
151
|
+
|
152
|
+
it 'can use a cursor to deal with lots of data' do
|
153
|
+
10.times { @connection.query("INSERT INTO #{@table} SELECT * FROM #{@table}") }
|
154
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT 1")
|
155
|
+
count = @connection.query("SELECT COUNT(*) as n from #{@table}")[0][:n]
|
156
|
+
assert(count > Impala::Cursor::BUFFER_SIZE) # otherwise the test is pointless
|
141
157
|
|
142
|
-
|
143
|
-
|
158
|
+
cursor = @connection.execute("SELECT * FROM #{@table}")
|
159
|
+
assert_instance_of(Impala::Cursor, cursor, "the result should be a cursor")
|
144
160
|
|
145
|
-
|
161
|
+
# fetch one to fill the buffer
|
146
162
|
row = cursor.fetch_row
|
147
163
|
assert_equal({:i=>1}, row, "the row should be a hash with the correct result")
|
164
|
+
|
165
|
+
buffer_size = cursor.instance_variable_get('@row_buffer').size
|
166
|
+
assert_equal(Impala::Cursor::BUFFER_SIZE - 1, buffer_size, "it should only buffer #{Impala::Cursor::BUFFER_SIZE} rows into memory")
|
167
|
+
|
168
|
+
(count - 1).times do
|
169
|
+
row = cursor.fetch_row
|
170
|
+
assert_equal({:i=>1}, row, "the row should be a hash with the correct result")
|
171
|
+
end
|
172
|
+
|
173
|
+
assert_equal(false, cursor.has_more?, "has_more? should be false")
|
174
|
+
assert_nil(cursor.fetch_row, "subsequent calls to fetch_row should be nil")
|
148
175
|
end
|
149
176
|
|
150
|
-
|
151
|
-
|
177
|
+
it 'can handle interspersed NULL values' do
|
178
|
+
@connection.query("INSERT INTO #{@table} (i) SELECT NULL")
|
179
|
+
res = @connection.query("SELECT * FROM #{@table} ORDER BY i DESC LIMIT 4")
|
180
|
+
assert_equal([{:i => 1}, {:i => 1}, {:i => 1}, {:i => nil}], res)
|
181
|
+
end
|
152
182
|
end
|
183
|
+
|
153
184
|
end
|
154
185
|
end
|
155
186
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: impala
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-11-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: thrift
|
@@ -27,6 +27,38 @@ dependencies:
|
|
27
27
|
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 0.9.1
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rack
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: thin
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
30
62
|
- !ruby/object:Gem::Dependency
|
31
63
|
name: rake
|
32
64
|
requirement: !ruby/object:Gem::Requirement
|