rbhive-u2i 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/CHANGELOG.md +16 -0
- data/Gemfile +3 -0
- data/LICENSE +20 -0
- data/README.md +348 -0
- data/Rakefile +1 -0
- data/lib/rbhive.rb +8 -0
- data/lib/rbhive/connection.rb +150 -0
- data/lib/rbhive/explain_result.rb +46 -0
- data/lib/rbhive/result_set.rb +37 -0
- data/lib/rbhive/schema_definition.rb +87 -0
- data/lib/rbhive/t_c_l_i_connection.rb +441 -0
- data/lib/rbhive/t_c_l_i_result_set.rb +3 -0
- data/lib/rbhive/t_c_l_i_schema_definition.rb +89 -0
- data/lib/rbhive/table_schema.rb +122 -0
- data/lib/rbhive/version.rb +3 -0
- data/lib/thrift/facebook_service.rb +700 -0
- data/lib/thrift/fb303_constants.rb +9 -0
- data/lib/thrift/fb303_types.rb +19 -0
- data/lib/thrift/hive_metastore_constants.rb +41 -0
- data/lib/thrift/hive_metastore_types.rb +630 -0
- data/lib/thrift/hive_service_constants.rb +13 -0
- data/lib/thrift/hive_service_types.rb +72 -0
- data/lib/thrift/queryplan_constants.rb +13 -0
- data/lib/thrift/queryplan_types.rb +261 -0
- data/lib/thrift/sasl_client_transport.rb +97 -0
- data/lib/thrift/serde_constants.rb +92 -0
- data/lib/thrift/serde_types.rb +7 -0
- data/lib/thrift/t_c_l_i_service.rb +1054 -0
- data/lib/thrift/t_c_l_i_service_constants.rb +72 -0
- data/lib/thrift/t_c_l_i_service_types.rb +1762 -0
- data/lib/thrift/thrift_hive.rb +508 -0
- data/lib/thrift/thrift_hive_metastore.rb +3856 -0
- data/rbhive.gemspec +27 -0
- metadata +137 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
# Wraps the rows of a Hive EXPLAIN result and exposes them grouped by the
# section headings found in the output.
class ExplainResult
  # @param rows [Array<String>] EXPLAIN output, one line per element
  def initialize(rows)
    @rows = rows
  end

  # @return [String, nil] the first line of the "abstract syntax tree"
  #   section, or nil when the output contains no such section
  def ast
    (by_section[:abstract_syntax_tree] || []).first
  end

  # @return [Integer] number of lines in the "stage dependencies" section
  def stage_count
    stage_dependencies.length
  end

  # @return [Array<String>] stripped lines of the "stage dependencies"
  #   section; empty when the section is absent
  def stage_dependencies
    by_section[:stage_dependencies] || []
  end

  # @return [String] all rows joined with newlines
  def to_tsv
    @rows.join("\n")
  end

  # @return [Array<String>] the rows exactly as they were supplied
  def raw
    @rows
  end

  def to_s
    to_tsv
  end

  private

  # Groups the rows under their section heading.
  #
  # A row that starts with a capital letter opens a new section; its key is
  # the row with a trailing ':' removed, downcased, spaces replaced by '_'.
  # Every other non-empty row is stripped and appended to the current section.
  #
  # @return [Hash{Symbol => Array<String>}]
  def by_section
    current_section = nil
    @rows.each_with_object({}) do |row, sections|
      if row.match(/^[A-Z]/)
        current_section = row.chomp(':').downcase.gsub(' ', '_').to_sym
        sections[current_section] = []
      elsif row.length == 0 || current_section.nil?
        # Blank rows carry no content. Rows seen before any heading have no
        # section to belong to; they are skipped here (still available via
        # #raw) instead of crashing on a nil section.
        next
      else
        sections[current_section] << row.strip
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module RBHive
  # An Array of coerced result rows together with the schema that produced
  # them. Each raw row is passed through the schema's coerce_row on creation.
  class ResultSet < Array
    # @param rows [Enumerable] raw rows as returned by the server
    # @param schema [#coerce_row, #coerce_row_to_array, #column_names, #column_type_map]
    def initialize(rows, schema)
      @schema = schema
      coerced = rows.map { |raw_row| @schema.coerce_row(raw_row) }
      super(coerced)
    end

    # @return column names as reported by the schema
    def column_names
      @schema.column_names
    end

    # @return column-name => type mapping as reported by the schema
    def column_type_map
      @schema.column_type_map
    end

    # Renders the rows comma-separated; writes to out_file when one is given.
    def to_csv(out_file=nil)
      to_separated_output(",", out_file)
    end

    # Renders the rows tab-separated; writes to out_file when one is given.
    def to_tsv(out_file=nil)
      to_separated_output("\t", out_file)
    end

    # @return [Array<Array>] rows as arrays in column order (computed once and cached)
    def as_arrays
      @as_arrays ||= map { |record| @schema.coerce_row_to_array(record) }
    end

    private

    # Joins every row's values with +sep+ and the rows with newlines.
    # Returns the text when out_file is nil; otherwise writes it to out_file
    # (opened in 'w+' mode) and returns the result of File.open.
    def to_separated_output(sep, out_file)
      text = map { |record| @schema.coerce_row_to_array(record).join(sep) }.join("\n")
      if out_file.nil?
        text
      else
        File.open(out_file, 'w+') { |io| io << text }
      end
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module RBHive
  # Derives Ruby column names and types from a Hive result schema (an object
  # responding to +fieldSchemas+, each entry responding to +name+ and +type+)
  # and coerces tab-separated result rows into hashes of Ruby values.
  class SchemaDefinition
    attr_reader :schema

    NAN = Float::NAN rescue 0.0/0.0
    INFINITY = Float::INFINITY rescue 1.0/0.0

    # Hive type => String method used to convert a raw value of that type.
    TYPES = {
      :boolean => :to_s,
      :string => :to_s,
      :bigint => :to_i,
      :float => :to_f,
      :double => :to_f,
      :int => :to_i,
      :smallint => :to_i,
      :tinyint => :to_i,
    }

    # @param schema [#fieldSchemas] result schema
    # @param example_row [String, nil] one tab-separated result row, used only
    #   to detect columns that are missing from the schema
    def initialize(schema, example_row)
      @schema = schema
      @example_row = example_row ? example_row.split("\t") : []
    end

    # @return [Array<Symbol>] one name per result column, in column order
    def column_names
      @column_names ||= begin
        schema_names = @schema.fieldSchemas.map { |c| c.name }

        # In rare cases Hive can return two identical column names
        # consider SELECT a.foo, b.foo...
        # in this case you get two columns called foo with no disambiguation.
        # as a (far from ideal) solution we detect this edge case and rename them
        # a.foo => foo_1, b.foo => foo_2
        # otherwise we will trample one of the columns during Hash mapping.
        totals = Hash.new(0)
        schema_names.each { |name| totals[name] += 1 }
        seen = Hash.new(0)
        names = schema_names.map do |name|
          if totals[name] > 1
            seen[name] += 1
            :"#{name}_#{seen[name]}"
          else
            name.to_sym
          end
        end

        # Lets fix the fact that Hive doesn't return schema data for partitions on SELECT * queries
        # For now we will call them :_p1, :_p2, etc. to avoid collisions.
        offset = 0
        while names.length < @example_row.length
          names.push(:"_p#{offset += 1}")
        end
        names
      end
    end

    # @return [Hash{Symbol => Symbol}] column name => Hive type
    def column_type_map
      @column_type_map ||= begin
        # column_names is built from fieldSchemas in order, so the i-th name
        # belongs to the i-th field. Matching by position (rather than by
        # name) keeps the declared type of columns that were renamed to
        # disambiguate duplicates.
        fields = @schema.fieldSchemas.to_a
        type_map = {}
        column_names.each_with_index do |name, index|
          definition = fields[index]
          # If the column isn't in the schema (eg partitions in SELECT * queries) assume they are strings
          type_map[name] = definition ? definition.type.to_sym : :string
        end
        type_map
      end
    end

    # Splits a tab-separated row and coerces each value by its column type.
    #
    # @param row [String]
    # @return [Hash{Symbol => Object}]
    def coerce_row(row)
      column_names.zip(row.split("\t")).inject({}) do |hsh, (column_name, value)|
        hsh[column_name] = coerce_column(column_name, value)
        hsh
      end
    end

    # Converts one raw value according to the type of +column_name+.
    # nil stays nil; for non-string columns "Infinity", "-Infinity" and "NaN"
    # become the matching Float; array types are parsed as JSON; types without
    # an entry in TYPES are returned unchanged.
    def coerce_column(column_name, value)
      return nil if value.nil?
      type = column_type_map[column_name]
      unless type == :string
        return INFINITY if value == "Infinity"
        # String#to_f would silently turn this into 0.0
        return -INFINITY if value == "-Infinity"
        return NAN if value == "NaN"
      end
      return coerce_complex_value(value) if type.to_s =~ /^array/
      conversion_method = TYPES[type]
      conversion_method ? value.send(conversion_method) : value
    end

    # @param row [Hash] a row produced by #coerce_row
    # @return [Array] the row's values in column order
    def coerce_row_to_array(row)
      column_names.map { |n| row[n] }
    end

    # Parses a JSON-encoded value; nil, empty and 'null' inputs yield nil.
    def coerce_complex_value(value)
      return nil if value.nil?
      return nil if value.length == 0
      return nil if value == 'null'
      JSON.parse(value)
    end
  end
end
|
@@ -0,0 +1,441 @@
|
|
1
|
+
# Suppress warnings while the Thrift-generated files are loaded. The ensure
# restores the previous $VERBOSE value even when a guard below raises or a
# require fails, so a rescued load error does not leave warnings disabled.
old_verbose, $VERBOSE = $VERBOSE, nil

begin
  raise 'Thrift is not loaded' unless defined?(Thrift)
  raise 'RBHive is not loaded' unless defined?(RBHive)

  # require thrift autogenerated files
  require File.join(File.dirname(__FILE__), *%w[.. thrift t_c_l_i_service_constants])
  require File.join(File.dirname(__FILE__), *%w[.. thrift t_c_l_i_service])
  require File.join(File.dirname(__FILE__), *%w[.. thrift sasl_client_transport])
ensure
  # restore warnings
  $VERBOSE = old_verbose
end
|
14
|
+
|
15
|
+
# Monkey patch thrift to set an infinite read timeout
|
16
|
+
module Thrift
  class HTTPClientTransport < BaseTransport
    # Posts the buffered request to @url with the read timeout disabled, then
    # stores the binary response body as the new input buffer and resets the
    # output buffer.
    def flush
      secure = @url.scheme == 'https'
      connection = Net::HTTP.new(@url.host, @url.port)
      connection.use_ssl = secure
      # nil means "never time out while waiting for the response"
      connection.read_timeout = nil
      connection.verify_mode = @ssl_verify_mode if secure
      response = connection.post(@url.request_uri, @outbuf, @headers)
      @inbuf = StringIO.new(Bytes.force_binary_encoding(response.body))
      @outbuf = Bytes.empty_byte_buffer
    end
  end
end
|
31
|
+
|
32
|
+
module RBHive
|
33
|
+
|
34
|
+
# Maps the value accepted for the :hive_version option to the Thrift client
# protocol number sent when opening a session. Frozen because it is a shared
# lookup table and is never meant to be modified at runtime.
HIVE_THRIFT_MAPPING = {
  # Integer keys: 10 is the default and is documented below as Hive 0.10;
  # 11-13 presumably follow the same 0.x numbering (confirm against README).
  10 => 0,
  11 => 1,
  12 => 2,
  13 => 6,
  # Distribution shortcuts
  :cdh4 => 0,
  :cdh5 => 4,
  # Explicit protocol names
  :PROTOCOL_V1 => 0,
  :PROTOCOL_V2 => 1,
  :PROTOCOL_V3 => 2,
  :PROTOCOL_V4 => 3,
  :PROTOCOL_V5 => 4,
  :PROTOCOL_V6 => 5,
  :PROTOCOL_V7 => 6
}.freeze
|
49
|
+
|
50
|
+
# Opens a HiveServer2 connection and session, yields the connection to the
# given block and returns the block's value. The session (if one was opened)
# and the transport are closed afterwards, whether or not the block raised.
#
# @param server [String] host to connect to
# @param port [Integer] port to connect to
# @param options [Hash, nil] connection options passed to TCLIConnection; a
#   :logger entry, when present, is used as the logger and is not forwarded
# @yield [connection] the open RBHive::TCLIConnection
# @return [Object] whatever the block returns
def tcli_connect(server, port = 10_000, options)
  # Work on a copy so the caller's hash is not modified when :logger is
  # removed here and defaults are filled in by TCLIConnection.
  options = (options || {}).dup
  logger = options.key?(:logger) ? options.delete(:logger) : StdOutLogger.new
  connection = RBHive::TCLIConnection.new(server, port, options, logger)

  begin
    connection.open
    connection.open_session
    yield(connection)
  ensure
    # Try to close the session and our connection if those are still open, ignore io errors
    begin
      connection.close_session if connection.session
      connection.close
    rescue IOError
      # best-effort cleanup: nothing useful can be done if closing fails
    end
  end
end
module_function :tcli_connect
|
72
|
+
|
73
|
+
# Minimal logger: every level (fatal, error, warn, info, debug) prints the
# given message to STDOUT.
class StdOutLogger
  [:fatal, :error, :warn, :info, :debug].each do |severity|
    define_method(severity) { |message| STDOUT.puts(message) }
  end
end
|
80
|
+
|
81
|
+
# Connection to HiveServer2 through the Thrift TCLIService client. Provides
# synchronous and asynchronous query execution and result fetching; any
# method not defined here is forwarded to the underlying Thrift client.
class TCLIConnection
  attr_reader :client

  # @param server [String] host name
  # @param port [Integer] port number
  # @param options [Hash, nil] supports :transport (:buffered, :sasl or :http),
  #   :sasl_params (required for :sasl), :hive_version and :timeout; missing
  #   entries are filled with defaults in place
  # @param logger [#info] receives progress messages
  # @raise [RuntimeError] for non-hash options, a :sasl transport without
  #   :sasl_params, an unknown Hive version or an unknown transport
  def initialize(server, port = 10_000, options = {}, logger = StdOutLogger.new)
    options ||= {} # backwards compatibility
    raise "'options' parameter must be a hash" unless options.is_a?(Hash)

    if options[:transport] == :sasl && options[:sasl_params].nil?
      raise ":transport is set to :sasl, but no :sasl_params option was supplied"
    end

    # Defaults to buffered transport, Hive 0.10, 1800 second timeout
    options[:transport] ||= :buffered
    options[:hive_version] ||= 10
    options[:timeout] ||= 1800
    @options = options

    # Look up the appropriate Thrift protocol version for the supplied Hive version
    @thrift_protocol_version = thrift_hive_protocol(options[:hive_version])

    @logger = logger
    @transport = thrift_transport(server, port)
    @protocol = Thrift::BinaryProtocol.new(@transport)
    @client = Hive2::Thrift::TCLIService::Client.new(@protocol)
    @session = nil
    @logger.info("Connecting to HiveServer2 #{server} on port #{port}")
  end

  # @return the Thrift protocol number for +version+
  # @raise [RuntimeError] when the version has no entry in HIVE_THRIFT_MAPPING
  def thrift_hive_protocol(version)
    HIVE_THRIFT_MAPPING[version] || raise("Invalid Hive version")
  end

  # Builds the Thrift transport selected by the :transport option.
  # @raise [RuntimeError] when the transport type is not recognised
  def thrift_transport(server, port)
    @logger.info("Initializing transport #{@options[:transport]}")
    case @options[:transport]
    when :buffered
      Thrift::BufferedTransport.new(thrift_socket(server, port, @options[:timeout]))
    when :sasl
      Thrift::SaslClientTransport.new(thrift_socket(server, port, @options[:timeout]),
                                      parse_sasl_params(@options[:sasl_params]))
    when :http
      Thrift::HTTPClientTransport.new("http://#{server}:#{port}/cliservice")
    else
      # Interpolate the configured option; there is no local named `transport`.
      raise "Unrecognised transport type '#{@options[:transport]}'"
    end
  end

  # @return a Thrift socket for server:port with the given timeout applied
  def thrift_socket(server, port, timeout)
    socket = Thrift::Socket.new(server, port)
    socket.timeout = timeout
    socket
  end

  # Processes SASL connection params and returns a hash with symbol keys or a nil
  def parse_sasl_params(sasl_params)
    return nil unless sasl_params.kind_of?(Hash)

    # Symbolize keys in a hash
    sasl_params.each_with_object({}) { |(k, v), memo| memo[k.to_sym] = v }
  end

  def open
    @transport.open
  end

  def close
    @transport.close
  end

  # Opens a session using the protocol version chosen at construction time.
  def open_session
    @session = @client.OpenSession(prepare_open_session(@thrift_protocol_version))
  end

  # Closes the current session on the server and forgets it locally.
  def close_session
    @client.CloseSession prepare_close_session
    @session = nil
  end

  # @return the current session handle, or nil when no session is open
  def session
    @session && @session.sessionHandle
  end

  # Runs a statement synchronously.
  # @return the ExecuteStatement response
  # @raise [RBHive::TCLIConnectionError] when the response status is not success
  def execute(query)
    @logger.info("Executing Hive Query: #{query}")
    req = prepare_execute_statement(query)
    exec_result = client.ExecuteStatement(req)
    raise_error_if_failed!(exec_result)
    exec_result
  end

  def priority=(priority)
    set("mapred.job.priority", priority)
  end

  def queue=(queue)
    set("mapred.job.queue.name", queue)
  end

  # Executes "SET name=value" in the current session.
  def set(name, value)
    @logger.info("Setting #{name}=#{value}")
    execute("SET #{name}=#{value}")
  end

  # Async execute
  #
  # @return [Hash] :session, :guid and :secret handles that identify the
  #   running operation in later async_* calls
  def async_execute(query)
    @logger.info("Executing query asynchronously: #{query}")
    exec_result = @client.ExecuteStatement(
      Hive2::Thrift::TExecuteStatementReq.new(
        sessionHandle: @session.sessionHandle,
        statement: query,
        runAsync: true
      )
    )
    raise_error_if_failed!(exec_result)
    op_handle = exec_result.operationHandle

    # Return handles to get hold of this query / session again
    {
      session: @session.sessionHandle,
      guid: op_handle.operationId.guid,
      secret: op_handle.operationId.secret
    }
  end

  # Is the query complete?
  def async_is_complete?(handles)
    async_state(handles) == :finished
  end

  # Is the query actually running?
  def async_is_running?(handles)
    async_state(handles) == :running
  end

  # Has the query failed?
  def async_is_failed?(handles)
    async_state(handles) == :error
  end

  # Has the query been cancelled?
  def async_is_cancelled?(handles)
    async_state(handles) == :cancelled
  end

  def async_cancel(handles)
    @client.CancelOperation(prepare_cancel_request(handles))
  end

  # Map states to symbols
  #
  # @raise [RuntimeError] when the server reports no state for the handles
  def async_state(handles)
    response = @client.GetOperationStatus(
      Hive2::Thrift::TGetOperationStatusReq.new(operationHandle: prepare_operation_handle(handles))
    )

    case response.operationState
    when Hive2::Thrift::TOperationState::FINISHED_STATE then :finished
    when Hive2::Thrift::TOperationState::INITIALIZED_STATE then :initialized
    when Hive2::Thrift::TOperationState::RUNNING_STATE then :running
    when Hive2::Thrift::TOperationState::CANCELED_STATE then :cancelled
    when Hive2::Thrift::TOperationState::CLOSED_STATE then :closed
    when Hive2::Thrift::TOperationState::ERROR_STATE then :error
    # NOTE(review): "UKNOWN" spelling is kept as-is; presumably it matches the
    # generated Thrift constant -- confirm before "fixing" it.
    when Hive2::Thrift::TOperationState::UKNOWN_STATE then :unknown
    when Hive2::Thrift::TOperationState::PENDING_STATE then :pending
    when nil
      raise "No operation state found for handles - has the session been closed?"
    else
      :state_not_in_protocol
    end
  end

  # Async fetch results from an async execute
  def async_fetch(handles, max_rows = 100)
    # Can't get data from an unfinished query
    unless async_is_complete?(handles)
      raise "Can't perform fetch on a query in state: #{async_state(handles)}"
    end

    # Fetch up to max_rows rows, starting from the first row
    fetch_rows(prepare_operation_handle(handles), :first, max_rows)
  end

  # Fetches the results of a finished async query in batches of *batch_size* rows
  # and yields the result batches to a given block as arrays of rows.
  def async_fetch_in_batch(handles, batch_size = 1000, &block)
    raise "No block given for the batch fetch request!" unless block_given?
    # Can't get data from an unfinished query
    unless async_is_complete?(handles)
      raise "Can't perform fetch on a query in state: #{async_state(handles)}"
    end

    # Now let's iterate over the results
    loop do
      rows = fetch_rows(prepare_operation_handle(handles), :next, batch_size)
      break if rows.empty?
      yield rows
    end
  end

  # Closes the session identified by handles[:session].
  def async_close_session(handles)
    validate_handles!(handles)
    @client.CloseSession(Hive2::Thrift::TCloseSessionReq.new(sessionHandle: handles[:session]))
  end

  # Pull rows from the query result
  #
  # @return [TCLIResultSet] rows coerced with the operation's result schema
  # @raise [RBHive::TCLIConnectionError] when the fetch response reports failure
  def fetch_rows(op_handle, orientation = :first, max_rows = 1000)
    fetch_req = prepare_fetch_results(op_handle, orientation, max_rows)
    fetch_results = @client.FetchResults(fetch_req)
    raise_error_if_failed!(fetch_results)
    rows = fetch_results.results.rows
    TCLIResultSet.new(rows, TCLISchemaDefinition.new(get_schema_for(op_handle), rows.first))
  end

  # Performs a explain on the supplied query on the server, returns it as a ExplainResult.
  # (Only works on 0.12 if you have this patch - https://issues.apache.org/jira/browse/HIVE-5492)
  def explain(query)
    rows = []
    fetch_in_batch("EXPLAIN " + query) do |batch|
      rows << batch.map { |b| b[:Explain] }
    end
    ExplainResult.new(rows.flatten)
  end

  # Performs a query on the server, fetches up to *max_rows* rows and returns them as an array.
  def fetch(query, max_rows = 100)
    # Execute the query and check the result
    exec_result = execute(query)
    raise_error_if_failed!(exec_result)

    # Get search operation handle to fetch the results
    op_handle = exec_result.operationHandle

    # Fetch the rows
    fetch_rows(op_handle, :first, max_rows)
  end

  # Performs a query on the server, fetches the results in batches of *batch_size* rows
  # and yields the result batches to a given block as arrays of rows.
  def fetch_in_batch(query, batch_size = 1000, &block)
    raise "No block given for the batch fetch request!" unless block_given?

    # Execute the query and check the result
    exec_result = execute(query)
    raise_error_if_failed!(exec_result)

    # Get search operation handle to fetch the results
    op_handle = exec_result.operationHandle

    # Now let's iterate over the results; fetch_rows builds each request itself
    loop do
      rows = fetch_rows(op_handle, :next, batch_size)
      break if rows.empty?
      yield rows
    end
  end

  def create_table(schema)
    execute(schema.create_table_statement)
  end

  # @param name [String, TableSchema] table name, or a schema whose name is used
  def drop_table(name)
    name = name.name if name.is_a?(TableSchema)
    execute("DROP TABLE `#{name}`")
  end

  def replace_columns(schema)
    execute(schema.replace_columns_statement)
  end

  def add_columns(schema)
    execute(schema.add_columns_statement)
  end

  # Forwards any method not defined here (and its block) to the Thrift client.
  def method_missing(meth, *args, &block)
    client.send(meth, *args, &block)
  end

  # Keeps respond_to? in step with the forwarding done by method_missing.
  def respond_to_missing?(meth, include_private = false)
    (!client.nil? && client.respond_to?(meth, include_private)) || super
  end

  private

  def prepare_open_session(client_protocol)
    req = ::Hive2::Thrift::TOpenSessionReq.new( @options[:sasl_params].nil? ? [] : @options[:sasl_params] )
    req.client_protocol = client_protocol
    req
  end

  def prepare_close_session
    ::Hive2::Thrift::TCloseSessionReq.new( sessionHandle: self.session )
  end

  def prepare_execute_statement(query)
    ::Hive2::Thrift::TExecuteStatementReq.new( sessionHandle: self.session, statement: query.to_s, confOverlay: {} )
  end

  # Builds a fetch request for +handle+.
  #
  # @param orientation [Symbol, String] e.g. :first or :next; upcased and
  #   prefixed with "FETCH_" to name a TFetchOrientation constant
  # @raise [ArgumentError] when no such orientation exists in VALUE_MAP
  def prepare_fetch_results(handle, orientation=:first, rows=100)
    orientation_value = "FETCH_#{orientation.to_s.upcase}"
    valid_orientations = ::Hive2::Thrift::TFetchOrientation::VALUE_MAP.values
    unless valid_orientations.include?(orientation_value)
      raise ArgumentError, "Invalid orientation: #{orientation.inspect}"
    end
    # Resolve the constant by name; no need to eval a code string for this.
    orientation_const = ::Hive2::Thrift::TFetchOrientation.const_get(orientation_value)
    ::Hive2::Thrift::TFetchResultsReq.new(
      operationHandle: handle,
      orientation: orientation_const,
      maxRows: rows
    )
  end

  # Rebuilds an operation handle from the hash returned by async_execute.
  def prepare_operation_handle(handles)
    validate_handles!(handles)
    Hive2::Thrift::TOperationHandle.new(
      operationId: Hive2::Thrift::THandleIdentifier.new(guid: handles[:guid], secret: handles[:secret]),
      operationType: Hive2::Thrift::TOperationType::EXECUTE_STATEMENT,
      hasResultSet: false
    )
  end

  def prepare_cancel_request(handles)
    Hive2::Thrift::TCancelOperationReq.new(
      operationHandle: prepare_operation_handle(handles)
    )
  end

  # @raise [RuntimeError] unless handles has :guid, :secret and :session keys
  def validate_handles!(handles)
    unless handles.has_key?(:guid) && handles.has_key?(:secret) && handles.has_key?(:session)
      raise "Invalid handles hash: #{handles.inspect}"
    end
  end

  def get_schema_for(handle)
    req = ::Hive2::Thrift::TGetResultSetMetadataReq.new( operationHandle: handle )
    metadata = client.GetResultSetMetadata( req )
    metadata.schema
  end

  # Raises an exception if given operation result is a failure
  # (any status code other than 0)
  def raise_error_if_failed!(result)
    return if result.status.statusCode == 0
    error_message = result.status.errorMessage || 'Execution failed!'
    raise RBHive::TCLIConnectionError.new(error_message)
  end
end
|
439
|
+
|
440
|
+
# Error raised when a HiveServer2 operation result reports a failure status.
class TCLIConnectionError < StandardError
end
|
441
|
+
end
|