rbhive-vidma 1.0.2.pre1.pre.thrift0.9.1
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in the public registry.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/CHANGELOG.md +16 -0
- data/Gemfile +3 -0
- data/LICENSE +20 -0
- data/README.md +344 -0
- data/Rakefile +1 -0
- data/lib/rbhive.rb +8 -0
- data/lib/rbhive/connection.rb +150 -0
- data/lib/rbhive/explain_result.rb +46 -0
- data/lib/rbhive/result_set.rb +37 -0
- data/lib/rbhive/schema_definition.rb +86 -0
- data/lib/rbhive/t_c_l_i_connection.rb +439 -0
- data/lib/rbhive/t_c_l_i_result_set.rb +3 -0
- data/lib/rbhive/t_c_l_i_schema_definition.rb +88 -0
- data/lib/rbhive/table_schema.rb +117 -0
- data/lib/rbhive/version.rb +3 -0
- data/lib/thrift/facebook_service.rb +700 -0
- data/lib/thrift/fb303_constants.rb +9 -0
- data/lib/thrift/fb303_types.rb +19 -0
- data/lib/thrift/hive_metastore_constants.rb +41 -0
- data/lib/thrift/hive_metastore_types.rb +630 -0
- data/lib/thrift/hive_service_constants.rb +13 -0
- data/lib/thrift/hive_service_types.rb +72 -0
- data/lib/thrift/queryplan_constants.rb +13 -0
- data/lib/thrift/queryplan_types.rb +261 -0
- data/lib/thrift/sasl_client_transport.rb +97 -0
- data/lib/thrift/serde_constants.rb +92 -0
- data/lib/thrift/serde_types.rb +7 -0
- data/lib/thrift/t_c_l_i_service.rb +1054 -0
- data/lib/thrift/t_c_l_i_service_constants.rb +72 -0
- data/lib/thrift/t_c_l_i_service_types.rb +1762 -0
- data/lib/thrift/thrift_hive.rb +508 -0
- data/lib/thrift/thrift_hive_metastore.rb +3856 -0
- data/rbhive.gemspec +33 -0
- metadata +165 -0
@@ -0,0 +1,46 @@
+class ExplainResult
+  def initialize(rows)
+    @rows = rows
+  end
+
+  def ast
+    by_section[:abstract_syntax_tree].first
+  end
+
+  def stage_count
+    stage_dependencies.length
+  end
+
+  def stage_dependencies
+    by_section[:stage_dependencies] || []
+  end
+
+  def to_tsv
+    @rows.join("\n")
+  end
+
+  def raw
+    @rows
+  end
+
+  def to_s
+    to_tsv
+  end
+
+  private
+
+  def by_section
+    current_section = nil
+    @rows.inject({}) do |sections, row|
+      if row.match(/^[A-Z]/)
+        current_section = row.chomp(':').downcase.gsub(' ', '_').to_sym
+        sections[current_section] = []
+      elsif row.length == 0
+        next sections
+      else
+        sections[current_section] << row.strip
+      end
+      sections
+    end
+  end
+end
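The ExplainResult added above slices raw EXPLAIN output into sections keyed by the capitalised headers Hive prints (for example "ABSTRACT SYNTAX TREE:" and "STAGE DEPENDENCIES:"), keeping the indented lines under each header. A minimal usage sketch, with hypothetical EXPLAIN output supplied by hand rather than fetched from a server:

require 'rbhive/explain_result'   # assumes the gem's lib directory is on the load path

# Hypothetical EXPLAIN output, one element per line, as a connection would return it
rows = [
  "ABSTRACT SYNTAX TREE:",
  "  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME logs))))",
  "",
  "STAGE DEPENDENCIES:",
  "  Stage-1 is a root stage",
  "  Stage-0 depends on stages: Stage-1"
]

explain = ExplainResult.new(rows)
explain.ast                 # => "(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME logs))))"
explain.stage_count         # => 2
explain.stage_dependencies  # => ["Stage-1 is a root stage", "Stage-0 depends on stages: Stage-1"]
explain.to_s                # the raw rows joined with newlines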
@@ -0,0 +1,37 @@
+module RBHive
+  class ResultSet < Array
+    def initialize(rows, schema)
+      @schema = schema
+      super(rows.map {|r| @schema.coerce_row(r) })
+    end
+
+    def column_names
+      @schema.column_names
+    end
+
+    def column_type_map
+      @schema.column_type_map
+    end
+
+    def to_csv(out_file=nil)
+      to_seperated_output(",", out_file)
+    end
+
+    def to_tsv(out_file=nil)
+      to_seperated_output("\t", out_file)
+    end
+
+    def as_arrays
+      @as_arrays ||= self.map{ |r| @schema.coerce_row_to_array(r) }
+    end
+
+    private
+
+    def to_seperated_output(sep, out_file)
+      rows = self.map { |r| @schema.coerce_row_to_array(r).join(sep) }
+      sv = rows.join("\n")
+      return sv if out_file.nil?
+      File.open(out_file, 'w+') { |f| f << sv }
+    end
+  end
+end
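ResultSet is what the gem's fetch helpers hand back: an Array subclass whose elements are row hashes coerced through a SchemaDefinition (next hunk), with CSV/TSV export on top. A hedged sketch that builds one by hand; the schema stand-in and rows below are fabricated and only need to respond to the methods SchemaDefinition actually calls (fieldSchemas, name, type):

require 'ostruct'
require 'rbhive/schema_definition'
require 'rbhive/result_set'

field  = Struct.new(:name, :type)
schema = OpenStruct.new(fieldSchemas: [field.new('name', 'string'), field.new('price', 'double')])

rows       = ["widget\t9.99", "gadget\t4.50"]   # tab-separated rows, as HiveServer1 returns them
definition = RBHive::SchemaDefinition.new(schema, rows.first)
results    = RBHive::ResultSet.new(rows, definition)

results.column_names               # => [:name, :price]
results.first                      # => { :name => "widget", :price => 9.99 }
results.as_arrays                  # => [["widget", 9.99], ["gadget", 4.5]]
puts results.to_tsv                # returns the TSV string when out_file is nil
results.to_csv('/tmp/widgets.csv') # writes the file when a path is given

Note that to_csv and to_tsv are the public entry points; the private helper is spelled to_seperated_output in the gem.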
@@ -0,0 +1,86 @@
+require 'json'
+
+module RBHive
+  class SchemaDefinition
+    attr_reader :schema
+
+    NAN = Float::NAN rescue 0.0/0.0
+    INFINITY = Float::INFINITY rescue 1.0/0.0
+    TYPES = {
+      :boolean => :to_s,
+      :string => :to_s,
+      :bigint => :to_i,
+      :float => :to_f,
+      :double => :to_f,
+      :int => :to_i,
+      :smallint => :to_i,
+      :tinyint => :to_i,
+    }
+
+    def initialize(schema, example_row)
+      @schema = schema
+      @example_row = example_row ? example_row.split("\t") : []
+    end
+
+    def column_names
+      @column_names ||= begin
+        schema_names = @schema.fieldSchemas.map {|c| c.name }
+
+        # In rare cases Hive can return two identical column names
+        # consider SELECT a.foo, b.foo...
+        # in this case you get two columns called foo with no disambiguation.
+        # as a (far from ideal) solution we detect this edge case and rename them
+        # a.foo => foo1, b.foo => foo2
+        # otherwise we will trample one of the columns during Hash mapping.
+        s = Hash.new(0)
+        schema_names.map! { |c| s[c] += 1; s[c] > 1 ? "#{c}---|---#{s[c]}" : c }
+        schema_names.map! { |c| s[c] > 1 ? "#{c}---|---1" : c }
+        schema_names.map! { |c| c.gsub('---|---', '_').to_sym }
+
+        # Lets fix the fact that Hive doesn't return schema data for partitions on SELECT * queries
+        # For now we will call them :_p1, :_p2, etc. to avoid collisions.
+        offset = 0
+        while schema_names.length < @example_row.length
+          schema_names.push(:"_p#{offset+=1}")
+        end
+        schema_names
+      end
+    end
+
+    def column_type_map
+      @column_type_map ||= column_names.inject({}) do |hsh, c|
+        definition = @schema.fieldSchemas.find {|s| s.name.to_sym == c }
+        # If the column isn't in the schema (eg partitions in SELECT * queries) assume they are strings
+        hsh[c] = definition ? definition.type.to_sym : :string
+        hsh
+      end
+    end
+
+    def coerce_row(row)
+      column_names.zip(row.split("\t")).inject({}) do |hsh, (column_name, value)|
+        hsh[column_name] = coerce_column(column_name, value)
+        hsh
+      end
+    end
+
+    def coerce_column(column_name, value)
+      type = column_type_map[column_name]
+      return INFINITY if (type != :string && value == "Infinity")
+      return NAN if (type != :string && value == "NaN")
+      return coerce_complex_value(value) if type.to_s =~ /^array/
+      conversion_method = TYPES[type]
+      conversion_method ? value.send(conversion_method) : value
+    end
+
+    def coerce_row_to_array(row)
+      column_names.map { |n| row[n] }
+    end
+
+    def coerce_complex_value(value)
+      return nil if value.nil?
+      return nil if value.length == 0
+      return nil if value == 'null'
+      JSON.parse(value)
+    end
+  end
+end
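Two edge cases handled by the SchemaDefinition above are worth spelling out: duplicate column names (e.g. SELECT a.foo, b.foo) are renamed to :foo_1 and :foo_2, and any extra values in the example row with no schema entry (partition columns on SELECT *) become :_p1, :_p2, ... typed as strings. A small sketch with a fabricated Thrift-style schema object; note that the renamed duplicates no longer match a schema entry by name, so they also fall back to :string:

require 'ostruct'
require 'rbhive/schema_definition'

field = Struct.new(:name, :type)

# Two columns that came back with the same name, plus an example row carrying
# one extra value for a partition column that Hive omitted from the schema.
schema     = OpenStruct.new(fieldSchemas: [field.new('foo', 'int'), field.new('foo', 'string')])
definition = RBHive::SchemaDefinition.new(schema, "1\tbar\t2014-01-01")

definition.column_names                      # => [:foo_1, :foo_2, :_p1]
definition.column_type_map                   # => { :foo_1 => :string, :foo_2 => :string, :_p1 => :string }
definition.coerce_row("1\tbar\t2014-01-01")  # => { :foo_1 => "1", :foo_2 => "bar", :_p1 => "2014-01-01" }

# Hive's numeric sentinels map onto Ruby float constants for non-string columns
doubles = RBHive::SchemaDefinition.new(OpenStruct.new(fieldSchemas: [field.new('ratio', 'double')]), "0.5")
doubles.coerce_column(:ratio, "Infinity")    # => Float::INFINITY
doubles.coerce_column(:ratio, "NaN")         # => Float::NAN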
@@ -0,0 +1,439 @@
+# suppress warnings
+old_verbose, $VERBOSE = $VERBOSE, nil
+
+raise 'Thrift is not loaded' unless defined?(Thrift)
+raise 'RBHive is not loaded' unless defined?(RBHive)
+
+# require thrift autogenerated files
+require File.join(File.dirname(__FILE__), *%w[.. thrift t_c_l_i_service_constants])
+require File.join(File.dirname(__FILE__), *%w[.. thrift t_c_l_i_service])
+require File.join(File.dirname(__FILE__), *%w[.. thrift sasl_client_transport])
+
+# restore warnings
+$VERBOSE = old_verbose
+
+# Monkey patch thrift to set an infinite read timeout
+module Thrift
+  class HTTPClientTransport < BaseTransport
+    def flush
+      http = Net::HTTP.new @url.host, @url.port
+      http.use_ssl = @url.scheme == 'https'
+      http.read_timeout = nil
+      http.verify_mode = @ssl_verify_mode if @url.scheme == 'https'
+      resp = http.post(@url.request_uri, @outbuf, @headers)
+      data = resp.body
+      data = Bytes.force_binary_encoding(data)
+      @inbuf = StringIO.new data
+      @outbuf = Bytes.empty_byte_buffer
+    end
+  end
+end
+
+module RBHive
+
+  HIVE_THRIFT_MAPPING = {
+    10 => 0,
+    11 => 1,
+    12 => 2,
+    13 => 6,
+    :cdh4 => 0,
+    :cdh5 => 4,
+    :PROTOCOL_V1 => 0,
+    :PROTOCOL_V2 => 1,
+    :PROTOCOL_V3 => 2,
+    :PROTOCOL_V4 => 3,
+    :PROTOCOL_V5 => 4,
+    :PROTOCOL_V6 => 5,
+    :PROTOCOL_V7 => 6
+  }
+
+  def tcli_connect(server, port = 10_000, options)
+    logger = options.key?(:logger) ? options.delete(:logger) : StdOutLogger.new
+    connection = RBHive::TCLIConnection.new(server, port, options, logger)
+    ret = nil
+    begin
+      connection.open
+      connection.open_session
+      ret = yield(connection)
+
+    ensure
+      # Try to close the session and our connection if those are still open, ignore io errors
+      begin
+        connection.close_session if connection.session
+        connection.close
+      rescue IOError => e
+        # noop
+      end
+    end
+
+    ret
+  end
+  module_function :tcli_connect
+
+  class StdOutLogger
+    %w(fatal error warn info debug).each do |level|
+      define_method level.to_sym do |message|
+        STDOUT.puts(message)
+      end
+    end
+  end
+
+  class TCLIConnection
+    attr_reader :client
+
+    def initialize(server, port = 10_000, options = {}, logger = StdOutLogger.new)
+      options ||= {} # backwards compatibility
+      raise "'options' parameter must be a hash" unless options.is_a?(Hash)
+
+      if options[:transport] == :sasl and options[:sasl_params].nil?
+        raise ":transport is set to :sasl, but no :sasl_params option was supplied"
+      end
+
+      # Defaults to buffered transport, Hive 0.10, 1800 second timeout
+      options[:transport] ||= :buffered
+      options[:hive_version] ||= 10
+      options[:timeout] ||= 1800
+      @options = options
+
+      # Look up the appropriate Thrift protocol version for the supplied Hive version
+      @thrift_protocol_version = thrift_hive_protocol(options[:hive_version])
+
+      @logger = logger
+      @transport = thrift_transport(server, port)
+      @protocol = Thrift::BinaryProtocol.new(@transport)
+      @client = Hive2::Thrift::TCLIService::Client.new(@protocol)
+      @session = nil
+      @logger.info("Connecting to HiveServer2 #{server} on port #{port}")
+    end
+
+    def thrift_hive_protocol(version)
+      HIVE_THRIFT_MAPPING[version] || raise("Invalid Hive version")
+    end
+
+    def thrift_transport(server, port)
+      @logger.info("Initializing transport #{@options[:transport]}")
+      case @options[:transport]
+      when :buffered
+        return Thrift::BufferedTransport.new(thrift_socket(server, port, @options[:timeout]))
+      when :sasl
+        return Thrift::SaslClientTransport.new(thrift_socket(server, port, @options[:timeout]),
+                                               parse_sasl_params(@options[:sasl_params]))
+      when :http
+        return Thrift::HTTPClientTransport.new("http://#{server}:#{port}/cliservice")
+      else
+        raise "Unrecognised transport type '#{transport}'"
+      end
+    end
+
+    def thrift_socket(server, port, timeout)
+      socket = Thrift::Socket.new(server, port)
+      socket.timeout = timeout
+      socket
+    end
+
+    # Processes SASL connection params and returns a hash with symbol keys or a nil
+    def parse_sasl_params(sasl_params)
+      # Symbilize keys in a hash
+      if sasl_params.kind_of?(Hash)
+        return sasl_params.inject({}) do |memo,(k,v)|
+          memo[k.to_sym] = v;
+          memo
+        end
+      end
+      return nil
+    end
+
+    def open
+      @transport.open
+    end
+
+    def close
+      @transport.close
+    end
+
+    def open_session
+      @session = @client.OpenSession(prepare_open_session(@thrift_protocol_version))
+    end
+
+    def close_session
+      @client.CloseSession prepare_close_session
+      @session = nil
+    end
+
+    def session
+      @session && @session.sessionHandle
+    end
+
+    def client
+      @client
+    end
+
+    def execute(query)
+      @logger.info("Executing Hive Query: #{query}")
+      req = prepare_execute_statement(query)
+      exec_result = client.ExecuteStatement(req)
+      raise_error_if_failed!(exec_result)
+      exec_result
+    end
+
+    def priority=(priority)
+      set("mapred.job.priority", priority)
+    end
+
+    def queue=(queue)
+      set("mapred.job.queue.name", queue)
+    end
+
+    def set(name,value)
+      @logger.info("Setting #{name}=#{value}")
+      self.execute("SET #{name}=#{value}")
+    end
+
+    # Async execute
+    def async_execute(query)
+      @logger.info("Executing query asynchronously: #{query}")
+      op_handle = @client.ExecuteStatement(
+        Hive2::Thrift::TExecuteStatementReq.new(
+          sessionHandle: @session.sessionHandle,
+          statement: query,
+          runAsync: true
+        )
+      ).operationHandle
+
+      # Return handles to get hold of this query / session again
+      {
+        session: @session.sessionHandle,
+        guid: op_handle.operationId.guid,
+        secret: op_handle.operationId.secret
+      }
+    end
+
+    # Is the query complete?
+    def async_is_complete?(handles)
+      async_state(handles) == :finished
+    end
+
+    # Is the query actually running?
+    def async_is_running?(handles)
+      async_state(handles) == :running
+    end
+
+    # Has the query failed?
+    def async_is_failed?(handles)
+      async_state(handles) == :error
+    end
+
+    def async_is_cancelled?(handles)
+      async_state(handles) == :cancelled
+    end
+
+    def async_cancel(handles)
+      @client.CancelOperation(prepare_cancel_request(handles))
+    end
+
+    # Map states to symbols
+    def async_state(handles)
+      response = @client.GetOperationStatus(
+        Hive2::Thrift::TGetOperationStatusReq.new(operationHandle: prepare_operation_handle(handles))
+      )
+      puts response.operationState
+      case response.operationState
+      when Hive2::Thrift::TOperationState::FINISHED_STATE
+        return :finished
+      when Hive2::Thrift::TOperationState::INITIALIZED_STATE
+        return :initialized
+      when Hive2::Thrift::TOperationState::RUNNING_STATE
+        return :running
+      when Hive2::Thrift::TOperationState::CANCELED_STATE
+        return :cancelled
+      when Hive2::Thrift::TOperationState::CLOSED_STATE
+        return :closed
+      when Hive2::Thrift::TOperationState::ERROR_STATE
+        return :error
+      when Hive2::Thrift::TOperationState::UKNOWN_STATE
+        return :unknown
+      when Hive2::Thrift::TOperationState::PENDING_STATE
+        return :pending
+      when nil
+        raise "No operation state found for handles - has the session been closed?"
+      else
+        return :state_not_in_protocol
+      end
+    end
+
+    # Async fetch results from an async execute
+    def async_fetch(handles, max_rows = 100)
+      # Can't get data from an unfinished query
+      unless async_is_complete?(handles)
+        raise "Can't perform fetch on a query in state: #{async_state(handles)}"
+      end
+
+      # Fetch and
+      fetch_rows(prepare_operation_handle(handles), :first, max_rows)
+    end
+
+    # Performs a query on the server, fetches the results in batches of *batch_size* rows
+    # and yields the result batches to a given block as arrays of rows.
+    def async_fetch_in_batch(handles, batch_size = 1000, &block)
+      raise "No block given for the batch fetch request!" unless block_given?
+      # Can't get data from an unfinished query
+      unless async_is_complete?(handles)
+        raise "Can't perform fetch on a query in state: #{async_state(handles)}"
+      end
+
+      # Now let's iterate over the results
+      loop do
+        rows = fetch_rows(prepare_operation_handle(handles), :next, batch_size)
+        break if rows.empty?
+        yield rows
+      end
+    end
+
+    def async_close_session(handles)
+      validate_handles!(handles)
+      @client.CloseSession(Hive2::Thrift::TCloseSessionReq.new( sessionHandle: handles[:session] ))
+    end
+
+    # Pull rows from the query result
+    def fetch_rows(op_handle, orientation = :first, max_rows = 1000)
+      fetch_req = prepare_fetch_results(op_handle, orientation, max_rows)
+      fetch_results = @client.FetchResults(fetch_req)
+      raise_error_if_failed!(fetch_results)
+      rows = fetch_results.results.rows
+      TCLIResultSet.new(rows, TCLISchemaDefinition.new(get_schema_for(op_handle), rows.first))
+    end
+
+    # Performs a explain on the supplied query on the server, returns it as a ExplainResult.
+    # (Only works on 0.12 if you have this patch - https://issues.apache.org/jira/browse/HIVE-5492)
+    def explain(query)
+      rows = []
+      fetch_in_batch("EXPLAIN " + query) do |batch|
+        rows << batch.map { |b| b[:Explain] }
+      end
+      ExplainResult.new(rows.flatten)
+    end
+
+    # Performs a query on the server, fetches up to *max_rows* rows and returns them as an array.
+    def fetch(query, max_rows = 100)
+      # Execute the query and check the result
+      exec_result = execute(query)
+      raise_error_if_failed!(exec_result)
+
+      # Get search operation handle to fetch the results
+      op_handle = exec_result.operationHandle
+
+      # Fetch the rows
+      fetch_rows(op_handle, :first, max_rows)
+    end
+
+    # Performs a query on the server, fetches the results in batches of *batch_size* rows
+    # and yields the result batches to a given block as arrays of rows.
+    def fetch_in_batch(query, batch_size = 1000, &block)
+      raise "No block given for the batch fetch request!" unless block_given?
+
+      # Execute the query and check the result
+      exec_result = execute(query)
+      raise_error_if_failed!(exec_result)
+
+      # Get search operation handle to fetch the results
+      op_handle = exec_result.operationHandle
+
+      # Prepare fetch results request
+      fetch_req = prepare_fetch_results(op_handle, :next, batch_size)
+
+      # Now let's iterate over the results
+      loop do
+        rows = fetch_rows(op_handle, :next, batch_size)
+        break if rows.empty?
+        yield rows
+      end
+    end
+
+    def create_table(schema)
+      execute(schema.create_table_statement)
+    end
+
+    def drop_table(name)
+      name = name.name if name.is_a?(TableSchema)
+      execute("DROP TABLE `#{name}`")
+    end
+
+    def replace_columns(schema)
+      execute(schema.replace_columns_statement)
+    end
+
+    def add_columns(schema)
+      execute(schema.add_columns_statement)
+    end
+
+    def method_missing(meth, *args)
+      client.send(meth, *args)
+    end
+
+    private
+
+    def prepare_open_session(client_protocol)
+      req = ::Hive2::Thrift::TOpenSessionReq.new( @options[:sasl_params].nil? ? [] : @options[:sasl_params] )
+      req.client_protocol = client_protocol
+      req
+    end
+
+    def prepare_close_session
+      ::Hive2::Thrift::TCloseSessionReq.new( sessionHandle: self.session )
+    end
+
+    def prepare_execute_statement(query)
+      ::Hive2::Thrift::TExecuteStatementReq.new( sessionHandle: self.session, statement: query.to_s, confOverlay: {} )
+    end
+
+    def prepare_fetch_results(handle, orientation=:first, rows=100)
+      orientation_value = "FETCH_#{orientation.to_s.upcase}"
+      valid_orientations = ::Hive2::Thrift::TFetchOrientation::VALUE_MAP.values
+      unless valid_orientations.include?(orientation_value)
+        raise ArgumentError, "Invalid orientation: #{orientation.inspect}"
+      end
+      orientation_const = eval("::Hive2::Thrift::TFetchOrientation::#{orientation_value}")
+      ::Hive2::Thrift::TFetchResultsReq.new(
+        operationHandle: handle,
+        orientation: orientation_const,
+        maxRows: rows
+      )
+    end
+
+    def prepare_operation_handle(handles)
+      validate_handles!(handles)
+      Hive2::Thrift::TOperationHandle.new(
+        operationId: Hive2::Thrift::THandleIdentifier.new(guid: handles[:guid], secret: handles[:secret]),
+        operationType: Hive2::Thrift::TOperationType::EXECUTE_STATEMENT,
+        hasResultSet: false
+      )
+    end
+
+    def prepare_cancel_request(handles)
+      Hive2::Thrift::TCancelOperationReq.new(
+        operationHandle: prepare_operation_handle(handles)
+      )
+    end
+
+    def validate_handles!(handles)
+      unless handles.has_key?(:guid) and handles.has_key?(:secret) and handles.has_key?(:session)
+        raise "Invalid handles hash: #{handles.inspect}"
+      end
+    end
+
+    def get_schema_for(handle)
+      req = ::Hive2::Thrift::TGetResultSetMetadataReq.new( operationHandle: handle )
+      metadata = client.GetResultSetMetadata( req )
+      metadata.schema
+    end
+
+    # Raises an exception if given operation result is a failure
+    def raise_error_if_failed!(result)
+      return if result.status.statusCode == 0
+      error_message = result.status.errorMessage || 'Execution failed!'
+      raise RBHive::TCLIConnectionError.new(error_message)
+    end
+  end
+
+  class TCLIConnectionError < StandardError; end
+end
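The TCLIConnection above is the HiveServer2 client. A hedged usage sketch of the tcli_connect block helper; the host, table, column and queue names are made up, and note that in this version of tcli_connect the options hash is a required third argument (pass {} to keep the defaults: buffered transport, Hive 0.10 wire protocol, 1800 second timeout). It also assumes require 'rbhive' pulls in the thrift gem and these classes.

require 'rbhive'   # assumed to load the thrift gem and the TCLI classes

RBHive.tcli_connect('hive.example.com', 10_000, { hive_version: 12, timeout: 600 }) do |connection|
  connection.queue = 'etl'   # issues SET mapred.job.queue.name=etl

  # Synchronous: execute, then pull up to 100 rows as a TCLIResultSet of row hashes
  rows = connection.fetch('SELECT city, COUNT(*) AS total FROM visits GROUP BY city', 100)
  rows.each { |row| puts "#{row[:city]}\t#{row[:total]}" }

  # Asynchronous: keep the handles, poll the operation state, fetch once finished
  # (a real caller would also check async_is_failed? to avoid polling forever)
  handles = connection.async_execute('SELECT COUNT(*) AS total FROM visits')
  sleep 1 until connection.async_is_complete?(handles)
  connection.async_fetch(handles).each { |row| puts row[:total] }
end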