rbhive 1.0.0.pre → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +6 -0
- data/README.md +12 -3
- data/lib/rbhive/result_set.rb +11 -11
- data/lib/rbhive/schema_definition.rb +4 -2
- data/lib/rbhive/t_c_l_i_connection.rb +19 -12
- data/lib/rbhive/t_c_l_i_schema_definition.rb +4 -3
- data/lib/rbhive/table_schema.rb +6 -1
- data/lib/rbhive/version.rb +1 -1
- data/rbhive.gemspec +13 -13
- metadata +20 -34
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 224509bb79d39f4f06ec081b96e96a4cbb3be341
|
4
|
+
data.tar.gz: f4df37b710b762c9f49238ba34e281773e7e7976
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 26a94f954cf6c76bbb6ed0ad345d56798beef6a933a756bba7778c1be5d66969606237052c0aed44978ecb073ad46836af0f830529220baec18f8d1f20e64fc8
|
7
|
+
data.tar.gz: ba9b396a9fcf388712cf5deefe2f9a16120fffc117b3e53631702f4a1d7df872d8bd209b8e0b1cedd485b6af364854b0a25577cd06ee8718fbee7363110eae6a
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,12 @@
|
|
2
2
|
|
3
3
|
Versioning prior to 0.5.3 was not tracked, so this changelog only lists changes introduced after 0.5.3.
|
4
4
|
|
5
|
+
## 1.0.0
|
6
|
+
|
7
|
+
* Asynchronous execution with Hiveserver2
|
8
|
+
* Misc bugfixes
|
9
|
+
|
10
|
+
|
5
11
|
## 0.6.0
|
6
12
|
|
7
13
|
0.6.0 introduces one backwards-incompatible change:
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# RBHive - A Ruby Thrift client for Apache Hive
|
2
2
|
|
3
|
+
[![Code Climate](https://codeclimate.com/github/forward3d/rbhive/badges/gpa.svg)](https://codeclimate.com/github/forward3d/rbhive)
|
4
|
+
|
3
5
|
RBHive is a simple Ruby gem to communicate with the [Apache Hive](http://hive.apache.org)
|
4
6
|
Thrift servers.
|
5
7
|
|
@@ -97,17 +99,23 @@ Connecting with the defaults:
|
|
97
99
|
connection.fetch('SHOW TABLES')
|
98
100
|
end
|
99
101
|
|
102
|
+
Connecting with a Logger:
|
103
|
+
|
104
|
+
RBHive.tcli_connect('hive.server.address', 10_000, { logger: Logger.new(STDOUT) }) do |connection|
|
105
|
+
connection.fetch('SHOW TABLES')
|
106
|
+
end
|
107
|
+
|
100
108
|
Connecting with a specific Hive version (0.12 in this case):
|
101
109
|
|
102
|
-
RBHive.tcli_connect('hive.server.address', 10_000, {:
|
110
|
+
RBHive.tcli_connect('hive.server.address', 10_000, { hive_version: 12 }) do |connection|
|
103
111
|
connection.fetch('SHOW TABLES')
|
104
112
|
end
|
105
113
|
|
106
114
|
Connecting with a specific Hive version (0.12) and using the `:http` transport:
|
107
115
|
|
108
|
-
RBHive.tcli_connect('hive.server.address', 10_000, {:
|
116
|
+
RBHive.tcli_connect('hive.server.address', 10_000, { hive_version: 12, transport: :http }) do |connection|
|
109
117
|
connection.fetch('SHOW TABLES')
|
110
|
-
end
|
118
|
+
end
|
111
119
|
|
112
120
|
We have not tested the SASL connection, as we don't run SASL; pull requests and testing are welcomed.
|
113
121
|
|
@@ -144,6 +152,7 @@ In addition, you can explicitly set the Thrift protocol version according to thi
|
|
144
152
|
In versions of Hive later than 0.12, the Thrift server supports asynchronous execution.
|
145
153
|
|
146
154
|
The high-level view of using this feature is as follows:
|
155
|
+
|
147
156
|
1. Submit your query using `async_execute(query)`. This function returns a hash
|
148
157
|
with the following keys: `:guid`, `:secret`, and `:session`. You don't need to
|
149
158
|
care about the internals of this hash - all methods that interact with an async
|
data/lib/rbhive/result_set.rb
CHANGED
@@ -4,34 +4,34 @@ module RBHive
|
|
4
4
|
@schema = schema
|
5
5
|
super(rows.map {|r| @schema.coerce_row(r) })
|
6
6
|
end
|
7
|
-
|
7
|
+
|
8
8
|
def column_names
|
9
9
|
@schema.column_names
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def column_type_map
|
13
13
|
@schema.column_type_map
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def to_csv(out_file=nil)
|
17
|
-
|
17
|
+
to_separated_output(",", out_file)
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def to_tsv(out_file=nil)
|
21
|
-
|
21
|
+
to_separated_output("\t", out_file)
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
def as_arrays
|
25
25
|
@as_arrays ||= self.map{ |r| @schema.coerce_row_to_array(r) }
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
private
|
29
|
-
|
30
|
-
def
|
29
|
+
|
30
|
+
def to_separated_output(sep, out_file)
|
31
31
|
rows = self.map { |r| @schema.coerce_row_to_array(r).join(sep) }
|
32
32
|
sv = rows.join("\n")
|
33
33
|
return sv if out_file.nil?
|
34
34
|
File.open(out_file, 'w+') { |f| f << sv }
|
35
35
|
end
|
36
36
|
end
|
37
|
-
end
|
37
|
+
end
|
@@ -4,6 +4,8 @@ module RBHive
|
|
4
4
|
class SchemaDefinition
|
5
5
|
attr_reader :schema
|
6
6
|
|
7
|
+
NAN = Float::NAN rescue 0.0/0.0
|
8
|
+
INFINITY = Float::INFINITY rescue 1.0/0.0
|
7
9
|
TYPES = {
|
8
10
|
:boolean => :to_s,
|
9
11
|
:string => :to_s,
|
@@ -63,8 +65,8 @@ module RBHive
|
|
63
65
|
|
64
66
|
def coerce_column(column_name, value)
|
65
67
|
type = column_type_map[column_name]
|
66
|
-
return
|
67
|
-
return
|
68
|
+
return INFINITY if (type != :string && value == "Infinity")
|
69
|
+
return NAN if (type != :string && value == "NaN")
|
68
70
|
return coerce_complex_value(value) if type.to_s =~ /^array/
|
69
71
|
conversion_method = TYPES[type]
|
70
72
|
conversion_method ? value.send(conversion_method) : value
|
@@ -46,9 +46,10 @@ module RBHive
|
|
46
46
|
:PROTOCOL_V6 => 5,
|
47
47
|
:PROTOCOL_V7 => 6
|
48
48
|
}
|
49
|
-
|
50
|
-
def tcli_connect(server, port=10_000, options)
|
51
|
-
|
49
|
+
|
50
|
+
def tcli_connect(server, port = 10_000, options)
|
51
|
+
logger = options.key?(:logger) ? options.delete(:logger) : StdOutLogger.new
|
52
|
+
connection = RBHive::TCLIConnection.new(server, port, options, logger)
|
52
53
|
ret = nil
|
53
54
|
begin
|
54
55
|
connection.open
|
@@ -65,7 +66,7 @@ module RBHive
|
|
65
66
|
end
|
66
67
|
end
|
67
68
|
|
68
|
-
|
69
|
+
ret
|
69
70
|
end
|
70
71
|
module_function :tcli_connect
|
71
72
|
|
@@ -80,7 +81,7 @@ module RBHive
|
|
80
81
|
class TCLIConnection
|
81
82
|
attr_reader :client
|
82
83
|
|
83
|
-
def initialize(server, port=10_000, options={}, logger=StdOutLogger.new)
|
84
|
+
def initialize(server, port = 10_000, options = {}, logger = StdOutLogger.new)
|
84
85
|
options ||= {} # backwards compatibility
|
85
86
|
raise "'options' parameter must be a hash" unless options.is_a?(Hash)
|
86
87
|
|
@@ -191,14 +192,16 @@ module RBHive
|
|
191
192
|
# Async execute
|
192
193
|
def async_execute(query)
|
193
194
|
@logger.info("Executing query asynchronously: #{query}")
|
194
|
-
|
195
|
+
exec_result = @client.ExecuteStatement(
|
195
196
|
Hive2::Thrift::TExecuteStatementReq.new(
|
196
197
|
sessionHandle: @session.sessionHandle,
|
197
198
|
statement: query,
|
198
199
|
runAsync: true
|
199
200
|
)
|
200
|
-
)
|
201
|
-
|
201
|
+
)
|
202
|
+
raise_error_if_failed!(exec_result)
|
203
|
+
op_handle = exec_result.operationHandle
|
204
|
+
|
202
205
|
# Return handles to get hold of this query / session again
|
203
206
|
{
|
204
207
|
session: @session.sessionHandle,
|
@@ -235,7 +238,7 @@ module RBHive
|
|
235
238
|
response = @client.GetOperationStatus(
|
236
239
|
Hive2::Thrift::TGetOperationStatusReq.new(operationHandle: prepare_operation_handle(handles))
|
237
240
|
)
|
238
|
-
|
241
|
+
|
239
242
|
case response.operationState
|
240
243
|
when Hive2::Thrift::TOperationState::FINISHED_STATE
|
241
244
|
return :finished
|
@@ -253,6 +256,8 @@ module RBHive
|
|
253
256
|
return :unknown
|
254
257
|
when Hive2::Thrift::TOperationState::PENDING_STATE
|
255
258
|
return :pending
|
259
|
+
when nil
|
260
|
+
raise "No operation state found for handles - has the session been closed?"
|
256
261
|
else
|
257
262
|
return :state_not_in_protocol
|
258
263
|
end
|
@@ -262,7 +267,7 @@ module RBHive
|
|
262
267
|
def async_fetch(handles, max_rows = 100)
|
263
268
|
# Can't get data from an unfinished query
|
264
269
|
unless async_is_complete?(handles)
|
265
|
-
raise "Can't perform fetch on a query in state: #{async_state(handles
|
270
|
+
raise "Can't perform fetch on a query in state: #{async_state(handles)}"
|
266
271
|
end
|
267
272
|
|
268
273
|
# Fetch and
|
@@ -275,7 +280,7 @@ module RBHive
|
|
275
280
|
raise "No block given for the batch fetch request!" unless block_given?
|
276
281
|
# Can't get data from an unfinished query
|
277
282
|
unless async_is_complete?(handles)
|
278
|
-
raise "Can't perform fetch on a query in state: #{async_state(handles
|
283
|
+
raise "Can't perform fetch on a query in state: #{async_state(handles)}"
|
279
284
|
end
|
280
285
|
|
281
286
|
# Now let's iterate over the results
|
@@ -428,7 +433,9 @@ module RBHive
|
|
428
433
|
def raise_error_if_failed!(result)
|
429
434
|
return if result.status.statusCode == 0
|
430
435
|
error_message = result.status.errorMessage || 'Execution failed!'
|
431
|
-
raise error_message
|
436
|
+
raise RBHive::TCLIConnectionError.new(error_message)
|
432
437
|
end
|
433
438
|
end
|
439
|
+
|
440
|
+
class TCLIConnectionError < StandardError; end
|
434
441
|
end
|
@@ -4,10 +4,11 @@ module RBHive
|
|
4
4
|
class TCLISchemaDefinition
|
5
5
|
attr_reader :schema
|
6
6
|
|
7
|
+
NAN = Float::NAN rescue 0.0/0.0
|
8
|
+
INFINITY = Float::INFINITY rescue 1.0/0.0
|
7
9
|
TYPES = {
|
8
10
|
:boolean => :to_s,
|
9
11
|
:string => :to_s,
|
10
|
-
:bigint => :to_i,
|
11
12
|
:float => :to_f,
|
12
13
|
:double => :to_f,
|
13
14
|
:int => :to_i,
|
@@ -65,8 +66,8 @@ module RBHive
|
|
65
66
|
|
66
67
|
def coerce_column(column_name, value)
|
67
68
|
type = column_type_map[column_name]
|
68
|
-
return
|
69
|
-
return
|
69
|
+
return INFINITY if (type != :string && value == "Infinity")
|
70
|
+
return NAN if (type != :string && value == "NaN")
|
70
71
|
return coerce_complex_value(value) if type.to_s =~ /^array/
|
71
72
|
conversion_method = TYPES[type]
|
72
73
|
conversion_method ? value.send(conversion_method) : value
|
data/lib/rbhive/table_schema.rb
CHANGED
@@ -8,6 +8,7 @@ module RBHive
|
|
8
8
|
@field_sep = options[:field_sep] || "\t"
|
9
9
|
@line_sep = options[:line_sep] || "\n"
|
10
10
|
@collection_sep = options[:collection_sep] || "|"
|
11
|
+
@stored_as = options[:stored_as] || :textfile
|
11
12
|
@columns = []
|
12
13
|
@partitions = []
|
13
14
|
@serde_name = nil
|
@@ -31,10 +32,14 @@ module RBHive
|
|
31
32
|
def create_table_statement()
|
32
33
|
%[CREATE #{external}TABLE #{table_statement}
|
33
34
|
ROW FORMAT #{row_format_statement}
|
34
|
-
STORED AS
|
35
|
+
STORED AS #{stored_as}
|
35
36
|
#{location}]
|
36
37
|
end
|
37
38
|
|
39
|
+
def stored_as
|
40
|
+
@stored_as.to_s.upcase
|
41
|
+
end
|
42
|
+
|
38
43
|
def row_format_statement
|
39
44
|
if @serde_name
|
40
45
|
serde_statement
|
data/lib/rbhive/version.rb
CHANGED
data/rbhive.gemspec
CHANGED
@@ -4,24 +4,24 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'rbhive/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
7
|
+
spec.name = 'rbhive'
|
8
8
|
spec.version = RBHive::VERSION
|
9
|
-
spec.authors =
|
10
|
-
spec.description =
|
11
|
-
spec.summary =
|
12
|
-
spec.email = [
|
13
|
-
spec.homepage =
|
14
|
-
spec.license =
|
15
|
-
|
9
|
+
spec.authors = %w(Forward3D KolobocK)
|
10
|
+
spec.description = 'Simple gem for executing Hive queries and collecting the results'
|
11
|
+
spec.summary = 'Simple gem for executing Hive queries'
|
12
|
+
spec.email = ['developers@forward3d.com', 'kolobock@gmail.com']
|
13
|
+
spec.homepage = 'http://github.com/forward3d/rbhive'
|
14
|
+
spec.license = 'MIT'
|
15
|
+
|
16
16
|
spec.files = `git ls-files`.split($/)
|
17
17
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
-
spec.require_paths = [
|
20
|
-
|
21
|
-
spec.add_dependency('thrift', '
|
19
|
+
spec.require_paths = ['lib']
|
20
|
+
|
21
|
+
spec.add_dependency('thrift', '~> 0.9')
|
22
22
|
spec.add_dependency('json')
|
23
23
|
|
24
|
-
spec.add_development_dependency
|
25
|
-
spec.add_development_dependency
|
24
|
+
spec.add_development_dependency 'rake'
|
25
|
+
spec.add_development_dependency 'bundler', '>= 1.3'
|
26
26
|
|
27
27
|
end
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbhive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
5
|
-
prerelease: 6
|
4
|
+
version: 1.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Forward3D
|
@@ -10,82 +9,73 @@ authors:
|
|
10
9
|
autorequire:
|
11
10
|
bindir: bin
|
12
11
|
cert_chain: []
|
13
|
-
date:
|
12
|
+
date: 2018-12-17 00:00:00.000000000 Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
15
|
name: thrift
|
17
16
|
requirement: !ruby/object:Gem::Requirement
|
18
|
-
none: false
|
19
17
|
requirements:
|
20
|
-
- -
|
18
|
+
- - "~>"
|
21
19
|
- !ruby/object:Gem::Version
|
22
|
-
version: 0.9
|
20
|
+
version: '0.9'
|
23
21
|
type: :runtime
|
24
22
|
prerelease: false
|
25
23
|
version_requirements: !ruby/object:Gem::Requirement
|
26
|
-
none: false
|
27
24
|
requirements:
|
28
|
-
- -
|
25
|
+
- - "~>"
|
29
26
|
- !ruby/object:Gem::Version
|
30
|
-
version: 0.9
|
27
|
+
version: '0.9'
|
31
28
|
- !ruby/object:Gem::Dependency
|
32
29
|
name: json
|
33
30
|
requirement: !ruby/object:Gem::Requirement
|
34
|
-
none: false
|
35
31
|
requirements:
|
36
|
-
- -
|
32
|
+
- - ">="
|
37
33
|
- !ruby/object:Gem::Version
|
38
34
|
version: '0'
|
39
35
|
type: :runtime
|
40
36
|
prerelease: false
|
41
37
|
version_requirements: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
38
|
requirements:
|
44
|
-
- -
|
39
|
+
- - ">="
|
45
40
|
- !ruby/object:Gem::Version
|
46
41
|
version: '0'
|
47
42
|
- !ruby/object:Gem::Dependency
|
48
43
|
name: rake
|
49
44
|
requirement: !ruby/object:Gem::Requirement
|
50
|
-
none: false
|
51
45
|
requirements:
|
52
|
-
- -
|
46
|
+
- - ">="
|
53
47
|
- !ruby/object:Gem::Version
|
54
48
|
version: '0'
|
55
49
|
type: :development
|
56
50
|
prerelease: false
|
57
51
|
version_requirements: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
52
|
requirements:
|
60
|
-
- -
|
53
|
+
- - ">="
|
61
54
|
- !ruby/object:Gem::Version
|
62
55
|
version: '0'
|
63
56
|
- !ruby/object:Gem::Dependency
|
64
57
|
name: bundler
|
65
58
|
requirement: !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
59
|
requirements:
|
68
|
-
- -
|
60
|
+
- - ">="
|
69
61
|
- !ruby/object:Gem::Version
|
70
62
|
version: '1.3'
|
71
63
|
type: :development
|
72
64
|
prerelease: false
|
73
65
|
version_requirements: !ruby/object:Gem::Requirement
|
74
|
-
none: false
|
75
66
|
requirements:
|
76
|
-
- -
|
67
|
+
- - ">="
|
77
68
|
- !ruby/object:Gem::Version
|
78
69
|
version: '1.3'
|
79
70
|
description: Simple gem for executing Hive queries and collecting the results
|
80
71
|
email:
|
81
|
-
- andy@forward.co.uk
|
82
|
-
- kolobock@gmail.com
|
83
72
|
- developers@forward3d.com
|
73
|
+
- kolobock@gmail.com
|
84
74
|
executables: []
|
85
75
|
extensions: []
|
86
76
|
extra_rdoc_files: []
|
87
77
|
files:
|
88
|
-
- .gitignore
|
78
|
+
- ".gitignore"
|
89
79
|
- CHANGELOG.md
|
90
80
|
- Gemfile
|
91
81
|
- LICENSE
|
@@ -122,29 +112,25 @@ files:
|
|
122
112
|
homepage: http://github.com/forward3d/rbhive
|
123
113
|
licenses:
|
124
114
|
- MIT
|
115
|
+
metadata: {}
|
125
116
|
post_install_message:
|
126
117
|
rdoc_options: []
|
127
118
|
require_paths:
|
128
119
|
- lib
|
129
120
|
required_ruby_version: !ruby/object:Gem::Requirement
|
130
|
-
none: false
|
131
121
|
requirements:
|
132
|
-
- -
|
122
|
+
- - ">="
|
133
123
|
- !ruby/object:Gem::Version
|
134
124
|
version: '0'
|
135
|
-
segments:
|
136
|
-
- 0
|
137
|
-
hash: 2597338757284379755
|
138
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
139
|
-
none: false
|
140
126
|
requirements:
|
141
|
-
- -
|
127
|
+
- - ">="
|
142
128
|
- !ruby/object:Gem::Version
|
143
|
-
version:
|
129
|
+
version: '0'
|
144
130
|
requirements: []
|
145
131
|
rubyforge_project:
|
146
|
-
rubygems_version:
|
132
|
+
rubygems_version: 2.6.11
|
147
133
|
signing_key:
|
148
|
-
specification_version:
|
134
|
+
specification_version: 4
|
149
135
|
summary: Simple gem for executing Hive queries
|
150
136
|
test_files: []
|