rbhive 1.0.0.pre → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +6 -0
- data/README.md +12 -3
- data/lib/rbhive/result_set.rb +11 -11
- data/lib/rbhive/schema_definition.rb +4 -2
- data/lib/rbhive/t_c_l_i_connection.rb +19 -12
- data/lib/rbhive/t_c_l_i_schema_definition.rb +4 -3
- data/lib/rbhive/table_schema.rb +6 -1
- data/lib/rbhive/version.rb +1 -1
- data/rbhive.gemspec +13 -13
- metadata +20 -34
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 224509bb79d39f4f06ec081b96e96a4cbb3be341
|
4
|
+
data.tar.gz: f4df37b710b762c9f49238ba34e281773e7e7976
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 26a94f954cf6c76bbb6ed0ad345d56798beef6a933a756bba7778c1be5d66969606237052c0aed44978ecb073ad46836af0f830529220baec18f8d1f20e64fc8
|
7
|
+
data.tar.gz: ba9b396a9fcf388712cf5deefe2f9a16120fffc117b3e53631702f4a1d7df872d8bd209b8e0b1cedd485b6af364854b0a25577cd06ee8718fbee7363110eae6a
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,12 @@
|
|
2
2
|
|
3
3
|
Versioning prior to 0.5.3 was not tracked, so this changelog only lists changes introduced after 0.5.3.
|
4
4
|
|
5
|
+
## 1.0.0
|
6
|
+
|
7
|
+
* Asynchronous execution with Hiveserver2
|
8
|
+
* Misc bugfixes
|
9
|
+
|
10
|
+
|
5
11
|
## 0.6.0
|
6
12
|
|
7
13
|
0.6.0 introduces one backwards-incompatible change:
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# RBHive - A Ruby Thrift client for Apache Hive
|
2
2
|
|
3
|
+
[Code Climate](https://codeclimate.com/github/forward3d/rbhive)
|
4
|
+
|
3
5
|
RBHive is a simple Ruby gem to communicate with the [Apache Hive](http://hive.apache.org)
|
4
6
|
Thrift servers.
|
5
7
|
|
@@ -97,17 +99,23 @@ Connecting with the defaults:
|
|
97
99
|
connection.fetch('SHOW TABLES')
|
98
100
|
end
|
99
101
|
|
102
|
+
Connecting with a Logger:
|
103
|
+
|
104
|
+
RBHive.tcli_connect('hive.server.address', 10_000, { logger: Logger.new(STDOUT) }) do |connection|
|
105
|
+
connection.fetch('SHOW TABLES')
|
106
|
+
end
|
107
|
+
|
100
108
|
Connecting with a specific Hive version (0.12 in this case):
|
101
109
|
|
102
|
-
RBHive.tcli_connect('hive.server.address', 10_000, {:
|
110
|
+
RBHive.tcli_connect('hive.server.address', 10_000, { hive_version: 12 }) do |connection|
|
103
111
|
connection.fetch('SHOW TABLES')
|
104
112
|
end
|
105
113
|
|
106
114
|
Connecting with a specific Hive version (0.12) and using the `:http` transport:
|
107
115
|
|
108
|
-
RBHive.tcli_connect('hive.server.address', 10_000, {:
|
116
|
+
RBHive.tcli_connect('hive.server.address', 10_000, { hive_version: 12, transport: :http }) do |connection|
|
109
117
|
connection.fetch('SHOW TABLES')
|
110
|
-
end
|
118
|
+
end
|
111
119
|
|
112
120
|
We have not tested the SASL connection, as we don't run SASL; pull requests and testing are welcomed.
|
113
121
|
|
@@ -144,6 +152,7 @@ In addition, you can explicitly set the Thrift protocol version according to thi
|
|
144
152
|
In versions of Hive later than 0.12, the Thrift server supports asynchronous execution.
|
145
153
|
|
146
154
|
The high-level view of using this feature is as follows:
|
155
|
+
|
147
156
|
1. Submit your query using `async_execute(query)`. This function returns a hash
|
148
157
|
with the following keys: `:guid`, `:secret`, and `:session`. You don't need to
|
149
158
|
care about the internals of this hash - all methods that interact with an async
|
data/lib/rbhive/result_set.rb
CHANGED
@@ -4,34 +4,34 @@ module RBHive
|
|
4
4
|
@schema = schema
|
5
5
|
super(rows.map {|r| @schema.coerce_row(r) })
|
6
6
|
end
|
7
|
-
|
7
|
+
|
8
8
|
def column_names
|
9
9
|
@schema.column_names
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def column_type_map
|
13
13
|
@schema.column_type_map
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def to_csv(out_file=nil)
|
17
|
-
|
17
|
+
to_separated_output(",", out_file)
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def to_tsv(out_file=nil)
|
21
|
-
|
21
|
+
to_separated_output("\t", out_file)
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
def as_arrays
|
25
25
|
@as_arrays ||= self.map{ |r| @schema.coerce_row_to_array(r) }
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
private
|
29
|
-
|
30
|
-
def
|
29
|
+
|
30
|
+
def to_separated_output(sep, out_file)
|
31
31
|
rows = self.map { |r| @schema.coerce_row_to_array(r).join(sep) }
|
32
32
|
sv = rows.join("\n")
|
33
33
|
return sv if out_file.nil?
|
34
34
|
File.open(out_file, 'w+') { |f| f << sv }
|
35
35
|
end
|
36
36
|
end
|
37
|
-
end
|
37
|
+
end
|
@@ -4,6 +4,8 @@ module RBHive
|
|
4
4
|
class SchemaDefinition
|
5
5
|
attr_reader :schema
|
6
6
|
|
7
|
+
NAN = Float::NAN rescue 0.0/0.0
|
8
|
+
INFINITY = Float::INFINITY rescue 1.0/0.0
|
7
9
|
TYPES = {
|
8
10
|
:boolean => :to_s,
|
9
11
|
:string => :to_s,
|
@@ -63,8 +65,8 @@ module RBHive
|
|
63
65
|
|
64
66
|
def coerce_column(column_name, value)
|
65
67
|
type = column_type_map[column_name]
|
66
|
-
return
|
67
|
-
return
|
68
|
+
return INFINITY if (type != :string && value == "Infinity")
|
69
|
+
return NAN if (type != :string && value == "NaN")
|
68
70
|
return coerce_complex_value(value) if type.to_s =~ /^array/
|
69
71
|
conversion_method = TYPES[type]
|
70
72
|
conversion_method ? value.send(conversion_method) : value
|
@@ -46,9 +46,10 @@ module RBHive
|
|
46
46
|
:PROTOCOL_V6 => 5,
|
47
47
|
:PROTOCOL_V7 => 6
|
48
48
|
}
|
49
|
-
|
50
|
-
def tcli_connect(server, port=10_000, options)
|
51
|
-
|
49
|
+
|
50
|
+
def tcli_connect(server, port = 10_000, options)
|
51
|
+
logger = options.key?(:logger) ? options.delete(:logger) : StdOutLogger.new
|
52
|
+
connection = RBHive::TCLIConnection.new(server, port, options, logger)
|
52
53
|
ret = nil
|
53
54
|
begin
|
54
55
|
connection.open
|
@@ -65,7 +66,7 @@ module RBHive
|
|
65
66
|
end
|
66
67
|
end
|
67
68
|
|
68
|
-
|
69
|
+
ret
|
69
70
|
end
|
70
71
|
module_function :tcli_connect
|
71
72
|
|
@@ -80,7 +81,7 @@ module RBHive
|
|
80
81
|
class TCLIConnection
|
81
82
|
attr_reader :client
|
82
83
|
|
83
|
-
def initialize(server, port=10_000, options={}, logger=StdOutLogger.new)
|
84
|
+
def initialize(server, port = 10_000, options = {}, logger = StdOutLogger.new)
|
84
85
|
options ||= {} # backwards compatibility
|
85
86
|
raise "'options' parameter must be a hash" unless options.is_a?(Hash)
|
86
87
|
|
@@ -191,14 +192,16 @@ module RBHive
|
|
191
192
|
# Async execute
|
192
193
|
def async_execute(query)
|
193
194
|
@logger.info("Executing query asynchronously: #{query}")
|
194
|
-
|
195
|
+
exec_result = @client.ExecuteStatement(
|
195
196
|
Hive2::Thrift::TExecuteStatementReq.new(
|
196
197
|
sessionHandle: @session.sessionHandle,
|
197
198
|
statement: query,
|
198
199
|
runAsync: true
|
199
200
|
)
|
200
|
-
)
|
201
|
-
|
201
|
+
)
|
202
|
+
raise_error_if_failed!(exec_result)
|
203
|
+
op_handle = exec_result.operationHandle
|
204
|
+
|
202
205
|
# Return handles to get hold of this query / session again
|
203
206
|
{
|
204
207
|
session: @session.sessionHandle,
|
@@ -235,7 +238,7 @@ module RBHive
|
|
235
238
|
response = @client.GetOperationStatus(
|
236
239
|
Hive2::Thrift::TGetOperationStatusReq.new(operationHandle: prepare_operation_handle(handles))
|
237
240
|
)
|
238
|
-
|
241
|
+
|
239
242
|
case response.operationState
|
240
243
|
when Hive2::Thrift::TOperationState::FINISHED_STATE
|
241
244
|
return :finished
|
@@ -253,6 +256,8 @@ module RBHive
|
|
253
256
|
return :unknown
|
254
257
|
when Hive2::Thrift::TOperationState::PENDING_STATE
|
255
258
|
return :pending
|
259
|
+
when nil
|
260
|
+
raise "No operation state found for handles - has the session been closed?"
|
256
261
|
else
|
257
262
|
return :state_not_in_protocol
|
258
263
|
end
|
@@ -262,7 +267,7 @@ module RBHive
|
|
262
267
|
def async_fetch(handles, max_rows = 100)
|
263
268
|
# Can't get data from an unfinished query
|
264
269
|
unless async_is_complete?(handles)
|
265
|
-
raise "Can't perform fetch on a query in state: #{async_state(handles
|
270
|
+
raise "Can't perform fetch on a query in state: #{async_state(handles)}"
|
266
271
|
end
|
267
272
|
|
268
273
|
# Fetch and
|
@@ -275,7 +280,7 @@ module RBHive
|
|
275
280
|
raise "No block given for the batch fetch request!" unless block_given?
|
276
281
|
# Can't get data from an unfinished query
|
277
282
|
unless async_is_complete?(handles)
|
278
|
-
raise "Can't perform fetch on a query in state: #{async_state(handles
|
283
|
+
raise "Can't perform fetch on a query in state: #{async_state(handles)}"
|
279
284
|
end
|
280
285
|
|
281
286
|
# Now let's iterate over the results
|
@@ -428,7 +433,9 @@ module RBHive
|
|
428
433
|
def raise_error_if_failed!(result)
|
429
434
|
return if result.status.statusCode == 0
|
430
435
|
error_message = result.status.errorMessage || 'Execution failed!'
|
431
|
-
raise error_message
|
436
|
+
raise RBHive::TCLIConnectionError.new(error_message)
|
432
437
|
end
|
433
438
|
end
|
439
|
+
|
440
|
+
class TCLIConnectionError < StandardError; end
|
434
441
|
end
|
@@ -4,10 +4,11 @@ module RBHive
|
|
4
4
|
class TCLISchemaDefinition
|
5
5
|
attr_reader :schema
|
6
6
|
|
7
|
+
NAN = Float::NAN rescue 0.0/0.0
|
8
|
+
INFINITY = Float::INFINITY rescue 1.0/0.0
|
7
9
|
TYPES = {
|
8
10
|
:boolean => :to_s,
|
9
11
|
:string => :to_s,
|
10
|
-
:bigint => :to_i,
|
11
12
|
:float => :to_f,
|
12
13
|
:double => :to_f,
|
13
14
|
:int => :to_i,
|
@@ -65,8 +66,8 @@ module RBHive
|
|
65
66
|
|
66
67
|
def coerce_column(column_name, value)
|
67
68
|
type = column_type_map[column_name]
|
68
|
-
return
|
69
|
-
return
|
69
|
+
return INFINITY if (type != :string && value == "Infinity")
|
70
|
+
return NAN if (type != :string && value == "NaN")
|
70
71
|
return coerce_complex_value(value) if type.to_s =~ /^array/
|
71
72
|
conversion_method = TYPES[type]
|
72
73
|
conversion_method ? value.send(conversion_method) : value
|
data/lib/rbhive/table_schema.rb
CHANGED
@@ -8,6 +8,7 @@ module RBHive
|
|
8
8
|
@field_sep = options[:field_sep] || "\t"
|
9
9
|
@line_sep = options[:line_sep] || "\n"
|
10
10
|
@collection_sep = options[:collection_sep] || "|"
|
11
|
+
@stored_as = options[:stored_as] || :textfile
|
11
12
|
@columns = []
|
12
13
|
@partitions = []
|
13
14
|
@serde_name = nil
|
@@ -31,10 +32,14 @@ module RBHive
|
|
31
32
|
def create_table_statement()
|
32
33
|
%[CREATE #{external}TABLE #{table_statement}
|
33
34
|
ROW FORMAT #{row_format_statement}
|
34
|
-
STORED AS
|
35
|
+
STORED AS #{stored_as}
|
35
36
|
#{location}]
|
36
37
|
end
|
37
38
|
|
39
|
+
def stored_as
|
40
|
+
@stored_as.to_s.upcase
|
41
|
+
end
|
42
|
+
|
38
43
|
def row_format_statement
|
39
44
|
if @serde_name
|
40
45
|
serde_statement
|
data/lib/rbhive/version.rb
CHANGED
data/rbhive.gemspec
CHANGED
@@ -4,24 +4,24 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'rbhive/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
7
|
+
spec.name = 'rbhive'
|
8
8
|
spec.version = RBHive::VERSION
|
9
|
-
spec.authors =
|
10
|
-
spec.description =
|
11
|
-
spec.summary =
|
12
|
-
spec.email = [
|
13
|
-
spec.homepage =
|
14
|
-
spec.license =
|
15
|
-
|
9
|
+
spec.authors = %w(Forward3D KolobocK)
|
10
|
+
spec.description = 'Simple gem for executing Hive queries and collecting the results'
|
11
|
+
spec.summary = 'Simple gem for executing Hive queries'
|
12
|
+
spec.email = ['developers@forward3d.com', 'kolobock@gmail.com']
|
13
|
+
spec.homepage = 'http://github.com/forward3d/rbhive'
|
14
|
+
spec.license = 'MIT'
|
15
|
+
|
16
16
|
spec.files = `git ls-files`.split($/)
|
17
17
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
-
spec.require_paths = [
|
20
|
-
|
21
|
-
spec.add_dependency('thrift', '
|
19
|
+
spec.require_paths = ['lib']
|
20
|
+
|
21
|
+
spec.add_dependency('thrift', '~> 0.9')
|
22
22
|
spec.add_dependency('json')
|
23
23
|
|
24
|
-
spec.add_development_dependency
|
25
|
-
spec.add_development_dependency
|
24
|
+
spec.add_development_dependency 'rake'
|
25
|
+
spec.add_development_dependency 'bundler', '>= 1.3'
|
26
26
|
|
27
27
|
end
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbhive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
5
|
-
prerelease: 6
|
4
|
+
version: 1.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Forward3D
|
@@ -10,82 +9,73 @@ authors:
|
|
10
9
|
autorequire:
|
11
10
|
bindir: bin
|
12
11
|
cert_chain: []
|
13
|
-
date:
|
12
|
+
date: 2018-12-17 00:00:00.000000000 Z
|
14
13
|
dependencies:
|
15
14
|
- !ruby/object:Gem::Dependency
|
16
15
|
name: thrift
|
17
16
|
requirement: !ruby/object:Gem::Requirement
|
18
|
-
none: false
|
19
17
|
requirements:
|
20
|
-
- -
|
18
|
+
- - "~>"
|
21
19
|
- !ruby/object:Gem::Version
|
22
|
-
version: 0.9
|
20
|
+
version: '0.9'
|
23
21
|
type: :runtime
|
24
22
|
prerelease: false
|
25
23
|
version_requirements: !ruby/object:Gem::Requirement
|
26
|
-
none: false
|
27
24
|
requirements:
|
28
|
-
- -
|
25
|
+
- - "~>"
|
29
26
|
- !ruby/object:Gem::Version
|
30
|
-
version: 0.9
|
27
|
+
version: '0.9'
|
31
28
|
- !ruby/object:Gem::Dependency
|
32
29
|
name: json
|
33
30
|
requirement: !ruby/object:Gem::Requirement
|
34
|
-
none: false
|
35
31
|
requirements:
|
36
|
-
- -
|
32
|
+
- - ">="
|
37
33
|
- !ruby/object:Gem::Version
|
38
34
|
version: '0'
|
39
35
|
type: :runtime
|
40
36
|
prerelease: false
|
41
37
|
version_requirements: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
38
|
requirements:
|
44
|
-
- -
|
39
|
+
- - ">="
|
45
40
|
- !ruby/object:Gem::Version
|
46
41
|
version: '0'
|
47
42
|
- !ruby/object:Gem::Dependency
|
48
43
|
name: rake
|
49
44
|
requirement: !ruby/object:Gem::Requirement
|
50
|
-
none: false
|
51
45
|
requirements:
|
52
|
-
- -
|
46
|
+
- - ">="
|
53
47
|
- !ruby/object:Gem::Version
|
54
48
|
version: '0'
|
55
49
|
type: :development
|
56
50
|
prerelease: false
|
57
51
|
version_requirements: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
52
|
requirements:
|
60
|
-
- -
|
53
|
+
- - ">="
|
61
54
|
- !ruby/object:Gem::Version
|
62
55
|
version: '0'
|
63
56
|
- !ruby/object:Gem::Dependency
|
64
57
|
name: bundler
|
65
58
|
requirement: !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
59
|
requirements:
|
68
|
-
- -
|
60
|
+
- - ">="
|
69
61
|
- !ruby/object:Gem::Version
|
70
62
|
version: '1.3'
|
71
63
|
type: :development
|
72
64
|
prerelease: false
|
73
65
|
version_requirements: !ruby/object:Gem::Requirement
|
74
|
-
none: false
|
75
66
|
requirements:
|
76
|
-
- -
|
67
|
+
- - ">="
|
77
68
|
- !ruby/object:Gem::Version
|
78
69
|
version: '1.3'
|
79
70
|
description: Simple gem for executing Hive queries and collecting the results
|
80
71
|
email:
|
81
|
-
- andy@forward.co.uk
|
82
|
-
- kolobock@gmail.com
|
83
72
|
- developers@forward3d.com
|
73
|
+
- kolobock@gmail.com
|
84
74
|
executables: []
|
85
75
|
extensions: []
|
86
76
|
extra_rdoc_files: []
|
87
77
|
files:
|
88
|
-
- .gitignore
|
78
|
+
- ".gitignore"
|
89
79
|
- CHANGELOG.md
|
90
80
|
- Gemfile
|
91
81
|
- LICENSE
|
@@ -122,29 +112,25 @@ files:
|
|
122
112
|
homepage: http://github.com/forward3d/rbhive
|
123
113
|
licenses:
|
124
114
|
- MIT
|
115
|
+
metadata: {}
|
125
116
|
post_install_message:
|
126
117
|
rdoc_options: []
|
127
118
|
require_paths:
|
128
119
|
- lib
|
129
120
|
required_ruby_version: !ruby/object:Gem::Requirement
|
130
|
-
none: false
|
131
121
|
requirements:
|
132
|
-
- -
|
122
|
+
- - ">="
|
133
123
|
- !ruby/object:Gem::Version
|
134
124
|
version: '0'
|
135
|
-
segments:
|
136
|
-
- 0
|
137
|
-
hash: 2597338757284379755
|
138
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
139
|
-
none: false
|
140
126
|
requirements:
|
141
|
-
- -
|
127
|
+
- - ">="
|
142
128
|
- !ruby/object:Gem::Version
|
143
|
-
version:
|
129
|
+
version: '0'
|
144
130
|
requirements: []
|
145
131
|
rubyforge_project:
|
146
|
-
rubygems_version:
|
132
|
+
rubygems_version: 2.6.11
|
147
133
|
signing_key:
|
148
|
-
specification_version:
|
134
|
+
specification_version: 4
|
149
135
|
summary: Simple gem for executing Hive queries
|
150
136
|
test_files: []
|