rbhive 1.0.0.pre → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 224509bb79d39f4f06ec081b96e96a4cbb3be341
4
+ data.tar.gz: f4df37b710b762c9f49238ba34e281773e7e7976
5
+ SHA512:
6
+ metadata.gz: 26a94f954cf6c76bbb6ed0ad345d56798beef6a933a756bba7778c1be5d66969606237052c0aed44978ecb073ad46836af0f830529220baec18f8d1f20e64fc8
7
+ data.tar.gz: ba9b396a9fcf388712cf5deefe2f9a16120fffc117b3e53631702f4a1d7df872d8bd209b8e0b1cedd485b6af364854b0a25577cd06ee8718fbee7363110eae6a
@@ -2,6 +2,12 @@
2
2
 
3
3
  Versioning prior to 0.5.3 was not tracked, so this changelog only lists changes introduced after 0.5.3.
4
4
 
5
+ ## 1.0.0
6
+
7
+ * Asynchronous execution with Hiveserver2
8
+ * Misc bugfixes
9
+
10
+
5
11
  ## 0.6.0
6
12
 
7
13
  0.6.0 introduces one backwards-incompatible change:
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # RBHive - A Ruby Thrift client for Apache Hive
2
2
 
3
+ [![Code Climate](https://codeclimate.com/github/forward3d/rbhive/badges/gpa.svg)](https://codeclimate.com/github/forward3d/rbhive)
4
+
3
5
  RBHive is a simple Ruby gem to communicate with the [Apache Hive](http://hive.apache.org)
4
6
  Thrift servers.
5
7
 
@@ -97,17 +99,23 @@ Connecting with the defaults:
97
99
  connection.fetch('SHOW TABLES')
98
100
  end
99
101
 
102
+ Connecting with a Logger:
103
+
104
+ RBHive.tcli_connect('hive.server.address', 10_000, { logger: Logger.new(STDOUT) }) do |connection|
105
+ connection.fetch('SHOW TABLES')
106
+ end
107
+
100
108
  Connecting with a specific Hive version (0.12 in this case):
101
109
 
102
- RBHive.tcli_connect('hive.server.address', 10_000, {:hive_version => 12}) do |connection|
110
+ RBHive.tcli_connect('hive.server.address', 10_000, { hive_version: 12 }) do |connection|
103
111
  connection.fetch('SHOW TABLES')
104
112
  end
105
113
 
106
114
  Connecting with a specific Hive version (0.12) and using the `:http` transport:
107
115
 
108
- RBHive.tcli_connect('hive.server.address', 10_000, {:hive_version => 12, :transport => :http}) do |connection|
116
+ RBHive.tcli_connect('hive.server.address', 10_000, { hive_version: 12, transport: :http }) do |connection|
109
117
  connection.fetch('SHOW TABLES')
110
- end
118
+ end
111
119
 
112
120
  We have not tested the SASL connection, as we don't run SASL; pull requests and testing are welcome.
113
121
 
@@ -144,6 +152,7 @@ In addition, you can explicitly set the Thrift protocol version according to thi
144
152
  In versions of Hive later than 0.12, the Thrift server supports asynchronous execution.
145
153
 
146
154
  The high-level view of using this feature is as follows:
155
+
147
156
  1. Submit your query using `async_execute(query)`. This function returns a hash
148
157
  with the following keys: `:guid`, `:secret`, and `:session`. You don't need to
149
158
  care about the internals of this hash - all methods that interact with an async
@@ -4,34 +4,34 @@ module RBHive
4
4
  @schema = schema
5
5
  super(rows.map {|r| @schema.coerce_row(r) })
6
6
  end
7
-
7
+
8
8
  def column_names
9
9
  @schema.column_names
10
10
  end
11
-
11
+
12
12
  def column_type_map
13
13
  @schema.column_type_map
14
14
  end
15
-
15
+
16
16
  def to_csv(out_file=nil)
17
- to_seperated_output(",", out_file)
17
+ to_separated_output(",", out_file)
18
18
  end
19
-
19
+
20
20
  def to_tsv(out_file=nil)
21
- to_seperated_output("\t", out_file)
21
+ to_separated_output("\t", out_file)
22
22
  end
23
-
23
+
24
24
  def as_arrays
25
25
  @as_arrays ||= self.map{ |r| @schema.coerce_row_to_array(r) }
26
26
  end
27
-
27
+
28
28
  private
29
-
30
- def to_seperated_output(sep, out_file)
29
+
30
+ def to_separated_output(sep, out_file)
31
31
  rows = self.map { |r| @schema.coerce_row_to_array(r).join(sep) }
32
32
  sv = rows.join("\n")
33
33
  return sv if out_file.nil?
34
34
  File.open(out_file, 'w+') { |f| f << sv }
35
35
  end
36
36
  end
37
- end
37
+ end
@@ -4,6 +4,8 @@ module RBHive
4
4
  class SchemaDefinition
5
5
  attr_reader :schema
6
6
 
7
+ NAN = Float::NAN rescue 0.0/0.0
8
+ INFINITY = Float::INFINITY rescue 1.0/0.0
7
9
  TYPES = {
8
10
  :boolean => :to_s,
9
11
  :string => :to_s,
@@ -63,8 +65,8 @@ module RBHive
63
65
 
64
66
  def coerce_column(column_name, value)
65
67
  type = column_type_map[column_name]
66
- return 1.0/0.0 if(type != :string && value == "Infinity")
67
- return 0.0/0.0 if(type != :string && value == "NaN")
68
+ return INFINITY if (type != :string && value == "Infinity")
69
+ return NAN if (type != :string && value == "NaN")
68
70
  return coerce_complex_value(value) if type.to_s =~ /^array/
69
71
  conversion_method = TYPES[type]
70
72
  conversion_method ? value.send(conversion_method) : value
@@ -46,9 +46,10 @@ module RBHive
46
46
  :PROTOCOL_V6 => 5,
47
47
  :PROTOCOL_V7 => 6
48
48
  }
49
-
50
- def tcli_connect(server, port=10_000, options)
51
- connection = RBHive::TCLIConnection.new(server, port, options)
49
+
50
+ def tcli_connect(server, port = 10_000, options)
51
+ logger = options.key?(:logger) ? options.delete(:logger) : StdOutLogger.new
52
+ connection = RBHive::TCLIConnection.new(server, port, options, logger)
52
53
  ret = nil
53
54
  begin
54
55
  connection.open
@@ -65,7 +66,7 @@ module RBHive
65
66
  end
66
67
  end
67
68
 
68
- return ret
69
+ ret
69
70
  end
70
71
  module_function :tcli_connect
71
72
 
@@ -80,7 +81,7 @@ module RBHive
80
81
  class TCLIConnection
81
82
  attr_reader :client
82
83
 
83
- def initialize(server, port=10_000, options={}, logger=StdOutLogger.new)
84
+ def initialize(server, port = 10_000, options = {}, logger = StdOutLogger.new)
84
85
  options ||= {} # backwards compatibility
85
86
  raise "'options' parameter must be a hash" unless options.is_a?(Hash)
86
87
 
@@ -191,14 +192,16 @@ module RBHive
191
192
  # Async execute
192
193
  def async_execute(query)
193
194
  @logger.info("Executing query asynchronously: #{query}")
194
- op_handle = @client.ExecuteStatement(
195
+ exec_result = @client.ExecuteStatement(
195
196
  Hive2::Thrift::TExecuteStatementReq.new(
196
197
  sessionHandle: @session.sessionHandle,
197
198
  statement: query,
198
199
  runAsync: true
199
200
  )
200
- ).operationHandle
201
-
201
+ )
202
+ raise_error_if_failed!(exec_result)
203
+ op_handle = exec_result.operationHandle
204
+
202
205
  # Return handles to get hold of this query / session again
203
206
  {
204
207
  session: @session.sessionHandle,
@@ -235,7 +238,7 @@ module RBHive
235
238
  response = @client.GetOperationStatus(
236
239
  Hive2::Thrift::TGetOperationStatusReq.new(operationHandle: prepare_operation_handle(handles))
237
240
  )
238
- puts response.operationState
241
+
239
242
  case response.operationState
240
243
  when Hive2::Thrift::TOperationState::FINISHED_STATE
241
244
  return :finished
@@ -253,6 +256,8 @@ module RBHive
253
256
  return :unknown
254
257
  when Hive2::Thrift::TOperationState::PENDING_STATE
255
258
  return :pending
259
+ when nil
260
+ raise "No operation state found for handles - has the session been closed?"
256
261
  else
257
262
  return :state_not_in_protocol
258
263
  end
@@ -262,7 +267,7 @@ module RBHive
262
267
  def async_fetch(handles, max_rows = 100)
263
268
  # Can't get data from an unfinished query
264
269
  unless async_is_complete?(handles)
265
- raise "Can't perform fetch on a query in state: #{async_state(handles[:guid], handles[:secret])}"
270
+ raise "Can't perform fetch on a query in state: #{async_state(handles)}"
266
271
  end
267
272
 
268
273
  # Fetch and
@@ -275,7 +280,7 @@ module RBHive
275
280
  raise "No block given for the batch fetch request!" unless block_given?
276
281
  # Can't get data from an unfinished query
277
282
  unless async_is_complete?(handles)
278
- raise "Can't perform fetch on a query in state: #{async_state(handles[:guid], handles[:secret])}"
283
+ raise "Can't perform fetch on a query in state: #{async_state(handles)}"
279
284
  end
280
285
 
281
286
  # Now let's iterate over the results
@@ -428,7 +433,9 @@ module RBHive
428
433
  def raise_error_if_failed!(result)
429
434
  return if result.status.statusCode == 0
430
435
  error_message = result.status.errorMessage || 'Execution failed!'
431
- raise error_message
436
+ raise RBHive::TCLIConnectionError.new(error_message)
432
437
  end
433
438
  end
439
+
440
+ class TCLIConnectionError < StandardError; end
434
441
  end
@@ -4,10 +4,11 @@ module RBHive
4
4
  class TCLISchemaDefinition
5
5
  attr_reader :schema
6
6
 
7
+ NAN = Float::NAN rescue 0.0/0.0
8
+ INFINITY = Float::INFINITY rescue 1.0/0.0
7
9
  TYPES = {
8
10
  :boolean => :to_s,
9
11
  :string => :to_s,
10
- :bigint => :to_i,
11
12
  :float => :to_f,
12
13
  :double => :to_f,
13
14
  :int => :to_i,
@@ -65,8 +66,8 @@ module RBHive
65
66
 
66
67
  def coerce_column(column_name, value)
67
68
  type = column_type_map[column_name]
68
- return 1.0/0.0 if(type != :string && value == "Infinity")
69
- return 0.0/0.0 if(type != :string && value == "NaN")
69
+ return INFINITY if (type != :string && value == "Infinity")
70
+ return NAN if (type != :string && value == "NaN")
70
71
  return coerce_complex_value(value) if type.to_s =~ /^array/
71
72
  conversion_method = TYPES[type]
72
73
  conversion_method ? value.send(conversion_method) : value
@@ -8,6 +8,7 @@ module RBHive
8
8
  @field_sep = options[:field_sep] || "\t"
9
9
  @line_sep = options[:line_sep] || "\n"
10
10
  @collection_sep = options[:collection_sep] || "|"
11
+ @stored_as = options[:stored_as] || :textfile
11
12
  @columns = []
12
13
  @partitions = []
13
14
  @serde_name = nil
@@ -31,10 +32,14 @@ module RBHive
31
32
  def create_table_statement()
32
33
  %[CREATE #{external}TABLE #{table_statement}
33
34
  ROW FORMAT #{row_format_statement}
34
- STORED AS TEXTFILE
35
+ STORED AS #{stored_as}
35
36
  #{location}]
36
37
  end
37
38
 
39
+ def stored_as
40
+ @stored_as.to_s.upcase
41
+ end
42
+
38
43
  def row_format_statement
39
44
  if @serde_name
40
45
  serde_statement
@@ -1,3 +1,3 @@
1
1
  module RBHive
2
- VERSION = '1.0.0.pre'
2
+ VERSION = '1.0.0'
3
3
  end
@@ -4,24 +4,24 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'rbhive/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "rbhive"
7
+ spec.name = 'rbhive'
8
8
  spec.version = RBHive::VERSION
9
- spec.authors = ["Forward3D","KolobocK"]
10
- spec.description = "Simple gem for executing Hive queries and collecting the results"
11
- spec.summary = "Simple gem for executing Hive queries"
12
- spec.email = ["andy@forward.co.uk","kolobock@gmail.com", "developers@forward3d.com"]
13
- spec.homepage = %q{http://github.com/forward3d/rbhive}
14
- spec.license = "MIT"
15
-
9
+ spec.authors = %w(Forward3D KolobocK)
10
+ spec.description = 'Simple gem for executing Hive queries and collecting the results'
11
+ spec.summary = 'Simple gem for executing Hive queries'
12
+ spec.email = ['developers@forward3d.com', 'kolobock@gmail.com']
13
+ spec.homepage = 'http://github.com/forward3d/rbhive'
14
+ spec.license = 'MIT'
15
+
16
16
  spec.files = `git ls-files`.split($/)
17
17
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
- spec.require_paths = ["lib"]
20
-
21
- spec.add_dependency('thrift', '= 0.9.0')
19
+ spec.require_paths = ['lib']
20
+
21
+ spec.add_dependency('thrift', '~> 0.9')
22
22
  spec.add_dependency('json')
23
23
 
24
- spec.add_development_dependency "rake"
25
- spec.add_development_dependency "bundler", ">= 1.3"
24
+ spec.add_development_dependency 'rake'
25
+ spec.add_development_dependency 'bundler', '>= 1.3'
26
26
 
27
27
  end
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbhive
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0.pre
5
- prerelease: 6
4
+ version: 1.0.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Forward3D
@@ -10,82 +9,73 @@ authors:
10
9
  autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
- date: 2014-03-31 00:00:00.000000000 Z
12
+ date: 2018-12-17 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
15
  name: thrift
17
16
  requirement: !ruby/object:Gem::Requirement
18
- none: false
19
17
  requirements:
20
- - - '='
18
+ - - "~>"
21
19
  - !ruby/object:Gem::Version
22
- version: 0.9.0
20
+ version: '0.9'
23
21
  type: :runtime
24
22
  prerelease: false
25
23
  version_requirements: !ruby/object:Gem::Requirement
26
- none: false
27
24
  requirements:
28
- - - '='
25
+ - - "~>"
29
26
  - !ruby/object:Gem::Version
30
- version: 0.9.0
27
+ version: '0.9'
31
28
  - !ruby/object:Gem::Dependency
32
29
  name: json
33
30
  requirement: !ruby/object:Gem::Requirement
34
- none: false
35
31
  requirements:
36
- - - ! '>='
32
+ - - ">="
37
33
  - !ruby/object:Gem::Version
38
34
  version: '0'
39
35
  type: :runtime
40
36
  prerelease: false
41
37
  version_requirements: !ruby/object:Gem::Requirement
42
- none: false
43
38
  requirements:
44
- - - ! '>='
39
+ - - ">="
45
40
  - !ruby/object:Gem::Version
46
41
  version: '0'
47
42
  - !ruby/object:Gem::Dependency
48
43
  name: rake
49
44
  requirement: !ruby/object:Gem::Requirement
50
- none: false
51
45
  requirements:
52
- - - ! '>='
46
+ - - ">="
53
47
  - !ruby/object:Gem::Version
54
48
  version: '0'
55
49
  type: :development
56
50
  prerelease: false
57
51
  version_requirements: !ruby/object:Gem::Requirement
58
- none: false
59
52
  requirements:
60
- - - ! '>='
53
+ - - ">="
61
54
  - !ruby/object:Gem::Version
62
55
  version: '0'
63
56
  - !ruby/object:Gem::Dependency
64
57
  name: bundler
65
58
  requirement: !ruby/object:Gem::Requirement
66
- none: false
67
59
  requirements:
68
- - - ! '>='
60
+ - - ">="
69
61
  - !ruby/object:Gem::Version
70
62
  version: '1.3'
71
63
  type: :development
72
64
  prerelease: false
73
65
  version_requirements: !ruby/object:Gem::Requirement
74
- none: false
75
66
  requirements:
76
- - - ! '>='
67
+ - - ">="
77
68
  - !ruby/object:Gem::Version
78
69
  version: '1.3'
79
70
  description: Simple gem for executing Hive queries and collecting the results
80
71
  email:
81
- - andy@forward.co.uk
82
- - kolobock@gmail.com
83
72
  - developers@forward3d.com
73
+ - kolobock@gmail.com
84
74
  executables: []
85
75
  extensions: []
86
76
  extra_rdoc_files: []
87
77
  files:
88
- - .gitignore
78
+ - ".gitignore"
89
79
  - CHANGELOG.md
90
80
  - Gemfile
91
81
  - LICENSE
@@ -122,29 +112,25 @@ files:
122
112
  homepage: http://github.com/forward3d/rbhive
123
113
  licenses:
124
114
  - MIT
115
+ metadata: {}
125
116
  post_install_message:
126
117
  rdoc_options: []
127
118
  require_paths:
128
119
  - lib
129
120
  required_ruby_version: !ruby/object:Gem::Requirement
130
- none: false
131
121
  requirements:
132
- - - ! '>='
122
+ - - ">="
133
123
  - !ruby/object:Gem::Version
134
124
  version: '0'
135
- segments:
136
- - 0
137
- hash: 2597338757284379755
138
125
  required_rubygems_version: !ruby/object:Gem::Requirement
139
- none: false
140
126
  requirements:
141
- - - ! '>'
127
+ - - ">="
142
128
  - !ruby/object:Gem::Version
143
- version: 1.3.1
129
+ version: '0'
144
130
  requirements: []
145
131
  rubyforge_project:
146
- rubygems_version: 1.8.23
132
+ rubygems_version: 2.6.11
147
133
  signing_key:
148
- specification_version: 3
134
+ specification_version: 4
149
135
  summary: Simple gem for executing Hive queries
150
136
  test_files: []