sequel_impala 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +50 -0
- data/LICENSE +463 -0
- data/README.md +45 -0
- data/Rakefile +39 -0
- data/lib/driver/commons-collections-3.2.1.jar +0 -0
- data/lib/driver/commons-configuration-1.10.jar +0 -0
- data/lib/driver/commons-logging-1.2.jar +0 -0
- data/lib/driver/hadoop-auth-2.9.0.jar +0 -0
- data/lib/driver/hadoop-common-2.9.0.jar +0 -0
- data/lib/driver/hadoop-core-2.6.0.jar +0 -0
- data/lib/driver/hive-exec-1.1.0.jar +0 -0
- data/lib/driver/hive-jdbc-1.1.0.jar +0 -0
- data/lib/driver/hive-metastore-1.1.0.jar +0 -0
- data/lib/driver/hive-service-1.1.0.jar +0 -0
- data/lib/driver/httpclient-4.3.jar +0 -0
- data/lib/driver/httpcore-4.3.jar +0 -0
- data/lib/driver/libfb303-0.9.0.jar +0 -0
- data/lib/driver/log4j-1.2.17.jar +0 -0
- data/lib/driver/slf4j-api-1.7.5.jar +0 -0
- data/lib/driver/stax2-api-3.1.4.jar +0 -0
- data/lib/driver/woodstox-core-asl-4.4.1.jar +0 -0
- data/lib/impala.rb +55 -0
- data/lib/impala/connection.rb +180 -0
- data/lib/impala/cursor.rb +200 -0
- data/lib/impala/progress_reporter.rb +40 -0
- data/lib/impala/protocol.rb +8 -0
- data/lib/impala/protocol/beeswax_constants.rb +15 -0
- data/lib/impala/protocol/beeswax_service.rb +747 -0
- data/lib/impala/protocol/beeswax_types.rb +193 -0
- data/lib/impala/protocol/exec_stats_constants.rb +13 -0
- data/lib/impala/protocol/exec_stats_types.rb +133 -0
- data/lib/impala/protocol/facebook_service.rb +706 -0
- data/lib/impala/protocol/fb303_constants.rb +15 -0
- data/lib/impala/protocol/fb303_types.rb +25 -0
- data/lib/impala/protocol/hive_metastore_constants.rb +53 -0
- data/lib/impala/protocol/hive_metastore_types.rb +698 -0
- data/lib/impala/protocol/impala_hive_server2_service.rb +137 -0
- data/lib/impala/protocol/impala_service.rb +443 -0
- data/lib/impala/protocol/impala_service_constants.rb +13 -0
- data/lib/impala/protocol/impala_service_types.rb +192 -0
- data/lib/impala/protocol/status_constants.rb +13 -0
- data/lib/impala/protocol/status_types.rb +46 -0
- data/lib/impala/protocol/t_c_l_i_service.rb +1108 -0
- data/lib/impala/protocol/t_c_l_i_service_constants.rb +72 -0
- data/lib/impala/protocol/t_c_l_i_service_types.rb +1802 -0
- data/lib/impala/protocol/thrift_hive_metastore.rb +4707 -0
- data/lib/impala/protocol/types_constants.rb +13 -0
- data/lib/impala/protocol/types_types.rb +332 -0
- data/lib/impala/sasl_transport.rb +117 -0
- data/lib/impala/thrift_patch.rb +31 -0
- data/lib/impala/version.rb +3 -0
- data/lib/jdbc/hive2.rb +52 -0
- data/lib/jdbc/impala.rb +50 -0
- data/lib/rbhive.rb +8 -0
- data/lib/rbhive/connection.rb +150 -0
- data/lib/rbhive/explain_result.rb +46 -0
- data/lib/rbhive/result_set.rb +37 -0
- data/lib/rbhive/schema_definition.rb +86 -0
- data/lib/rbhive/t_c_l_i_connection.rb +466 -0
- data/lib/rbhive/t_c_l_i_result_set.rb +3 -0
- data/lib/rbhive/t_c_l_i_schema_definition.rb +87 -0
- data/lib/rbhive/table_schema.rb +122 -0
- data/lib/rbhive/version.rb +3 -0
- data/lib/sequel/adapters/impala.rb +220 -0
- data/lib/sequel/adapters/jdbc/hive2.rb +36 -0
- data/lib/sequel/adapters/jdbc/impala.rb +38 -0
- data/lib/sequel/adapters/rbhive.rb +177 -0
- data/lib/sequel/adapters/shared/impala.rb +808 -0
- data/lib/sequel/extensions/csv_to_parquet.rb +166 -0
- data/lib/thrift/facebook_service.rb +700 -0
- data/lib/thrift/fb303_constants.rb +9 -0
- data/lib/thrift/fb303_types.rb +19 -0
- data/lib/thrift/hive_metastore_constants.rb +41 -0
- data/lib/thrift/hive_metastore_types.rb +630 -0
- data/lib/thrift/hive_service_constants.rb +13 -0
- data/lib/thrift/hive_service_types.rb +72 -0
- data/lib/thrift/queryplan_constants.rb +13 -0
- data/lib/thrift/queryplan_types.rb +261 -0
- data/lib/thrift/sasl_client_transport.rb +161 -0
- data/lib/thrift/serde_constants.rb +92 -0
- data/lib/thrift/serde_types.rb +7 -0
- data/lib/thrift/t_c_l_i_service.rb +1054 -0
- data/lib/thrift/t_c_l_i_service_constants.rb +72 -0
- data/lib/thrift/t_c_l_i_service_types.rb +1768 -0
- data/lib/thrift/thrift_hive.rb +508 -0
- data/lib/thrift/thrift_hive_metastore.rb +3856 -0
- data/spec/database_test.rb +56 -0
- data/spec/dataset_test.rb +1268 -0
- data/spec/files/bad_down_migration/001_create_alt_basic.rb +4 -0
- data/spec/files/bad_down_migration/002_create_alt_advanced.rb +4 -0
- data/spec/files/bad_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/bad_timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/bad_timestamped_migrations/1273253853_3_create_users.rb +3 -0
- data/spec/files/bad_up_migration/001_create_alt_basic.rb +4 -0
- data/spec/files/bad_up_migration/002_create_alt_advanced.rb +3 -0
- data/spec/files/convert_to_timestamp_migrations/001_create_sessions.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/002_create_nodes.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/003_3_create_users.rb +4 -0
- data/spec/files/convert_to_timestamp_migrations/1273253850_create_artists.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/1273253852_create_albums.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253853_create_nodes.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253853_create_users.rb +4 -0
- data/spec/files/integer_migrations/001_create_sessions.rb +9 -0
- data/spec/files/integer_migrations/002_create_nodes.rb +9 -0
- data/spec/files/integer_migrations/003_3_create_users.rb +4 -0
- data/spec/files/interleaved_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253850_create_artists.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253852_create_albums.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253853_3_create_users.rb +4 -0
- data/spec/files/reversible_migrations/001_reversible.rb +5 -0
- data/spec/files/reversible_migrations/002_reversible.rb +5 -0
- data/spec/files/reversible_migrations/003_reversible.rb +5 -0
- data/spec/files/reversible_migrations/004_reversible.rb +5 -0
- data/spec/files/reversible_migrations/005_reversible.rb +10 -0
- data/spec/files/timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/timestamped_migrations/1273253853_3_create_users.rb +4 -0
- data/spec/impala_test.rb +290 -0
- data/spec/migrator_test.rb +240 -0
- data/spec/plugin_test.rb +91 -0
- data/spec/prepared_statement_test.rb +327 -0
- data/spec/schema_test.rb +356 -0
- data/spec/spec_helper.rb +19 -0
- data/spec/timezone_test.rb +86 -0
- data/spec/type_test.rb +99 -0
- metadata +294 -0
data/README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# sequel_impala
|
|
2
|
+
|
|
3
|
+
sequel_impala adds support for Sequel to connect to the Impala database
|
|
4
|
+
via the included impala driver, and the included jdbc-hive2 driver under JRuby.
|
|
5
|
+
|
|
6
|
+
# Source Code
|
|
7
|
+
|
|
8
|
+
Source code is available on GitHub at https://github.com/outcomesinsights/sequel_impala
|
|
9
|
+
|
|
10
|
+
# Usage
|
|
11
|
+
|
|
12
|
+
After installation, Sequel automatically picks up the adapter when you use a
|
|
13
|
+
connection string starting with `impala` (or `jdbc:hive2` on JRuby), as long as
|
|
14
|
+
the gem's lib directory is in RUBYLIB.
|
|
15
|
+
|
|
16
|
+
# Connection Strings
|
|
17
|
+
|
|
18
|
+
If using the impala driver (default host is localhost, default port is 21000):
|
|
19
|
+
|
|
20
|
+
impala://host:port
|
|
21
|
+
|
|
22
|
+
If using the jdbc:hive2 driver on JRuby (port 21050 works in testing):
|
|
23
|
+
|
|
24
|
+
jdbc:hive2://host:port/;auth=noSasl
|
|
25
|
+
|
|
26
|
+
# Dependencies
|
|
27
|
+
|
|
28
|
+
* sequel 4+
|
|
29
|
+
* thrift gem
|
|
30
|
+
|
|
31
|
+
# License
|
|
32
|
+
|
|
33
|
+
MIT/Apache
|
|
34
|
+
|
|
35
|
+
# Author
|
|
36
|
+
|
|
37
|
+
Ryan Duryea <aguynamedryan@gmail.com>
|
|
38
|
+
|
|
39
|
+
Work on sequel_impala is generously funded by [Outcomes Insights, Inc.](http://outins.com)
|
|
40
|
+
|
|
41
|
+
# Previous Author
|
|
42
|
+
|
|
43
|
+
Jeremy Evans <code@jeremyevans.net>
|
|
44
|
+
|
|
45
|
+
Provided initial work on this gem, and continues to maintain [Sequel](http://sequel.jeremyevans.net/). We can't thank you enough!
|
data/Rakefile
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Build, spec and documentation tasks for the sequel_impala gem.
require "rake"
require "rake/clean"

# `rake clean` removes built gem files and the generated rdoc directory.
CLEAN.include ["sequel_impala-*.gem", "rdoc"]

desc "Build sequel_impala gem"
task :package => [:clean] do
  sh %{#{FileUtils::RUBY} -S gem build sequel_impala.gemspec}
end

### Specs

desc "Run specs"
task "spec" do
  # RubyGems is always loaded on modern Rubies; the old `-rubygems` switch
  # fails on Ruby 2.5+ (ubygems.rb was removed), so it is not passed.
  sh "#{FileUtils::RUBY} -I lib -e 'ARGV.each{|f| require f}' ./spec/*_test.rb"
end

task :default => :spec

### RDoc

RDOC_DEFAULT_OPTS = ["--quiet", "--line-numbers", "--inline-source", '--title', 'sequel_impala: Sequel support for Impala database']

# Use the hanna template only when both gems are installed; otherwise fall
# back to RDoc's default formatter.
begin
  gem 'rdoc'
  gem 'hanna-nouveau'
  RDOC_DEFAULT_OPTS.concat(['-f', 'hanna'])
rescue Gem::LoadError
end

# The package ships README.md and CHANGELOG.md (not README.rdoc / CHANGELOG).
RDOC_OPTS = RDOC_DEFAULT_OPTS + ['--main', 'README.md']

require 'rdoc/task'
RDoc::Task.new do |rdoc|
  rdoc.rdoc_dir = "rdoc"
  rdoc.options += RDOC_OPTS
  rdoc.rdoc_files.add %w"README.md CHANGELOG.md LICENSE lib/**/*.rb"
end
|
|
39
|
+
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/impala.rb
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
|
|
2
|
+
# the generated ruby files use a relative require, so we need to add the
|
|
3
|
+
# generated directory to $LOAD_PATH
|
|
4
|
+
this_dir = File.expand_path(File.dirname(__FILE__))
|
|
5
|
+
gen_dir = File.join(this_dir, 'impala/protocol')
|
|
6
|
+
$LOAD_PATH.push(gen_dir) unless $LOAD_PATH.include?(gen_dir)
|
|
7
|
+
|
|
8
|
+
require 'impala/version'
|
|
9
|
+
|
|
10
|
+
require 'thrift'
|
|
11
|
+
require 'time'
|
|
12
|
+
require 'impala/protocol'
|
|
13
|
+
begin
|
|
14
|
+
require 'impala/sasl_transport'
|
|
15
|
+
rescue LoadError
|
|
16
|
+
# gssapi not supported by operating system, continue as impala adapter
|
|
17
|
+
# can be used in buffered (non-SASL) mode.
|
|
18
|
+
end
|
|
19
|
+
require 'impala/progress_reporter'
|
|
20
|
+
require 'impala/cursor'
|
|
21
|
+
require 'impala/connection'
|
|
22
|
+
require 'impala/thrift_patch'
|
|
23
|
+
|
|
24
|
+
module Impala
  DEFAULT_HOST = 'localhost'
  DEFAULT_PORT = 21000

  class InvalidQueryError < StandardError; end
  class ConnectionError < StandardError; end
  class CursorError < StandardError; end
  class ParsingError < StandardError; end

  # Open a connection to an Impala server.
  #
  # Without a block, the open connection is returned and the caller is
  # responsible for closing it. With a block, the connection is yielded and
  # then closed when the block finishes (even if it raises), and the block's
  # value is returned.
  # @param [String] host the hostname or IP address of the Impala server
  # @param [int] port the port that the Impala server is listening on
  # @param [Hash] options passed through unchanged to Connection.new
  # @yieldparam [Connection] conn the open connection. Will be closed once the block
  #    finishes
  # @return [Connection] the open connection, or, if a block is
  #    passed, the return value of the block
  def self.connect(host=DEFAULT_HOST, port=DEFAULT_PORT, options={})
    connection = Connection.new(host, port, options)
    return connection unless block_given?

    begin
      yield connection
    ensure
      connection.close
    end
  end
end
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
module Impala
  # This object represents a connection to an Impala server. It can be used to
  # perform queries on the database.
  class Connection
    attr_accessor :host, :port

    # Don't instantiate Connections directly; instead, use {Impala.connect}.
    # The connection is opened as part of construction.
    # @param [String] host the hostname or IP address of the Impala server
    # @param [Integer] port the port the Impala server is listening on
    # @param [Hash] options connection options; the hash is copied, not mutated
    # @option options [Symbol] :transport :buffered (the default) or :sasl
    # @option options [Hash] :sasl_params SASL settings; must include :mechanism
    # @option options [Numeric] :timeout assigned to the Thrift socket's timeout
    # @option options [Array] :loggers objects that respond to #debug
    def initialize(host, port, options={})
      @host = host
      @port = port
      @connected = false
      @options = options.dup
      @options[:transport] ||= :buffered
      # Tolerate an explicit `:loggers => nil` as "no loggers".
      @loggers = @options[:loggers] || []
      open
    end

    def inspect
      "#<#{self.class} #{@host}:#{@port}#{open? ? '' : ' (DISCONNECTED)'}>"
    end

    # Open the connection if it's currently closed.
    def open
      return if @connected

      @transport = thrift_transport(host, port)
      # NOTE(review): stock Thrift transports do not take a block on #open;
      # presumably impala/thrift_patch makes #open yield the underlying
      # socket wrapper -- confirm against that file.
      @transport.open do |transport|
        enable_keepalive(transport)
      end

      proto = Thrift::BinaryProtocol.new(@transport)
      @service = Protocol::ImpalaService::Client.new(proto)
      @connected = true
    end

    # Build the Thrift transport selected by the :transport option.
    # @param [String] server the host to connect to
    # @param [Integer] port the port to connect to
    # @return the transport wrapping a freshly created socket
    # @raise [ArgumentError] if :sasl is selected without a Hash of :sasl_params
    # @raise [RuntimeError] if the transport type is not recognised
    def thrift_transport(server, port)
      socket = thrift_socket(server, port, @options[:timeout])

      case @options[:transport]
      when :buffered
        Thrift::BufferedTransport.new(socket)
      when :sasl
        opts = parse_sasl_params(@options[:sasl_params])
        # Previously a missing/non-Hash :sasl_params surfaced as a
        # NoMethodError on nil; fail with an explanation instead.
        raise ArgumentError, "The :sasl transport requires :sasl_params to be a Hash" unless opts

        mechanism = opts.delete(:mechanism)
        SASLTransport.new(socket, mechanism, opts)
      else
        raise "Unrecognised transport type '#{@options[:transport]}'"
      end
    end

    # Create a Thrift socket for the given endpoint and assign its timeout.
    def thrift_socket(server, port, timeout)
      socket = Thrift::Socket.new(server, port)
      socket.timeout = timeout
      socket
    end

    # Processes SASL connection params and returns a new hash with symbol
    # keys, or nil when the params are not a Hash.
    def parse_sasl_params(sasl_params)
      return nil unless sasl_params.kind_of?(Hash)

      # Symbolize keys into a fresh hash so the caller's hash is untouched.
      sasl_params.each_with_object({}) do |(key, value), symbolized|
        symbolized[key.to_sym] = value
      end
    end

    # Close this connection. It can still be reopened with {#open}.
    def close
      return unless @connected

      @transport.close
      @connected = false
    end

    # Returns true if the connection is currently open.
    def open?
      @connected
    end

    # Refresh the metadata store.
    # @raise [ConnectionError] if the connection is closed
    def refresh
      raise ConnectionError, "Connection closed" unless open?
      @service.ResetCatalog
    end

    # Perform a query and return all the results. This will
    # load the entire result set into memory, so if you're dealing with lots
    # of rows, {#execute} may work better.
    # @param [String] raw_query the query you want to run
    # @param [Hash] query_options the options to set user and configuration
    #    except for :user, see TImpalaQueryOptions in ImpalaService.thrift
    # @option query_options [String] :user the user runs the query
    # @return [Array<Hash>] an array of hashes, one for each row.
    def query(raw_query, query_options = {})
      execute(raw_query, query_options).fetch_all
    end

    # Perform a query and return a cursor for iterating over the results.
    # Blocks until the query has finished running on the server.
    # @param [String] raw_query the query you want to run
    # @param [Hash] query_options the options to set user and configuration
    #    except for :user, see TImpalaQueryOptions in ImpalaService.thrift
    # @option query_options [String] :user the user runs the query
    # @return [Cursor] a cursor for the result rows
    # @raise [ConnectionError] if the connection is closed
    # @raise [InvalidQueryError] if the query is empty
    def execute(raw_query, query_options = {})
      raise ConnectionError, "Connection closed" unless open?

      query = sanitize_query(raw_query)
      handle = send_query(query, query_options)

      cursor = Cursor.new(handle, @service, @options)
      cursor.wait!
      cursor
    end

    # Ask the server to close the given query handle.
    def close_handle(handle)
      @service.close(handle)
    end

    private

    # Trim the query and lower-case its leading command word.
    #
    # Only the first word is altered. The rest of the statement is kept
    # byte-for-byte so that whitespace inside string literals and line
    # breaks after `--` comments survive.
    # @raise [InvalidQueryError] if the query is empty or only whitespace
    def sanitize_query(raw_query)
      query = raw_query.strip
      raise InvalidQueryError, "Empty query" if query.empty?

      query.sub(/\A\S+/) { |command| command.downcase }
    end

    # Build a Beeswax query from the sanitized text and options and submit it.
    # :user becomes the query's hadoop_user; every other option is sent as an
    # upper-cased "KEY=value" configuration entry.
    # @return the handle returned by the service for the submitted query
    def send_query(sanitized_query, query_options)
      query = Protocol::Beeswax::Query.new
      query.query = sanitized_query

      # Work on a copy so the caller's hash is not mutated, and always strip
      # :user so that a nil user never leaks into the configuration.
      settings = query_options.dup
      user = settings.delete(:user)
      query.hadoop_user = user if user
      query.configuration = settings.map do |key, value|
        "#{key.upcase}=#{value}"
      end

      @service.query(query)
    end

    # Turn on TCP keepalive for the socket behind the given transport.
    def enable_keepalive(transport)
      s = transport.handle
      log_debug("Enabling KEEPALIVE...")
      s.setsockopt(::Socket::SOL_SOCKET, ::Socket::SO_KEEPALIVE, true)

      # Apparently Mac OS X (Darwin) doesn't implement the SOL_TCP options below
      # so we'll hope keep alive works under Mac OS X, but in production
      # we Dockerize Jigsaw, so these options should be available when
      # we're running on Linux
      return unless defined?(::Socket::SOL_TCP)

      opts = {}
      opts[::Socket::TCP_KEEPIDLE] = 60 if defined?(::Socket::TCP_KEEPIDLE)
      opts[::Socket::TCP_KEEPINTVL] = 10 if defined?(::Socket::TCP_KEEPINTVL)
      opts[::Socket::TCP_KEEPCNT] = 5 if defined?(::Socket::TCP_KEEPCNT)

      log_debug("Also enabling: #{opts.inspect}")
      opts.each do |opt, value|
        s.setsockopt(::Socket::SOL_TCP, opt, value)
      end
    end

    # Send a debug message to every configured logger.
    def log_debug(message)
      @loggers.each do |logger|
        logger.debug(message)
      end
    end
  end
end
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
module Impala
  # Cursors are used to iterate over result sets without loading them all
  # into memory at once. This can be useful if you're dealing with lots of
  # rows. It implements Enumerable, so you can use each/select/map/etc.
  class Cursor
    # Number of rows requested per fetch and the target size of the local
    # row buffer.
    BUFFER_SIZE = 1024
    include Enumerable

    def self.typecast_boolean(value)
      value == 'true'
    end

    def self.typecast_int(value)
      value.to_i
    end

    def self.typecast_float(value)
      value.to_f
    end

    # Uses Kernel#BigDecimal; BigDecimal.new no longer exists in
    # bigdecimal 2.0+ (Ruby 2.7 and later).
    def self.typecast_decimal(value)
      BigDecimal(value)
    end

    def self.typecast_timestamp(value)
      Time.parse(value)
    end

    # Maps a column type name from the result schema to the callable that
    # converts the raw string value. Types without an entry are returned
    # as the raw string.
    TYPECAST_MAP = {
      'boolean'=>method(:typecast_boolean),
      'int'=>method(:typecast_int),
      'double'=>method(:typecast_float),
      'decimal'=>method(:typecast_decimal),
      'timestamp'=>method(:typecast_timestamp),
    }
    TYPECAST_MAP['tinyint'] = TYPECAST_MAP['smallint'] = TYPECAST_MAP['bigint'] = TYPECAST_MAP['int']
    TYPECAST_MAP['float'] = TYPECAST_MAP['double']
    TYPECAST_MAP.freeze

    # Raw string that represents SQL NULL in fetched rows.
    NULL = 'NULL'.freeze

    # Per-cursor, unfrozen copy of TYPECAST_MAP.
    attr_reader :typecast_map

    attr_reader :handle

    # @param handle the query handle returned by the service
    # @param service the Thrift service client used for all remote calls
    # @param [Hash] options copied; :poll_every (seconds, default 0.1) sets
    #    the polling interval used by {#wait!}; the full hash is also handed
    #    to the ProgressReporter
    def initialize(handle, service, options = {})
      @handle = handle
      @service = service

      @row_buffer = []
      @done = false
      @open = true
      @typecast_map = TYPECAST_MAP.dup
      @options = options.dup
      @progress_reporter = ProgressReporter.new(self, @options)
      @poll_every = options.fetch(:poll_every, 0.1)
    end

    # Column names of the result set, in schema order (memoized).
    def columns
      @columns ||= metadata.schema.fieldSchemas.map(&:name)
    end

    def inspect
      "#<#{self.class}#{handle ? " QueryID: #{handle.id}" : ''}#{open? ? '' : ' (CLOSED)'}>"
    end

    # Yields each remaining row as a hash. Returns an Enumerator when called
    # without a block, so that no row is fetched and then lost.
    def each
      return enum_for(:each) unless block_given?

      while (row = fetch_row)
        yield row
      end
    end

    # Returns the next available row as a hash, or nil if there are none left.
    # @return [Hash, nil] the next available row, or nil if there are none
    #    left
    # @see #fetch_all
    def fetch_row
      if @row_buffer.empty?
        return nil if @done
        fetch_more
      end

      @row_buffer.shift
    end

    # Returns all the remaining rows in the result set.
    # @return [Array<Hash>] the remaining rows in the result set
    # @see #fetch_row
    def fetch_all
      self.to_a
    end

    # Close the cursor on the remote server. Once a cursor is closed, you
    # can no longer fetch any rows from it.
    def close
      # Mark closed first so the cursor is unusable even if the remote call raises.
      @open = false
      @service.close(@handle)
    end

    # Returns true if the cursor is still open.
    def open?
      @open
    end

    # Returns true if the query is done running, and results can be fetched.
    # A query that ended in the EXCEPTION state also counts as done.
    def query_done?
      [
        Protocol::Beeswax::QueryState::EXCEPTION,
        Protocol::Beeswax::QueryState::FINISHED
      ].include?(@service.get_state(@handle))
    end

    # Blocks until the query is done running, polling the server every
    # :poll_every seconds and invoking the progress reporter between polls.
    def wait!
      until query_done?
        periodic_callback
        sleep @poll_every
      end
    end

    # Returns true if there are any more rows to fetch.
    def has_more?
      !@done || !@row_buffer.empty?
    end

    def runtime_profile
      @service.GetRuntimeProfile(@handle)
    end

    def exec_summary
      @service.GetExecSummary(@handle)
    end

    # Returns the progress for the query as completed scan ranges divided by
    # total scan ranges (a Float).
    def progress
      summary = exec_summary
      summary.progress.num_completed_scan_ranges.to_f / summary.progress.total_scan_ranges.to_f
    end

    private

    attr_reader :progress_reporter

    # Report progress if the reporter wants to show it.
    def periodic_callback
      return unless progress_reporter.show?
      progress_reporter.report
    end

    # Result-set metadata from the service (memoized).
    def metadata
      @metadata ||= @service.get_results_metadata(@handle)
    end

    # Fetch batches until the buffer holds BUFFER_SIZE rows or the result
    # set is exhausted.
    def fetch_more
      fetch_batch until @done || @row_buffer.count >= BUFFER_SIZE
    end

    def exceptional?
      @service.get_state(@handle) == Protocol::Beeswax::QueryState::EXCEPTION
    end

    # Fetch one batch of rows into the buffer. When the server reports that
    # no more rows remain, marks the cursor done and closes it.
    # @raise [CursorError] if the cursor is closed or the fetch fails
    # @raise [ConnectionError] if the query is in the EXCEPTION state
    def fetch_batch
      raise CursorError, "Cursor has expired or been closed" unless @open
      raise ConnectionError, "The query was aborted" if exceptional?

      begin
        res = @service.fetch(@handle, false, BUFFER_SIZE)
      rescue Protocol::Beeswax::BeeswaxException
        @open = false
        raise CursorError, "Cursor has expired or been closed"
      end

      rows = res.data.map { |raw| parse_row(raw) }
      @row_buffer.concat(rows)

      unless res.has_more
        @done = true
        close
      end
    end

    # Convert one delimited raw row into a hash keyed by column name.
    # The NULL marker becomes nil; other values go through the column's
    # typecast callable when one is registered for its type.
    def parse_row(raw)
      # A negative limit keeps trailing empty fields, so empty-string values
      # in the last column(s) are not dropped.
      fields = raw.split(metadata.delim, -1)
      # An empty raw row is a single empty-string field, not zero fields.
      fields = [''] if fields.empty?

      row_convertor.each_with_object({}) do |(name, caster, index), row|
        value = fields[index]
        row[name] =
          if value == NULL
            nil
          elsif caster
            caster.call(value)
          else
            value
          end
      end
    end

    # Triples of [column name, typecast callable or nil, field index],
    # built once per cursor.
    def row_convertor
      @row_convertor ||= begin
        casters = metadata.schema.fieldSchemas.map { |field| typecast_map[field.type] }
        columns.each_with_index.map { |name, index| [name, casters[index], index] }
      end
    end
  end
end
|