sequel_impala 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +50 -0
  3. data/LICENSE +463 -0
  4. data/README.md +45 -0
  5. data/Rakefile +39 -0
  6. data/lib/driver/commons-collections-3.2.1.jar +0 -0
  7. data/lib/driver/commons-configuration-1.10.jar +0 -0
  8. data/lib/driver/commons-logging-1.2.jar +0 -0
  9. data/lib/driver/hadoop-auth-2.9.0.jar +0 -0
  10. data/lib/driver/hadoop-common-2.9.0.jar +0 -0
  11. data/lib/driver/hadoop-core-2.6.0.jar +0 -0
  12. data/lib/driver/hive-exec-1.1.0.jar +0 -0
  13. data/lib/driver/hive-jdbc-1.1.0.jar +0 -0
  14. data/lib/driver/hive-metastore-1.1.0.jar +0 -0
  15. data/lib/driver/hive-service-1.1.0.jar +0 -0
  16. data/lib/driver/httpclient-4.3.jar +0 -0
  17. data/lib/driver/httpcore-4.3.jar +0 -0
  18. data/lib/driver/libfb303-0.9.0.jar +0 -0
  19. data/lib/driver/log4j-1.2.17.jar +0 -0
  20. data/lib/driver/slf4j-api-1.7.5.jar +0 -0
  21. data/lib/driver/stax2-api-3.1.4.jar +0 -0
  22. data/lib/driver/woodstox-core-asl-4.4.1.jar +0 -0
  23. data/lib/impala.rb +55 -0
  24. data/lib/impala/connection.rb +180 -0
  25. data/lib/impala/cursor.rb +200 -0
  26. data/lib/impala/progress_reporter.rb +40 -0
  27. data/lib/impala/protocol.rb +8 -0
  28. data/lib/impala/protocol/beeswax_constants.rb +15 -0
  29. data/lib/impala/protocol/beeswax_service.rb +747 -0
  30. data/lib/impala/protocol/beeswax_types.rb +193 -0
  31. data/lib/impala/protocol/exec_stats_constants.rb +13 -0
  32. data/lib/impala/protocol/exec_stats_types.rb +133 -0
  33. data/lib/impala/protocol/facebook_service.rb +706 -0
  34. data/lib/impala/protocol/fb303_constants.rb +15 -0
  35. data/lib/impala/protocol/fb303_types.rb +25 -0
  36. data/lib/impala/protocol/hive_metastore_constants.rb +53 -0
  37. data/lib/impala/protocol/hive_metastore_types.rb +698 -0
  38. data/lib/impala/protocol/impala_hive_server2_service.rb +137 -0
  39. data/lib/impala/protocol/impala_service.rb +443 -0
  40. data/lib/impala/protocol/impala_service_constants.rb +13 -0
  41. data/lib/impala/protocol/impala_service_types.rb +192 -0
  42. data/lib/impala/protocol/status_constants.rb +13 -0
  43. data/lib/impala/protocol/status_types.rb +46 -0
  44. data/lib/impala/protocol/t_c_l_i_service.rb +1108 -0
  45. data/lib/impala/protocol/t_c_l_i_service_constants.rb +72 -0
  46. data/lib/impala/protocol/t_c_l_i_service_types.rb +1802 -0
  47. data/lib/impala/protocol/thrift_hive_metastore.rb +4707 -0
  48. data/lib/impala/protocol/types_constants.rb +13 -0
  49. data/lib/impala/protocol/types_types.rb +332 -0
  50. data/lib/impala/sasl_transport.rb +117 -0
  51. data/lib/impala/thrift_patch.rb +31 -0
  52. data/lib/impala/version.rb +3 -0
  53. data/lib/jdbc/hive2.rb +52 -0
  54. data/lib/jdbc/impala.rb +50 -0
  55. data/lib/rbhive.rb +8 -0
  56. data/lib/rbhive/connection.rb +150 -0
  57. data/lib/rbhive/explain_result.rb +46 -0
  58. data/lib/rbhive/result_set.rb +37 -0
  59. data/lib/rbhive/schema_definition.rb +86 -0
  60. data/lib/rbhive/t_c_l_i_connection.rb +466 -0
  61. data/lib/rbhive/t_c_l_i_result_set.rb +3 -0
  62. data/lib/rbhive/t_c_l_i_schema_definition.rb +87 -0
  63. data/lib/rbhive/table_schema.rb +122 -0
  64. data/lib/rbhive/version.rb +3 -0
  65. data/lib/sequel/adapters/impala.rb +220 -0
  66. data/lib/sequel/adapters/jdbc/hive2.rb +36 -0
  67. data/lib/sequel/adapters/jdbc/impala.rb +38 -0
  68. data/lib/sequel/adapters/rbhive.rb +177 -0
  69. data/lib/sequel/adapters/shared/impala.rb +808 -0
  70. data/lib/sequel/extensions/csv_to_parquet.rb +166 -0
  71. data/lib/thrift/facebook_service.rb +700 -0
  72. data/lib/thrift/fb303_constants.rb +9 -0
  73. data/lib/thrift/fb303_types.rb +19 -0
  74. data/lib/thrift/hive_metastore_constants.rb +41 -0
  75. data/lib/thrift/hive_metastore_types.rb +630 -0
  76. data/lib/thrift/hive_service_constants.rb +13 -0
  77. data/lib/thrift/hive_service_types.rb +72 -0
  78. data/lib/thrift/queryplan_constants.rb +13 -0
  79. data/lib/thrift/queryplan_types.rb +261 -0
  80. data/lib/thrift/sasl_client_transport.rb +161 -0
  81. data/lib/thrift/serde_constants.rb +92 -0
  82. data/lib/thrift/serde_types.rb +7 -0
  83. data/lib/thrift/t_c_l_i_service.rb +1054 -0
  84. data/lib/thrift/t_c_l_i_service_constants.rb +72 -0
  85. data/lib/thrift/t_c_l_i_service_types.rb +1768 -0
  86. data/lib/thrift/thrift_hive.rb +508 -0
  87. data/lib/thrift/thrift_hive_metastore.rb +3856 -0
  88. data/spec/database_test.rb +56 -0
  89. data/spec/dataset_test.rb +1268 -0
  90. data/spec/files/bad_down_migration/001_create_alt_basic.rb +4 -0
  91. data/spec/files/bad_down_migration/002_create_alt_advanced.rb +4 -0
  92. data/spec/files/bad_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  93. data/spec/files/bad_timestamped_migrations/1273253851_create_nodes.rb +9 -0
  94. data/spec/files/bad_timestamped_migrations/1273253853_3_create_users.rb +3 -0
  95. data/spec/files/bad_up_migration/001_create_alt_basic.rb +4 -0
  96. data/spec/files/bad_up_migration/002_create_alt_advanced.rb +3 -0
  97. data/spec/files/convert_to_timestamp_migrations/001_create_sessions.rb +9 -0
  98. data/spec/files/convert_to_timestamp_migrations/002_create_nodes.rb +9 -0
  99. data/spec/files/convert_to_timestamp_migrations/003_3_create_users.rb +4 -0
  100. data/spec/files/convert_to_timestamp_migrations/1273253850_create_artists.rb +9 -0
  101. data/spec/files/convert_to_timestamp_migrations/1273253852_create_albums.rb +9 -0
  102. data/spec/files/duplicate_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  103. data/spec/files/duplicate_timestamped_migrations/1273253853_create_nodes.rb +9 -0
  104. data/spec/files/duplicate_timestamped_migrations/1273253853_create_users.rb +4 -0
  105. data/spec/files/integer_migrations/001_create_sessions.rb +9 -0
  106. data/spec/files/integer_migrations/002_create_nodes.rb +9 -0
  107. data/spec/files/integer_migrations/003_3_create_users.rb +4 -0
  108. data/spec/files/interleaved_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  109. data/spec/files/interleaved_timestamped_migrations/1273253850_create_artists.rb +9 -0
  110. data/spec/files/interleaved_timestamped_migrations/1273253851_create_nodes.rb +9 -0
  111. data/spec/files/interleaved_timestamped_migrations/1273253852_create_albums.rb +9 -0
  112. data/spec/files/interleaved_timestamped_migrations/1273253853_3_create_users.rb +4 -0
  113. data/spec/files/reversible_migrations/001_reversible.rb +5 -0
  114. data/spec/files/reversible_migrations/002_reversible.rb +5 -0
  115. data/spec/files/reversible_migrations/003_reversible.rb +5 -0
  116. data/spec/files/reversible_migrations/004_reversible.rb +5 -0
  117. data/spec/files/reversible_migrations/005_reversible.rb +10 -0
  118. data/spec/files/timestamped_migrations/1273253849_create_sessions.rb +9 -0
  119. data/spec/files/timestamped_migrations/1273253851_create_nodes.rb +9 -0
  120. data/spec/files/timestamped_migrations/1273253853_3_create_users.rb +4 -0
  121. data/spec/impala_test.rb +290 -0
  122. data/spec/migrator_test.rb +240 -0
  123. data/spec/plugin_test.rb +91 -0
  124. data/spec/prepared_statement_test.rb +327 -0
  125. data/spec/schema_test.rb +356 -0
  126. data/spec/spec_helper.rb +19 -0
  127. data/spec/timezone_test.rb +86 -0
  128. data/spec/type_test.rb +99 -0
  129. metadata +294 -0
@@ -0,0 +1,45 @@
1
+ # sequel_impala
2
+
3
+ sequel_impala adds support for Sequel to connect to the Impala database
4
+ via the included impala driver, and the included jdbc-hive2 driver under JRuby.
5
+
6
+ # Source Code
7
+
8
+ Source code is available on GitHub at https://github.com/outcomesinsights/sequel_impala
9
+
10
+ # Usage
11
+
12
+ After installation, Sequel will automatically pick up the adapter as long as
13
+ the lib directory is in RUBYLIB, if you use a connection string starting with
14
+ `impala`, or `jdbc:hive2` on JRuby.
15
+
16
+ # Connection Strings
17
+
18
+ If using the impala driver (default host is localhost, default port is 21000):
19
+
20
+ impala://host:port
21
+
22
+ If using the jdbc:hive2 driver on JRuby (port 21050 works in testing):
23
+
24
+ jdbc:hive2://host:port/;auth=noSasl
25
+
26
+ # Dependencies
27
+
28
+ * sequel 4+
29
+ * thrift gem
30
+
31
+ # License
32
+
33
+ MIT/Apache
34
+
35
+ # Author
36
+
37
+ Ryan Duryea <aguynamedryan@gmail.com>
38
+
39
+ Work on sequel_impala is generously funded by [Outcomes Insights, Inc.](http://outins.com)
40
+
41
+ # Previous Author
42
+
43
+ Jeremy Evans <code@jeremyevans.net>
44
+
45
+ Provided initial work on this gem, and continues to maintain [Sequel](http://sequel.jeremyevans.net/). We can't thank you enough!
@@ -0,0 +1,39 @@
1
require "rake"
require "rake/clean"

# Build artifacts removed by `rake clean`.
CLEAN.include ["sequel_impala-*.gem", "rdoc"]

desc "Build sequel_impala gem"
task :package => [:clean] do
  sh %{#{FileUtils::RUBY} -S gem build sequel_impala.gemspec}
end

### Specs

desc "Run specs"
task "spec" do
  # No `-rubygems` switch: it loaded ubygems.rb, which no longer exists on
  # Ruby 2.5+, and RubyGems is loaded by default anyway.
  sh "#{FileUtils::RUBY} -I lib -e 'ARGV.each{|f| require f}' ./spec/*_test.rb"
end

task :default => :spec

### RDoc

RDOC_DEFAULT_OPTS = ["--quiet", "--line-numbers", "--inline-source", '--title', 'sequel_impala: Sequel support for Impala database']

# Use the hanna template when it is installed; otherwise keep RDoc's default.
begin
  gem 'rdoc'
  gem 'hanna-nouveau'
  RDOC_DEFAULT_OPTS.concat(['-f', 'hanna'])
rescue Gem::LoadError
  # hanna-nouveau (or the rdoc gem) is not installed; nothing to configure.
end

# The package ships README.md and CHANGELOG.md, so point RDoc at those names.
RDOC_OPTS = RDOC_DEFAULT_OPTS + ['--main', 'README.md']

require 'rdoc/task'
RDoc::Task.new do |rdoc|
  rdoc.rdoc_dir = "rdoc"
  rdoc.options += RDOC_OPTS
  rdoc.rdoc_files.add %w"README.md CHANGELOG.md LICENSE lib/**/*.rb"
end
39
+
@@ -0,0 +1,55 @@
1
+
2
+ # the generated ruby files use a relative require, so we need to add the
3
+ # generated directory to $LOAD_PATH
4
+ this_dir = File.expand_path(File.dirname(__FILE__))
5
+ gen_dir = File.join(this_dir, 'impala/protocol')
6
+ $LOAD_PATH.push(gen_dir) unless $LOAD_PATH.include?(gen_dir)
7
+
8
+ require 'impala/version'
9
+
10
+ require 'thrift'
11
+ require 'time'
12
+ require 'impala/protocol'
13
+ begin
14
+ require 'impala/sasl_transport'
15
+ rescue LoadError
16
+ # gssapi not supported by operating system, continue as impala adapter
17
+ # can be used in buffered (non-SASL) mode.
18
+ end
19
+ require 'impala/progress_reporter'
20
+ require 'impala/cursor'
21
+ require 'impala/connection'
22
+ require 'impala/thrift_patch'
23
+
24
module Impala
  # Host used by {Impala.connect} when none is given.
  DEFAULT_HOST = 'localhost'.freeze
  # Port used by {Impala.connect} when none is given.
  DEFAULT_PORT = 21000

  # Raised when a query is rejected before being sent (e.g. it is empty).
  class InvalidQueryError < StandardError; end
  # Raised when an operation needs an open connection, or a query was aborted.
  class ConnectionError < StandardError; end
  # Raised when rows are requested from an expired or closed cursor.
  class CursorError < StandardError; end
  # NOTE(review): not raised by any code visible in this file set -- confirm
  # whether it is still needed.
  class ParsingError < StandardError; end

  # Connect to an Impala server. If a block is given, it will close the
  # connection after yielding the connection to the block, even when the
  # block raises.
  # @param [String] host the hostname or IP address of the Impala server
  # @param [int] port the port that the Impala server is listening on
  # @param [Hash] options extra settings handed unchanged to {Connection}
  # @yieldparam [Connection] conn the open connection. Will be closed once the block
  #   finishes
  # @return [Connection] the open connection, or, if a block is
  #   passed, the return value of the block
  def self.connect(host=DEFAULT_HOST, port=DEFAULT_PORT, options={})
    connection = Connection.new(host, port, options)
    return connection unless block_given?

    begin
      yield connection
    ensure
      connection.close
    end
  end
end
@@ -0,0 +1,180 @@
1
module Impala
  # This object represents a connection to an Impala server. It can be used to
  # perform queries on the database.
  class Connection
    attr_accessor :host, :port

    # Don't instantiate Connections directly; instead, use {Impala.connect}.
    # The connection is opened immediately.
    # @param [String] host the hostname or IP address of the Impala server
    # @param [Integer] port the port that the Impala server is listening on
    # @param [Hash] options connection settings; the hash is copied, never
    #   modified. The same options are also handed to every {Cursor}.
    # @option options [Symbol] :transport :buffered (default) or :sasl
    # @option options [Hash] :sasl_params required when :transport is :sasl
    # @option options [Numeric] :timeout assigned to the Thrift socket
    # @option options [Array] :loggers objects responding to #debug
    def initialize(host, port, options={})
      @host = host
      @port = port
      @connected = false
      @options = options.dup
      @options[:transport] ||= :buffered
      @loggers = @options.fetch(:loggers, [])
      open
    end

    def inspect
      "#<#{self.class} #{@host}:#{@port}#{open? ? '' : ' (DISCONNECTED)'}>"
    end

    # Open the connection if it's currently closed.
    def open
      return if @connected

      @transport = thrift_transport(host, port)
      # NOTE(review): the block is presumably invoked by a patched
      # Thrift transport (see impala/thrift_patch) -- confirm.
      @transport.open do |transport|
        enable_keepalive(transport)
      end

      proto = Thrift::BinaryProtocol.new(@transport)
      @service = Protocol::ImpalaService::Client.new(proto)
      @connected = true
    end

    # Builds the Thrift transport selected by the :transport option.
    # @param [String] server the hostname or IP address to connect to
    # @param [Integer] port the port to connect to
    # @raise [ArgumentError] if :sasl is selected without a Hash of :sasl_params
    # @raise [RuntimeError] if the transport type is not recognised
    def thrift_transport(server, port)
      case @options[:transport]
      when :buffered
        Thrift::BufferedTransport.new(thrift_socket(server, port, @options[:timeout]))
      when :sasl
        opts = parse_sasl_params(@options[:sasl_params])
        # Without this check a missing :sasl_params surfaced as a
        # NoMethodError on nil, which hid the actual configuration mistake.
        raise ArgumentError, "The :sasl transport requires :sasl_params to be a Hash" unless opts

        mechanism = opts.delete(:mechanism)
        SASLTransport.new(thrift_socket(server, port, @options[:timeout]), mechanism, opts)
      else
        raise "Unrecognised transport type '#{@options[:transport]}'"
      end
    end

    # Creates an unopened Thrift socket with the given timeout.
    def thrift_socket(server, port, timeout)
      socket = Thrift::Socket.new(server, port)
      socket.timeout = timeout
      socket
    end

    # Processes SASL connection params and returns a hash with symbol keys or a nil
    # @param [Hash, Object] sasl_params the raw :sasl_params option
    # @return [Hash, nil] a new hash with symbolized keys, or nil when
    #   sasl_params is not a Hash
    def parse_sasl_params(sasl_params)
      return nil unless sasl_params.kind_of?(Hash)

      # Symbolize keys in a hash
      sasl_params.each_with_object({}) do |(key, value), symbolized|
        symbolized[key.to_sym] = value
      end
    end

    # Close this connection. It can still be reopened with {#open}.
    def close
      return unless @connected

      @transport.close
      @connected = false
    end

    # Returns true if the connection is currently open.
    def open?
      @connected
    end

    # Refresh the metadata store.
    # @raise [ConnectionError] if the connection is closed
    def refresh
      raise ConnectionError, "Connection closed" unless open?
      @service.ResetCatalog
    end

    # Perform a query and return all the results. This will
    # load the entire result set into memory, so if you're dealing with lots
    # of rows, {#execute} may work better.
    # @param [String] raw_query the query you want to run
    # @param [Hash] query_options the options to set user and configuration
    #   except for :user, see TImpalaQueryOptions in ImpalaService.thrift
    # @option query_options [String] :user the user runs the query
    # @return [Array<Hash>] an array of hashes, one for each row.
    def query(raw_query, query_options = {})
      execute(raw_query, query_options).fetch_all
    end

    # Perform a query and return a cursor for iterating over the results.
    # Blocks until the query has finished running.
    # @param [String] raw_query the query you want to run
    # @param [Hash] query_options the options to set user and configuration
    #   except for :user, see TImpalaQueryOptions in ImpalaService.thrift
    # @option query_options [String] :user the user runs the query
    # @return [Cursor] a cursor for the result rows
    # @raise [ConnectionError] if the connection is closed
    # @raise [InvalidQueryError] if the query is empty
    def execute(raw_query, query_options = {})
      raise ConnectionError, "Connection closed" unless open?

      query = sanitize_query(raw_query)
      handle = send_query(query, query_options)

      cursor = Cursor.new(handle, @service, @options)
      cursor.wait!
      cursor
    end

    # Closes a query handle on the server.
    def close_handle(handle)
      @service.close(handle)
    end

    private

    # Trims surrounding whitespace and lower-cases the leading command word.
    # Everything after the command is left byte-for-byte intact: rebuilding
    # the statement from whitespace-split words would collapse whitespace
    # inside string literals and fold lines that follow a `--` comment.
    # @raise [InvalidQueryError] if the query is empty or only whitespace
    def sanitize_query(raw_query)
      query = raw_query.strip
      raise InvalidQueryError, "Empty query" if query.empty?

      query.sub(/\A\S+/, &:downcase)
    end

    # Builds the Beeswax query object and submits it to the service.
    # @return the query handle returned by the service
    def send_query(sanitized_query, query_options)
      # Work on a copy so the caller's hash keeps its :user entry.
      settings = query_options.dup

      query = Protocol::Beeswax::Query.new
      query.query = sanitized_query

      query.hadoop_user = settings.delete(:user) if settings[:user]
      query.configuration = settings.map do |key, value|
        "#{key.upcase}=#{value}"
      end

      @service.query(query)
    end

    # Turns on TCP keepalive for the socket behind the given transport.
    def enable_keepalive(transport)
      s = transport.handle
      log_debug("Enabling KEEPALIVE...")
      s.setsockopt(::Socket::SOL_SOCKET, ::Socket::SO_KEEPALIVE, true)

      # Apparently Mac OS X (Darwin) doesn't implement the SOL_TCP options below
      # so we'll hope keep alive works under Mac OS X, but in production
      # we Dockerize Jigsaw, so these options should be available when
      # we're running on Linux
      return unless defined?(::Socket::SOL_TCP)

      opts = {}
      opts[::Socket::TCP_KEEPIDLE] = 60 if defined?(::Socket::TCP_KEEPIDLE)
      opts[::Socket::TCP_KEEPINTVL] = 10 if defined?(::Socket::TCP_KEEPINTVL)
      opts[::Socket::TCP_KEEPCNT] = 5 if defined?(::Socket::TCP_KEEPCNT)

      log_debug("Also enabling: #{opts.inspect}")
      opts.each do |opt, value|
        s.setsockopt(::Socket::SOL_TCP, opt, value)
      end
    end

    # Sends a debug message to every configured logger.
    def log_debug(message)
      @loggers.each do |logger|
        logger.debug(message)
      end
    end
  end
end
@@ -0,0 +1,200 @@
1
require 'bigdecimal'

module Impala
  # Cursors are used to iterate over result sets without loading them all
  # into memory at once. This can be useful if you're dealing with lots of
  # rows. It implements Enumerable, so you can use each/select/map/etc.
  class Cursor
    include Enumerable

    # Number of rows requested from the server per fetch, and the buffer
    # level at which fetching stops.
    BUFFER_SIZE = 1024

    def self.typecast_boolean(value)
      value == 'true'
    end

    def self.typecast_int(value)
      value.to_i
    end

    def self.typecast_float(value)
      value.to_f
    end

    # Uses Kernel#BigDecimal; BigDecimal.new no longer exists in
    # bigdecimal 2.0+ (bundled with Ruby 2.7 and later).
    def self.typecast_decimal(value)
      BigDecimal(value)
    end

    def self.typecast_timestamp(value)
      Time.parse(value)
    end

    # Maps a column type name from the result schema to the callable that
    # converts its raw string value. Types without an entry stay strings.
    TYPECAST_MAP = {
      'boolean'=>method(:typecast_boolean),
      'int'=>method(:typecast_int),
      'double'=>method(:typecast_float),
      'decimal'=>method(:typecast_decimal),
      'timestamp'=>method(:typecast_timestamp),
    }
    %w[tinyint smallint bigint].each { |type| TYPECAST_MAP[type] = TYPECAST_MAP['int'] }
    TYPECAST_MAP['float'] = TYPECAST_MAP['double']
    TYPECAST_MAP.freeze

    # Raw field text that stands for SQL NULL.
    NULL = 'NULL'.freeze

    # Per-cursor, modifiable copy of TYPECAST_MAP.
    attr_reader :typecast_map

    attr_reader :handle

    # @param handle the query handle returned by the service
    # @param service the service client used for all calls about this query
    # @param [Hash] options copied, never modified
    # @option options [Numeric] :poll_every seconds to sleep between state
    #   checks in {#wait!} (default 0.1)
    def initialize(handle, service, options = {})
      @handle = handle
      @service = service

      @row_buffer = []
      @done = false
      @open = true
      @typecast_map = TYPECAST_MAP.dup
      @options = options.dup
      @progress_reporter = ProgressReporter.new(self, @options)
      @poll_every = options.fetch(:poll_every, 0.1)
    end

    # @return [Array<String>] the column names, in schema order
    def columns
      @columns ||= metadata.schema.fieldSchemas.map(&:name)
    end

    def inspect
      "#<#{self.class}#{handle ? " QueryID: #{handle.id}" : ''}#{open? ? '' : ' (CLOSED)'}>"
    end

    # Yields each remaining row as a hash.
    # @return [Enumerator] when no block is given
    def each
      return enum_for(:each) unless block_given?

      while (row = fetch_row)
        yield row
      end
    end

    # Returns the next available row as a hash, or nil if there are none left.
    # @return [Hash, nil] the next available row, or nil if there are none
    #   left
    # @see #fetch_all
    def fetch_row
      if @row_buffer.empty?
        return nil if @done
        fetch_more
      end

      @row_buffer.shift
    end

    # Returns all the remaining rows in the result set.
    # @return [Array<Hash>] the remaining rows in the result set
    # @see #fetch_row
    def fetch_all
      to_a
    end

    # Close the cursor on the remote server. Once a cursor is closed, you
    # can no longer fetch any rows from it. Closing an already closed cursor
    # does nothing, so it is safe to call after the automatic close that
    # happens when the last batch has been fetched.
    def close
      return unless @open

      @open = false
      @service.close(@handle)
    end

    # Returns true if the cursor is still open.
    def open?
      @open
    end

    # Returns true if the query is done running, and results can be fetched.
    # A query in the EXCEPTION state also counts as done.
    def query_done?
      [
        Protocol::Beeswax::QueryState::EXCEPTION,
        Protocol::Beeswax::QueryState::FINISHED
      ].include?(@service.get_state(@handle))
    end

    # Blocks until the query is done running, invoking the progress
    # reporter between state checks.
    def wait!
      until query_done?
        periodic_callback
        sleep @poll_every
      end
    end

    # Returns true if there are any more rows to fetch.
    def has_more?
      !@done || !@row_buffer.empty?
    end

    def runtime_profile
      @service.GetRuntimeProfile(@handle)
    end

    def exec_summary
      @service.GetExecSummary(@handle)
    end

    # Returns the progress for the query, as completed scan ranges divided
    # by total scan ranges.
    # NOTE(review): this is float division, so a query reporting zero total
    # scan ranges yields NaN -- confirm callers cope with that.
    def progress
      summary = exec_summary
      summary.progress.num_completed_scan_ranges.to_f / summary.progress.total_scan_ranges.to_f
    end

    private

    attr_reader :progress_reporter

    # Reports progress when the reporter says it is time to do so.
    def periodic_callback
      progress_reporter.report if progress_reporter.show?
    end

    # Result-set metadata, fetched once and cached.
    def metadata
      @metadata ||= @service.get_results_metadata(@handle)
    end

    # Fetches batches until the buffer holds BUFFER_SIZE rows or the result
    # set is exhausted.
    def fetch_more
      fetch_batch until @done || @row_buffer.count >= BUFFER_SIZE
    end

    def exceptional?
      @service.get_state(@handle) == Protocol::Beeswax::QueryState::EXCEPTION
    end

    # Fetches one batch of rows into the buffer and closes the cursor once
    # the server reports there is nothing more to fetch.
    # @raise [CursorError] if the cursor is closed or the fetch is rejected
    # @raise [ConnectionError] if the query is in the EXCEPTION state
    def fetch_batch
      raise CursorError, "Cursor has expired or been closed" unless @open
      raise ConnectionError, "The query was aborted" if exceptional?

      begin
        res = @service.fetch(@handle, false, BUFFER_SIZE)
      rescue Protocol::Beeswax::BeeswaxException
        @open = false
        raise CursorError, "Cursor has expired or been closed"
      end

      @row_buffer.concat(res.data.map { |raw| parse_row(raw) })

      unless res.has_more
        @done = true
        close
      end
    end

    # Converts one delimited row string into a hash keyed by column name.
    def parse_row(raw)
      # Limit -1 keeps trailing empty fields, so an empty string in the last
      # column(s) stays "" instead of turning into nil. An entirely empty row
      # is a single empty field, which split would report as no fields.
      fields = raw.empty? ? [''] : raw.split(metadata.delim, -1)

      row_convertor.each_with_object({}) do |(name, typecast, index), row|
        value = fields[index]
        row[name] =
          if value == NULL
            nil
          elsif typecast
            typecast.call(value)
          else
            value
          end
      end
    end

    # Cached list of [column name, typecast callable or nil, field index].
    def row_convertor
      @row_convertor ||= begin
        schemas = metadata.schema.fieldSchemas
        columns.each_with_index.map do |name, index|
          [name, typecast_map[schemas[index].type], index]
        end
      end
    end
  end
end