sequel-impala 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +45 -0
  3. data/lib/impala.rb +14 -6
  4. data/lib/impala/connection.rb +46 -23
  5. data/lib/impala/cursor.rb +48 -4
  6. data/lib/impala/progress_reporter.rb +40 -0
  7. data/lib/impala/protocol/beeswax_constants.rb +1 -1
  8. data/lib/impala/protocol/beeswax_service.rb +1 -20
  9. data/lib/impala/protocol/beeswax_types.rb +1 -1
  10. data/lib/impala/protocol/exec_stats_constants.rb +13 -0
  11. data/lib/impala/protocol/exec_stats_types.rb +133 -0
  12. data/lib/impala/protocol/facebook_service.rb +3 -3
  13. data/lib/impala/protocol/fb303_constants.rb +1 -1
  14. data/lib/impala/protocol/fb303_types.rb +1 -1
  15. data/lib/impala/protocol/hive_metastore_constants.rb +1 -1
  16. data/lib/impala/protocol/hive_metastore_types.rb +1 -1
  17. data/lib/impala/protocol/impala_hive_server2_service.rb +111 -3
  18. data/lib/impala/protocol/impala_service.rb +67 -1
  19. data/lib/impala/protocol/impala_service_constants.rb +1 -1
  20. data/lib/impala/protocol/impala_service_types.rb +109 -7
  21. data/lib/impala/protocol/status_constants.rb +1 -1
  22. data/lib/impala/protocol/status_types.rb +1 -1
  23. data/lib/impala/protocol/t_c_l_i_service.rb +884 -724
  24. data/lib/impala/protocol/t_c_l_i_service_constants.rb +72 -0
  25. data/lib/impala/protocol/t_c_l_i_service_types.rb +1799 -0
  26. data/lib/impala/protocol/thrift_hive_metastore.rb +1 -1
  27. data/lib/impala/protocol/types_constants.rb +13 -0
  28. data/lib/impala/protocol/types_types.rb +332 -0
  29. data/lib/impala/sasl_transport.rb +117 -0
  30. data/lib/impala/thrift_patch.rb +42 -0
  31. data/lib/rbhive/connection.rb +25 -25
  32. data/lib/rbhive/explain_result.rb +9 -9
  33. data/lib/rbhive/schema_definition.rb +12 -12
  34. data/lib/rbhive/t_c_l_i_connection.rb +28 -26
  35. data/lib/rbhive/t_c_l_i_schema_definition.rb +1 -1
  36. data/lib/rbhive/table_schema.rb +1 -1
  37. data/lib/sequel/adapters/impala.rb +63 -6
  38. data/lib/sequel/adapters/jdbc/hive2.rb +1 -1
  39. data/lib/sequel/adapters/rbhive.rb +3 -2
  40. data/lib/sequel/adapters/shared/impala.rb +133 -25
  41. data/lib/thrift/sasl_client_transport.rb +2 -2
  42. data/lib/thrift/thrift_hive.rb +2 -2
  43. data/lib/thrift/thrift_hive_metastore.rb +2 -2
  44. data/spec/dataset_test.rb +85 -85
  45. data/spec/files/bad_timestamped_migrations/1273253849_create_sessions.rb +1 -1
  46. data/spec/files/bad_timestamped_migrations/1273253851_create_nodes.rb +1 -1
  47. data/spec/files/convert_to_timestamp_migrations/001_create_sessions.rb +1 -1
  48. data/spec/files/convert_to_timestamp_migrations/002_create_nodes.rb +1 -1
  49. data/spec/files/convert_to_timestamp_migrations/1273253850_create_artists.rb +1 -1
  50. data/spec/files/convert_to_timestamp_migrations/1273253852_create_albums.rb +1 -1
  51. data/spec/files/duplicate_timestamped_migrations/1273253849_create_sessions.rb +1 -1
  52. data/spec/files/duplicate_timestamped_migrations/1273253853_create_nodes.rb +1 -1
  53. data/spec/files/integer_migrations/001_create_sessions.rb +1 -1
  54. data/spec/files/integer_migrations/002_create_nodes.rb +1 -1
  55. data/spec/files/interleaved_timestamped_migrations/1273253849_create_sessions.rb +1 -1
  56. data/spec/files/interleaved_timestamped_migrations/1273253850_create_artists.rb +1 -1
  57. data/spec/files/interleaved_timestamped_migrations/1273253851_create_nodes.rb +1 -1
  58. data/spec/files/interleaved_timestamped_migrations/1273253852_create_albums.rb +1 -1
  59. data/spec/files/timestamped_migrations/1273253849_create_sessions.rb +1 -1
  60. data/spec/files/timestamped_migrations/1273253851_create_nodes.rb +1 -1
  61. data/spec/migrator_test.rb +2 -2
  62. data/spec/prepared_statement_test.rb +12 -12
  63. data/spec/schema_test.rb +6 -6
  64. data/spec/type_test.rb +8 -8
  65. metadata +30 -11
  66. data/CHANGELOG +0 -19
  67. data/lib/impala/protocol/cli_service_constants.rb +0 -60
  68. data/lib/impala/protocol/cli_service_types.rb +0 -1452
@@ -48,7 +48,7 @@ module RBHive
48
48
  end
49
49
 
50
50
  def column_type_map
51
- @column_type_map ||= column_names.inject({}) do |hsh, c|
51
+ @column_type_map ||= column_names.inject({}) do |hsh, c|
52
52
  definition = @schema.columns.find {|s| s.columnName.to_sym == c }
53
53
  # If the column isn't in the schema (eg partitions in SELECT * queries) assume they are strings
54
54
  type = TYPE_NAMES[definition.typeDesc.types.first.primitiveEntry.type].downcase rescue nil
@@ -119,4 +119,4 @@ module RBHive
119
119
  end
120
120
  end
121
121
  end
122
- end
122
+ end
@@ -8,7 +8,10 @@ module Sequel
8
8
 
9
9
  # Exception classes used by Impala.
10
10
  ImpalaExceptions = [
11
- ::Impala::Error,
11
+ ::Impala::InvalidQueryError,
12
+ ::Impala::ConnectionError,
13
+ ::Impala::CursorError,
14
+ ::Impala::ParsingError,
12
15
  ::Impala::Protocol::Beeswax::BeeswaxException,
13
16
  ::Thrift::TransportException,
14
17
  IOError
@@ -21,11 +24,12 @@ module Sequel
21
24
 
22
25
  set_adapter_scheme :impala
23
26
 
27
+
24
28
  # Connect to the Impala server. Currently, only the :host and :port options
25
29
  # are respected, and they default to 'localhost' and 21000, respectively.
26
30
  def connect(server)
27
31
  opts = server_opts(server)
28
- ::Impala.connect(opts[:host]||'localhost', (opts[:port]||21000).to_i)
32
+ ::Impala.connect(opts[:host]||'localhost', (opts[:port]||21000).to_i, opts)
29
33
  end
30
34
 
31
35
  def database_error_classes
@@ -40,19 +44,62 @@ module Sequel
40
44
  def execute(sql, opts=OPTS)
41
45
  synchronize(opts[:server]) do |c|
42
46
  begin
43
- cursor = log_yield(sql){c.execute(sql)}
47
+ cursor = record_query_id(opts) do
48
+ log_yield(sql) do
49
+ c.execute(sql){}
50
+ end
51
+ end
44
52
  yield cursor if block_given?
45
53
  nil
46
54
  rescue *ImpalaExceptions => e
55
+ puts $!.message
56
+ puts $!.backtrace.join("\n")
47
57
  raise_error(e)
58
+ rescue
59
+ puts $!.message
60
+ puts $!.backtrace.join("\n")
61
+ raise
48
62
  ensure
63
+ record_profile(cursor, opts)
49
64
  cursor.close if cursor && cursor.open?
50
65
  end
51
66
  end
52
67
  end
53
68
 
69
+ def profile_for(profile_name=:default)
70
+ Sequel.synchronize{@runtime_profiles[profile_name]}
71
+ end
72
+
73
+ def query_id_for(query_id_name=:default)
74
+ Sequel.synchronize{@query_ids[query_id_name]}
75
+ end
76
+
54
77
  private
55
78
 
79
+ def record_profile(cursor, opts)
80
+ if cursor && profile_name = opts[:profile_name]
81
+ profile = cursor.runtime_profile
82
+ Sequel.synchronize{@runtime_profiles[profile_name] = profile}
83
+ end
84
+ end
85
+
86
+ def record_query_id(opts = OPTS)
87
+ start = Time.now if opts[:query_id_name]
88
+
89
+ cursor = yield
90
+
91
+ if cursor && query_id_name = opts[:query_id_name]
92
+ Sequel.synchronize{ @query_ids[query_id_name] = { query_id: cursor.handle.id, start_time: start } }
93
+ end
94
+
95
+ cursor
96
+ end
97
+
98
+ def adapter_initialize
99
+ @runtime_profiles = {}
100
+ @query_ids = {}
101
+ end
102
+
56
103
  def connection_execute_method
57
104
  :query
58
105
  end
@@ -113,13 +160,23 @@ module Sequel
113
160
  STRING_ESCAPE_RE = /(#{Regexp.union(STRING_ESCAPES.keys)})/
114
161
 
115
162
  def fetch_rows(sql)
116
- execute(sql) do |cursor|
163
+ execute(sql, @opts) do |cursor|
117
164
  @columns = cursor.columns.map!{|c| output_identifier(c)}
118
165
  cursor.typecast_map['timestamp'] = db.method(:to_application_timestamp)
119
166
  cursor.each do |row|
120
167
  yield row
121
168
  end
122
169
  end
170
+
171
+ self
172
+ end
173
+
174
+ def profile(profile_name=:default)
175
+ clone(:profile_name => profile_name)
176
+ end
177
+
178
+ def query_id(query_id_name=:default)
179
+ clone(:query_id_name => query_id_name)
123
180
  end
124
181
 
125
182
  private
@@ -128,8 +185,8 @@ module Sequel
128
185
  # some values in string literals to get correct results, but not the
129
186
  # tab character or things break.
130
187
  def literal_string_append(sql, s)
131
- sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE){|m| STRING_ESCAPES[m]} << APOS
188
+ sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE){|m| STRING_ESCAPES[m]} << APOS
132
189
  end
133
190
  end
134
191
  end
135
- end
192
+ end
@@ -17,7 +17,7 @@ module Sequel
17
17
  extend Sequel::Database::ResetIdentifierMangling
18
18
  include Sequel::Impala::DatabaseMethods
19
19
  end
20
-
20
+
21
21
  class Dataset < JDBC::Dataset
22
22
  include Sequel::Impala::DatasetMethods
23
23
  end
@@ -74,6 +74,7 @@ module Sequel
74
74
  def execute(sql, opts=OPTS)
75
75
  synchronize(opts[:server]) do |c|
76
76
  begin
77
+ puts sql
77
78
  r = log_yield(sql){c.execute(sql)}
78
79
  yield(c, r) if block_given?
79
80
  nil
@@ -166,9 +167,9 @@ module Sequel
166
167
  private
167
168
 
168
169
  def literal_string_append(sql, s)
169
- sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE){|m| STRING_ESCAPES[m]} << APOS
170
+ sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE){|m| STRING_ESCAPES[m]} << APOS
170
171
  end
171
172
  end
172
173
  end
173
- end
174
+ end
174
175
 
@@ -1,6 +1,10 @@
1
1
  module Sequel
2
2
  module Impala
3
+ Sequel::Database.set_shared_adapter_scheme :impala, self
4
+
3
5
  module DatabaseMethods
6
+ extend Sequel::Database::ResetIdentifierMangling
7
+
4
8
  # Do not use a composite primary key, foreign keys, or an
5
9
  # index when creating a join table, as Impala doesn't support those.
6
10
  def create_join_table(hash, options=OPTS)
@@ -12,6 +16,14 @@ module Sequel
12
16
  end
13
17
  end
14
18
 
19
+ def refresh(table_name)
20
+ run(refresh_sql(table_name))
21
+ end
22
+
23
+ def compute_stats(table_name)
24
+ run(compute_stats_sql(table_name))
25
+ end
26
+
15
27
  # Create a database/schema in Imapala.
16
28
  #
17
29
  # Options:
@@ -98,7 +110,12 @@ module Sequel
98
110
  if schema = search_path_table_schemas[table]
99
111
  Sequel.qualify(schema, table)
100
112
  else
101
- Sequel.identifier(table)
113
+ invalidate_table_schemas
114
+ if schema = search_path_table_schemas[table]
115
+ Sequel.qualify(schema, table)
116
+ else
117
+ Sequel.identifier(table)
118
+ end
102
119
  end
103
120
  when SQL::Identifier
104
121
  implicit_qualify(table.value.to_s)
@@ -150,7 +167,7 @@ module Sequel
150
167
  # tables and not views. The Hive2 JDBC driver returns views when listing
151
168
  # tables and nothing when listing views.
152
169
  def tables(opts=OPTS)
153
- _tables(opts).select{|t| is_valid_table?(t)}
170
+ _tables(opts).select{|t| is_valid_table?(t, opts)}
154
171
  end
155
172
 
156
173
  # Impala doesn't support transactions, so instead of issuing a
@@ -166,7 +183,30 @@ module Sequel
166
183
  # Determine the available views for listing all tables via JDBC (which
167
184
  # includes both tables and views), and removing all valid tables.
168
185
  def views(opts=OPTS)
169
- _tables(opts).reject{|t| is_valid_table?(t)}
186
+ _tables(opts).reject{|t| is_valid_table?(t, opts)}
187
+ end
188
+
189
+ def invalidate_table_schemas
190
+ @search_path_table_schemas = nil
191
+ end
192
+
193
+ # Creates a dataset that uses the VALUES clause:
194
+ #
195
+ # DB.values([[1, 2], [3, 4]])
196
+ # VALUES ((1, 2), (3, 4))
197
+ def values(v)
198
+ @default_dataset.clone(:values=>v)
199
+ end
200
+
201
+ def invalidate_table_schemas
202
+ @search_path_table_schemas = nil
203
+ end
204
+
205
+ # Sets options in the current db connection for each key/value pair
206
+ def set(opts)
207
+ set_sql(opts).each do |sql|
208
+ run(sql)
209
+ end
170
210
  end
171
211
 
172
212
  private
@@ -251,6 +291,14 @@ module Sequel
251
291
  sql
252
292
  end
253
293
 
294
+ def refresh_sql(table_name)
295
+ "REFRESH #{quote_schema_table(table_name)}"
296
+ end
297
+
298
+ def compute_stats_sql(table_name)
299
+ "COMPUTE STATS #{quote_schema_table(table_name)}"
300
+ end
301
+
254
302
  def drop_schema_sql(schema, options)
255
303
  "DROP SCHEMA #{'IF EXISTS ' if options[:if_exists]}#{quote_identifier(schema)}"
256
304
  end
@@ -264,6 +312,10 @@ module Sequel
264
312
  nil
265
313
  end
266
314
 
315
+ def quote_identifiers_default
316
+ true
317
+ end
318
+
267
319
  def search_path_table_schemas
268
320
  @search_path_table_schemas ||= begin
269
321
  search_path = opts[:search_path]
@@ -280,7 +332,8 @@ module Sequel
280
332
 
281
333
  # SHOW TABLE STATS will raise an error if given a view and not a table,
282
334
  # so use that to differentiate tables from views.
283
- def is_valid_table?(t)
335
+ def is_valid_table?(t, opts=OPTS)
336
+ t = [opts[:schema], t].map(&:to_s).join('__').to_sym if opts[:schema]
284
337
  rows = describe(t, :formatted=>true)
285
338
  if row = rows.find{|r| r[:name].to_s.strip == 'Table Type:'}
286
339
  row[:type].to_s.strip !~ /VIEW/
@@ -302,6 +355,21 @@ module Sequel
302
355
  )
303
356
  end
304
357
 
358
+ # Impala doesn't like the word "integer"
359
+ def type_literal_generic_integer(column)
360
+ :int
361
+ end
362
+
363
+ # Impala doesn't like the word "biginteger"
364
+ def type_literal_generic_bignum_symbol(column)
365
+ :bigint
366
+ end
367
+
368
+ # Impala doesn't like the word "biginteger"
369
+ def type_literal_generic_bignum(column)
370
+ :bigint
371
+ end
372
+
305
373
  # Impala doesn't support date columns yet, so use timestamp until date
306
374
  # is natively supported.
307
375
  def type_literal_generic_date(column)
@@ -331,6 +399,10 @@ module Sequel
331
399
  :string
332
400
  end
333
401
  end
402
+
403
+ def set_sql(opts)
404
+ opts.map { |k, v| "SET #{k}=#{v}" }
405
+ end
334
406
  end
335
407
 
336
408
  module DatasetMethods
@@ -347,8 +419,10 @@ module Sequel
347
419
  NOT = 'NOT '.freeze
348
420
  REGEXP = ' REGEXP '.freeze
349
421
  EXCEPT_SOURCE_COLUMN = :__source__
422
+ EXCEPT_STRATEGIES = [:not_exists, :not_in, :left_join, :group_by].freeze
423
+ SELECT_VALUES = 'VALUES '.freeze
350
424
 
351
- Dataset.def_sql_method(self, :select, %w'with select distinct columns from join where group having compounds order limit')
425
+ Dataset.def_sql_method(self, :select, [['if opts[:values]', %w'values'], ['else', %w'with select distinct columns from join where group having compounds order limit']])
352
426
 
353
427
  # Handle string concatenation using the concat string function.
354
428
  # Don't use the ESCAPE syntax when using LIKE/NOT LIKE, as
@@ -454,19 +528,48 @@ module Sequel
454
528
  get(Sequel::SQL::AliasedExpression.new(1, :one)).nil?
455
529
  end
456
530
 
457
- # Emulate INTERSECT using a UNION ALL and checking for values in only the first table.
531
+ # Emulate EXCEPT using a chosen strategy and checking for values in only the first table.
458
532
  def except(other, opts=OPTS)
459
533
  raise(InvalidOperation, "EXCEPT ALL not supported") if opts[:all]
460
534
  raise(InvalidOperation, "The :from_self=>false option to except is not supported") if opts[:from_self] == false
461
- cols = columns
462
- rhs = other.from_self.select_group(*other.columns).select_append(Sequel.expr(2).as(EXCEPT_SOURCE_COLUMN))
463
- from_self.
464
- select_group(*cols).
465
- select_append(Sequel.expr(1).as(EXCEPT_SOURCE_COLUMN)).
466
- union(rhs, all: true).
467
- select_group(*cols).
468
- having{{count{}.* => 1, min(EXCEPT_SOURCE_COLUMN) => 1}}.
469
- from_self(opts)
535
+
536
+ strategy, *keys = @opts[:except_strategy]
537
+ ds = from_self(:alias=>:t1)
538
+
539
+ ds = case strategy
540
+ when :not_exists
541
+ ds.exclude(other.
542
+ from_self(:alias=>:t2).
543
+ where(keys.map{|key| [Sequel.qualify(:t1, key), Sequel.qualify(:t2, key)]}).
544
+ select(nil).
545
+ exists)
546
+ when :not_in
547
+ raise Sequel::Error, ":not_in EXCEPT strategy only supports a single key" unless keys.length == 1
548
+ key = keys.first
549
+ ds.exclude(Sequel.qualify(:t1, key)=>other.from_self(:alias=>:t2).select(key))
550
+ when :left_join
551
+ ds.left_join(other.from_self(:alias=>:t2).as(:t2), keys.map{|key| [key, key]}).
552
+ where(Sequel.or(keys.map{|key| [Sequel.qualify(:t2, key), nil]})).
553
+ select_all(:t1)
554
+ else
555
+ cols = columns
556
+ rhs = other.from_self.select_group(*other.columns).select_append(Sequel.expr(2).as(EXCEPT_SOURCE_COLUMN))
557
+ ds.select_group(*cols).
558
+ select_append(Sequel.expr(1).as(EXCEPT_SOURCE_COLUMN)).
559
+ union(rhs, all: true).
560
+ select_group(*cols).
561
+ having{{count{}.* => 1, min(EXCEPT_SOURCE_COLUMN) => 1}}
562
+ end
563
+
564
+ ds.from_self(opts)
565
+ end
566
+
567
+ # The strategy to use for EXCEPT emulation. By default, uses a GROUP BY emulation,
568
+ # as that doesn't require you provide a key column, but you can use this to choose
569
+ # a NOT EXISTS, NOT IN, or LEFT JOIN emulation, providing the unique key column.
570
+ def except_strategy(strategy, *keys)
571
+ raise Sequel::Error, "invalid EXCEPT strategy: #{strategy.inspect}" unless EXCEPT_STRATEGIES.include?(strategy)
572
+ clone(:except_strategy=>[strategy, *keys])
470
573
  end
471
574
 
472
575
  # Use INSERT OVERWRITE instead of INSERT INTO when inserting into this dataset:
@@ -487,19 +590,17 @@ module Sequel
487
590
  raise(InvalidOperation, "INTERSECT ALL not supported") if opts[:all]
488
591
  raise(InvalidOperation, "The :from_self=>false option to intersect is not supported") if opts[:from_self] == false
489
592
  cols = columns
490
- from_self.
491
- select_group(*cols).
492
- union(other.from_self.select_group(*other.columns), all: true).
493
- select_group(*cols).
494
- having{count{}.* > 1}.
495
- from_self(opts)
593
+ (from_self(alias: :l)
594
+ .join(other, cols)
595
+ .select_all(:l))
596
+ .from_self(opts)
496
597
  end
497
598
 
498
599
  # Impala supports non-recursive common table expressions.
499
600
  def supports_cte?(type=:select)
500
601
  true
501
602
  end
502
-
603
+
503
604
  def supports_cte_in_subqueries?
504
605
  true
505
606
  end
@@ -520,7 +621,7 @@ module Sequel
520
621
  def supports_is_true?
521
622
  false
522
623
  end
523
-
624
+
524
625
  # Impala doesn't support IN when used with multiple columns.
525
626
  def supports_multiple_column_in?
526
627
  false
@@ -602,7 +703,7 @@ module Sequel
602
703
  def insert_empty_columns_values
603
704
  [[columns.last], [nil]]
604
705
  end
605
-
706
+
606
707
  def literal_true
607
708
  BOOL_TRUE
608
709
  end
@@ -619,7 +720,7 @@ module Sequel
619
720
  # Double backslashes in all strings, and escape all apostrophes with
620
721
  # backslashes.
621
722
  def literal_string_append(sql, s)
622
- sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE, STRING_ESCAPE_REPLACE) << APOS
723
+ sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE, STRING_ESCAPE_REPLACE) << APOS
623
724
  end
624
725
 
625
726
  def multi_insert_sql_strategy
@@ -638,6 +739,13 @@ module Sequel
638
739
  return unless opts[:from]
639
740
  super
640
741
  end
742
+
743
+
744
+ # Support VALUES clause instead of the SELECT clause to return rows.
745
+ def select_values_sql(sql)
746
+ sql << SELECT_VALUES
747
+ expression_list_append(sql, opts[:values])
748
+ end
641
749
  end
642
750
  end
643
751
  end
@@ -21,7 +21,7 @@ module Thrift
21
21
  @sasl_mechanism = sasl_params.fetch(:mechanism, 'PLAIN')
22
22
  raise 'Unknown SASL mechanism: #{@sasl_mechanism}' unless ['PLAIN', 'GSSAPI'].include? @sasl_mechanism
23
23
  if @sasl_mechanism == 'GSSAPI'
24
- require 'gssapi'
24
+ require 'gssapi'
25
25
  @sasl_remote_principal = sasl_params[:remote_principal]
26
26
  @sasl_remote_host = sasl_params[:remote_host]
27
27
  @gsscli = GSSAPI::Simple.new(@sasl_remote_host, @sasl_remote_principal)
@@ -34,7 +34,7 @@ module Thrift
34
34
  @index += sz
35
35
  ret = @rbuf.slice(@index - sz, sz) || Bytes.empty_byte_buffer
36
36
  if ret.length < sz
37
- sz -= ret.length
37
+ sz -= ret.length
38
38
  read_into_buffer(@rbuf, [sz, len || 0].max)
39
39
  @index = sz
40
40
  ret += @rbuf.slice(0, sz) || Bytes.empty_byte_buffer