sequel-impala 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG +3 -0
  3. data/LICENSE +462 -0
  4. data/README.rdoc +39 -0
  5. data/Rakefile +39 -0
  6. data/lib/driver/commons-logging-1.2.jar +0 -0
  7. data/lib/driver/hadoop-common-2.6.0.jar +0 -0
  8. data/lib/driver/hadoop-core-2.6.0.jar +0 -0
  9. data/lib/driver/hive-exec-1.1.0.jar +0 -0
  10. data/lib/driver/hive-jdbc-1.1.0.jar +0 -0
  11. data/lib/driver/hive-metastore-1.1.0.jar +0 -0
  12. data/lib/driver/hive-service-1.1.0.jar +0 -0
  13. data/lib/driver/httpclient-4.3.jar +0 -0
  14. data/lib/driver/httpcore-4.3.jar +0 -0
  15. data/lib/driver/libfb303-0.9.0.jar +0 -0
  16. data/lib/driver/slf4j-api-1.7.5.jar +0 -0
  17. data/lib/impala.rb +47 -0
  18. data/lib/impala/connection.rb +117 -0
  19. data/lib/impala/cursor.rb +157 -0
  20. data/lib/impala/protocol.rb +8 -0
  21. data/lib/impala/protocol/beeswax_constants.rb +15 -0
  22. data/lib/impala/protocol/beeswax_service.rb +766 -0
  23. data/lib/impala/protocol/beeswax_types.rb +193 -0
  24. data/lib/impala/protocol/cli_service_constants.rb +60 -0
  25. data/lib/impala/protocol/cli_service_types.rb +1452 -0
  26. data/lib/impala/protocol/facebook_service.rb +706 -0
  27. data/lib/impala/protocol/fb303_constants.rb +15 -0
  28. data/lib/impala/protocol/fb303_types.rb +25 -0
  29. data/lib/impala/protocol/hive_metastore_constants.rb +53 -0
  30. data/lib/impala/protocol/hive_metastore_types.rb +698 -0
  31. data/lib/impala/protocol/impala_hive_server2_service.rb +29 -0
  32. data/lib/impala/protocol/impala_service.rb +377 -0
  33. data/lib/impala/protocol/impala_service_constants.rb +13 -0
  34. data/lib/impala/protocol/impala_service_types.rb +90 -0
  35. data/lib/impala/protocol/status_constants.rb +13 -0
  36. data/lib/impala/protocol/status_types.rb +46 -0
  37. data/lib/impala/protocol/t_c_l_i_service.rb +948 -0
  38. data/lib/impala/protocol/thrift_hive_metastore.rb +4707 -0
  39. data/lib/impala/version.rb +3 -0
  40. data/lib/jdbc/hive2.rb +46 -0
  41. data/lib/sequel/adapters/impala.rb +123 -0
  42. data/lib/sequel/adapters/jdbc/hive2.rb +26 -0
  43. data/lib/sequel/adapters/shared/impala.rb +635 -0
  44. data/lib/sequel/extensions/csv_to_parquet.rb +112 -0
  45. data/spec/database_test.rb +56 -0
  46. data/spec/dataset_test.rb +1268 -0
  47. data/spec/files/bad_down_migration/001_create_alt_basic.rb +4 -0
  48. data/spec/files/bad_down_migration/002_create_alt_advanced.rb +4 -0
  49. data/spec/files/bad_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  50. data/spec/files/bad_timestamped_migrations/1273253851_create_nodes.rb +9 -0
  51. data/spec/files/bad_timestamped_migrations/1273253853_3_create_users.rb +3 -0
  52. data/spec/files/bad_up_migration/001_create_alt_basic.rb +4 -0
  53. data/spec/files/bad_up_migration/002_create_alt_advanced.rb +3 -0
  54. data/spec/files/convert_to_timestamp_migrations/001_create_sessions.rb +9 -0
  55. data/spec/files/convert_to_timestamp_migrations/002_create_nodes.rb +9 -0
  56. data/spec/files/convert_to_timestamp_migrations/003_3_create_users.rb +4 -0
  57. data/spec/files/convert_to_timestamp_migrations/1273253850_create_artists.rb +9 -0
  58. data/spec/files/convert_to_timestamp_migrations/1273253852_create_albums.rb +9 -0
  59. data/spec/files/duplicate_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  60. data/spec/files/duplicate_timestamped_migrations/1273253853_create_nodes.rb +9 -0
  61. data/spec/files/duplicate_timestamped_migrations/1273253853_create_users.rb +4 -0
  62. data/spec/files/integer_migrations/001_create_sessions.rb +9 -0
  63. data/spec/files/integer_migrations/002_create_nodes.rb +9 -0
  64. data/spec/files/integer_migrations/003_3_create_users.rb +4 -0
  65. data/spec/files/interleaved_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  66. data/spec/files/interleaved_timestamped_migrations/1273253850_create_artists.rb +9 -0
  67. data/spec/files/interleaved_timestamped_migrations/1273253851_create_nodes.rb +9 -0
  68. data/spec/files/interleaved_timestamped_migrations/1273253852_create_albums.rb +9 -0
  69. data/spec/files/interleaved_timestamped_migrations/1273253853_3_create_users.rb +4 -0
  70. data/spec/files/reversible_migrations/001_reversible.rb +5 -0
  71. data/spec/files/reversible_migrations/002_reversible.rb +5 -0
  72. data/spec/files/reversible_migrations/003_reversible.rb +5 -0
  73. data/spec/files/reversible_migrations/004_reversible.rb +5 -0
  74. data/spec/files/reversible_migrations/005_reversible.rb +10 -0
  75. data/spec/files/timestamped_migrations/1273253849_create_sessions.rb +9 -0
  76. data/spec/files/timestamped_migrations/1273253851_create_nodes.rb +9 -0
  77. data/spec/files/timestamped_migrations/1273253853_3_create_users.rb +4 -0
  78. data/spec/impala_test.rb +285 -0
  79. data/spec/migrator_test.rb +240 -0
  80. data/spec/plugin_test.rb +91 -0
  81. data/spec/prepared_statement_test.rb +327 -0
  82. data/spec/schema_test.rb +356 -0
  83. data/spec/spec_helper.rb +15 -0
  84. data/spec/timezone_test.rb +86 -0
  85. data/spec/type_test.rb +99 -0
  86. metadata +239 -0
@@ -0,0 +1,3 @@
1
# Namespace of the Impala client code shipped under lib/impala.
module Impala
  # Version string of that client code.
  VERSION = "0.4.3"
end
@@ -0,0 +1,46 @@
1
# Warn at load time when JRUBY_VERSION is undefined or nil, i.e. when this
# file is loaded on a Ruby other than JRuby.
warn 'jdbc-hive2 is only for use with JRuby' unless defined?(JRUBY_VERSION) && !JRUBY_VERSION.nil?
2
+
3
# Loader for the Hive2 JDBC driver jars listed under driver/.
module Jdbc
  module Hive2
    DRIVER_VERSION = '1.1.0'
    VERSION = "#{DRIVER_VERSION}.0"

    class << self
      # Relative paths of all jars that make up the driver.  A new array is
      # built on every call, so callers may modify the result.
      def driver_jar
        %w[
          driver/libfb303-0.9.0.jar
          driver/slf4j-api-1.7.5.jar
          driver/hadoop-common-2.6.0.jar
          driver/hadoop-core-2.6.0.jar
          driver/commons-logging-1.2.jar
          driver/hive-exec-1.1.0.jar
          driver/hive-jdbc-1.1.0.jar
          driver/hive-metastore-1.1.0.jar
          driver/hive-service-1.1.0.jar
          driver/httpcore-4.3.jar
          driver/httpclient-4.3.jar
        ]
      end

      # Invoke +method+ (:load by default, or e.g. :require) once per driver
      # jar, in the order given by driver_jar.  Returns the list of jars.
      def load_driver(method = :load)
        driver_jar.each { |jar| send(method, jar) }
      end

      # Fully qualified Java class name of the JDBC driver.
      def driver_name
        'org.apache.hive.jdbc.HiveDriver'
      end
    end

    # Backwards-compatible behavior: on JRuby, require the jars immediately
    # when the jdbc.driver.autoload Java system property is true.
    if defined?(JRUBY_VERSION) && Java::JavaLang::Boolean.get_boolean('jdbc.driver.autoload')
      warn "autoloading jdbc driver on require 'jdbc/hive2'" if $VERBOSE
      load_driver :require
    end
  end
end
@@ -0,0 +1,123 @@
1
+ require 'impala'
2
+ require 'sequel/adapters/shared/impala'
3
+
4
+ module Sequel
5
+ module Impala
6
# Sequel database class for the native (non-JDBC) Impala driver.
class Database < Sequel::Database
  include DatabaseMethods

  # Exception classes rescued in #execute and reported through
  # #database_error_classes.
  ImpalaExceptions = [
    ::Impala::Error,
    ::Impala::Protocol::Beeswax::BeeswaxException,
    IOError
  ].freeze

  set_adapter_scheme :impala

  # Open a connection to the Impala server.  Only the :host and :port
  # options are used; they default to 'localhost' and 21000.
  def connect(server)
    settings = server_opts(server)
    host = settings[:host] || 'localhost'
    port = (settings[:port] || 21000).to_i
    ::Impala.connect(host, port)
  end

  # The exception classes listed in ImpalaExceptions.
  def database_error_classes
    ImpalaExceptions
  end

  # Close the given connection.
  def disconnect_connection(c)
    c.close
  end

  # Run +sql+ on a connection for the :server option and yield the
  # resulting cursor if a block is given.  Exceptions listed in
  # ImpalaExceptions are passed to raise_error.  The cursor is closed
  # afterwards if it is still open.  Returns nil.
  def execute(sql, opts=OPTS)
    synchronize(opts[:server]) do |conn|
      cursor = nil
      begin
        cursor = log_yield(sql) { conn.execute(sql) }
        yield(cursor) if block_given?
        nil
      rescue *ImpalaExceptions => error
        raise_error(error)
      ensure
        cursor.close if cursor && cursor.open?
      end
    end
  end

  private

  def connection_execute_method
    :query
  end

  # Impala raises IOError if it detects a problem on the connection, and
  # in most cases that leaves the connection unusable, so treat IOError as
  # a disconnect error so Sequel will reconnect.
  def disconnect_error?(exception, opts)
    return true if exception.is_a?(IOError)
    super
  end

  # Build the schema for a table from its DESCRIBE output.  Each row is
  # turned into a [column name, info hash] pair, where the info hash holds
  # :db_type (the type reported by DESCRIBE), :type (from
  # schema_column_type), :default (always nil) and :primary_key (always
  # false).
  def schema_parse_table(table_name, opts)
    identifier = output_identifier_meth(opts[:dataset])
    schema = opts[:schema]
    table = schema ? Sequel.qualify(schema, table_name) : Sequel.identifier(table_name)

    describe(table, opts).map do |column|
      db_type = column[:type]
      column[:db_type] = db_type
      column[:type] = schema_column_type(db_type)
      column[:default] = nil
      column[:primary_key] = false
      [identifier.call(column.delete(:name)), column]
    end
  end
end
79
+
80
# Sequel dataset class for the native (non-JDBC) Impala driver.
class Dataset < Sequel::Dataset
  include DatasetMethods

  Database::DatasetClass = self

  APOS = "'".freeze

  # Characters that are backslash-escaped inside string literals, mapped to
  # the escape sequence written into the SQL.
  STRING_ESCAPES = {
    "\\" => "\\\\".freeze,
    "'" => "\\'".freeze,
    "\n" => "\\n".freeze,
    "\r" => "\\r".freeze,
    "\0" => "\\0".freeze,
    "\b" => "\\b".freeze,
    # \Z is the escape sequence for Ctrl-Z (ASCII 26), so the key must be
    # "\x1A".  It was previously "\04" (ASCII 4), which rewrote EOT bytes
    # to Ctrl-Z and left real Ctrl-Z bytes unescaped.
    "\x1A" => "\\Z".freeze,
    # Impala is supposed to support this, but using it
    # breaks things to the point of returning bad data.
    # If you don't do this, the tabs in the input
    # get converted to spaces, but that's better than the
    # alternative.
    # "\t" => "\\t".freeze,
  }.freeze
  STRING_ESCAPE_RE = /(#{Regexp.union(STRING_ESCAPES.keys)})/

  # Execute +sql+ and yield each row produced by the cursor.  Sets @columns
  # from the cursor's column names (passed through output_identifier) and
  # makes the cursor typecast 'timestamp' values with the database's
  # to_application_timestamp method.
  def fetch_rows(sql)
    execute(sql) do |cursor|
      @columns = cursor.columns.map! { |c| output_identifier(c) }
      cursor.typecast_map['timestamp'] = db.method(:to_application_timestamp)
      cursor.each { |row| yield row }
    end
  end

  private

  # Unlike the jdbc/hive2 driver, the impala driver requires you to escape
  # some values in string literals to get correct results, but not the
  # tab character or things break.
  def literal_string_append(sql, s)
    escaped = s.to_s.gsub(STRING_ESCAPE_RE) { |m| STRING_ESCAPES[m] }
    sql << APOS << escaped << APOS
  end
end
122
+ end
123
+ end
@@ -0,0 +1,26 @@
1
+ require 'sequel/adapters/shared/impala'
2
+
3
+ Sequel::JDBC.load_driver('org.apache.hive.jdbc.HiveDriver', :Hive2)
4
+
5
module Sequel
  module JDBC
    # Register the setup hook for the :hive2 JDBC sub-adapter.  The hook
    # extends the database with the Hive2 methods, switches its dataset
    # class, and returns the Java driver class.
    Sequel.synchronize do
      DATABASE_SETUP[:hive2] = proc do |db|
        db.extend(Hive2::DatabaseMethods)
        db.dataset_class = Hive2::Dataset
        org.apache.hive.jdbc.HiveDriver
      end
    end

    # Impala support on top of the Hive2 JDBC driver; all behavior comes
    # from the shared Sequel::Impala modules.
    module Hive2
      module DatabaseMethods
        extend Sequel::Database::ResetIdentifierMangling
        include Sequel::Impala::DatabaseMethods
      end

      class Dataset < Sequel::JDBC::Dataset
        include Sequel::Impala::DatasetMethods
      end
    end
  end
end
@@ -0,0 +1,635 @@
1
+ module Sequel
2
+ module Impala
3
# Database behavior shared by the native Impala adapter and the jdbc/hive2
# adapter.
module DatabaseMethods
  # Create a join table with one Integer column per key of +hash+, added in
  # key-name order.  No composite primary key, foreign keys, or index are
  # created, as Impala doesn't support those.
  def create_join_table(hash, options=OPTS)
    key_names = hash.keys.sort_by(&:to_s)
    create_table(join_table_name(hash, options), options) do
      key_names.each { |key| Integer key }
    end
  end

  # Create a database/schema in Impala.
  #
  # Options:
  # :if_not_exists :: Don't raise an error if the schema already exists.
  # :location :: Set the file system location to store the data for tables
  #              in the created schema.
  #
  # Examples:
  #
  #   create_schema(:s)
  #   # CREATE SCHEMA `s`
  #
  #   create_schema(:s, :if_not_exists=>true)
  #   # CREATE SCHEMA IF NOT EXISTS `s`
  #
  #   create_schema(:s, :location=>'/a/b')
  #   # CREATE SCHEMA `s` LOCATION '/a/b'
  def create_schema(schema, options=OPTS)
    run(create_schema_sql(schema, options))
  end

  # Set the database_type for this database to :impala.
  def database_type
    :impala
  end

  # Return the DESCRIBE output for the table, showing table
  # columns, types, and comments.  If the :formatted option
  # is given, use DESCRIBE FORMATTED and return a lot more
  # information about the table.  Both of these return arrays
  # of hashes.  If the :dataset option is given, a naked copy of that
  # dataset is used to run the query.
  #
  # Examples:
  #
  #   describe(:t)
  #   # DESCRIBE `t`
  #
  #   describe(:t, :formatted=>true)
  #   # DESCRIBE FORMATTED `t`
  def describe(table, opts=OPTS)
    if ds = opts[:dataset]
      ds = ds.naked
    else
      ds = dataset.clone
      ds.identifier_input_method = identifier_input_method
    end
    ds.identifier_output_method = nil
    # No space before the placeholder: 'FORMATTED ' already ends with one,
    # and "DESCRIBE " does too, so the SQL matches the examples above.
    ds.with_sql("DESCRIBE #{'FORMATTED ' if opts[:formatted]}?", table).all
  end

  # Drop a database/schema from Impala.
  #
  # Options:
  # :if_exists :: Don't raise an error if the schema doesn't exist.
  #
  # Examples:
  #
  #   drop_schema(:s)
  #   # DROP SCHEMA `s`
  #
  #   drop_schema(:s, :if_exists=>true)
  #   # DROP SCHEMA IF EXISTS `s`
  def drop_schema(schema, options=OPTS)
    run(drop_schema_sql(schema, options))
  end

  # Implicitly qualify the table if using the :search_path option.
  # This will look at all of the tables and views in the schemas,
  # and if an unqualified table is used and appears in one of the
  # schemas, it will be implicitly qualified with the given schema
  # name.  Without the :search_path option, or for objects of any other
  # type, +table+ is returned unchanged.
  def implicit_qualify(table)
    return table unless opts[:search_path]

    case table
    when Symbol
      schema, name, aliaz = Sequel.split_symbol(table)
      # A symbol that already carries a schema is left alone.
      return table if schema
      qualified = implicit_qualify(name)
      aliaz ? Sequel.as(qualified, aliaz) : qualified
    when String
      schema = search_path_table_schemas[table]
      schema ? Sequel.qualify(schema, table) : table
    when SQL::Identifier
      implicit_qualify(table.value.to_s)
    when SQL::AliasedExpression
      # Qualify the aliased expression itself and keep the alias.  (This
      # branch used to recurse on +table+ and read an undefined variable.)
      SQL::AliasedExpression.new(implicit_qualify(table.expression), table.alias)
    else
      table
    end
  end

  # Load data from HDFS into Impala.
  #
  # Options:
  # :overwrite :: Overwrite the existing table instead of appending to it.
  #
  # Examples:
  #
  #   load_data('/user/foo', :bar)
  #   # LOAD DATA INPATH '/user/foo' INTO TABLE `bar`
  #
  #   load_data('/user/foo', :bar, :overwrite=>true)
  #   # LOAD DATA INPATH '/user/foo' OVERWRITE INTO TABLE `bar`
  def load_data(path, table, options=OPTS)
    run(load_data_sql(path, table, options))
  end

  # Don't use PRIMARY KEY or AUTOINCREMENT on Impala, as Impala doesn't
  # support either.
  def serial_primary_key_options
    {:type=>Integer}
  end

  # Impala supports CREATE TABLE IF NOT EXISTS.
  def supports_create_table_if_not_exists?
    true
  end

  # Impala does not support foreign keys.
  def supports_foreign_key_parsing?
    false
  end

  # Impala does not support indexes.
  def supports_index_parsing?
    false
  end

  # Return the names from SHOW TABLES that are actual tables and not
  # views.  Each name is checked with a separate DESCRIBE FORMATTED query.
  #
  # Options:
  # :schema :: List the tables in the given schema.
  def tables(opts=OPTS)
    _tables(opts).select { |t| is_valid_table?(t) }
  end

  # Impala doesn't support transactions, so instead of issuing a
  # transaction, just checkout a connection.  This ensures the same
  # connection is used for the transaction block, but as Impala
  # doesn't support transactions, you can't rollback.
  def transaction(opts=OPTS)
    synchronize(opts[:server]) { |c| yield c }
  end

  # Return the names from SHOW TABLES that is_valid_table? does not
  # recognize as tables, i.e. the views.
  #
  # Options:
  # :schema :: List the views in the given schema.
  def views(opts=OPTS)
    _tables(opts).reject { |t| is_valid_table?(t) }
  end

  private

  # Names returned by SHOW TABLES (optionally limited to the :schema
  # option), passed through the output identifier method.
  def _tables(opts)
    identifier = output_identifier_meth
    schema = opts[:schema]
    sql = "SHOW TABLES#{" IN #{quote_identifier(schema)}" if schema}"
    metadata_dataset.with_sql(sql).select_map(:name).map { |name| identifier.call(name) }
  end

  # Impala uses ADD COLUMNS instead of ADD COLUMN.  As its use of
  # ADD COLUMNS implies, it supports adding multiple columns at once,
  # but this adapter doesn't offer an API for that.
  def alter_table_add_column_sql(table, op)
    "ADD COLUMNS (#{column_definition_sql(op)})"
  end

  # Impala uses CHANGE instead of having separate RENAME syntax
  # for renaming columns.  As CHANGE requires a type, look up the
  # type from the database schema.  Raises Error if the column's
  # database type cannot be found there.
  def alter_table_rename_column_sql(table, op)
    old_name = op[:name]
    _, column_info = schema(table).find { |name, _| name == old_name }
    db_type = column_info && column_info[:db_type]
    unless db_type
      raise Error, "cannot determine database type to use for CHANGE COLUMN operation"
    end
    new_col = op.merge(:type=>db_type, :name=>op[:new_name])
    "CHANGE #{quote_identifier(old_name)} #{column_definition_sql(new_col)}"
  end

  # Changing a column type also uses CHANGE, repeating the column name.
  def alter_table_set_column_type_sql(table, op)
    "CHANGE #{quote_identifier(op[:name])} #{column_definition_sql(op)}"
  end

  # Add COMMENT when defining the column, if :comment is present.
  def column_definition_comment_sql(sql, column)
    sql << " COMMENT #{literal(column[:comment])}" if column[:comment]
  end

  # The only column definition component emitted is the comment.
  def column_definition_order
    [:comment]
  end

  def create_schema_sql(schema, options)
    if_not_exists = 'IF NOT EXISTS ' if options[:if_not_exists]
    location = " LOCATION #{literal(options[:location])}" if options[:location]
    "CREATE SCHEMA #{if_not_exists}#{quote_identifier(schema)}#{location}"
  end

  # Support using table parameters for CREATE TABLE AS, necessary for
  # creating parquet files from datasets.
  def create_table_as_sql(name, sql, options)
    "#{create_table_prefix_sql(name, options)}#{create_table_parameters_sql(options)} AS #{sql}"
  end

  # Supports the :external and :if_not_exists options.
  def create_table_prefix_sql(name, options)
    external = 'EXTERNAL ' if options[:external]
    if_not_exists = ' IF NOT EXISTS' if options[:if_not_exists]
    "CREATE #{external}TABLE#{if_not_exists} #{quote_schema_table(name)}"
  end

  # Append the table parameters to the default CREATE TABLE statement.
  def create_table_sql(name, generator, options)
    super << create_table_parameters_sql(options)
  end

  # SQL fragment for the table-level options :comment, :field_term,
  # :field_escape (only used together with :field_term), :line_term,
  # :stored_as and :location.  Empty if none of them is given.
  def create_table_parameters_sql(options)
    sql = ""
    sql << " COMMENT #{literal(options[:comment])}" if options[:comment]

    field_term = options[:field_term]
    line_term = options[:line_term]
    if field_term || line_term
      sql << " ROW FORMAT DELIMITED"
      if field_term
        sql << " FIELDS TERMINATED BY #{literal(field_term)}"
        sql << " ESCAPED BY #{literal(options[:field_escape])}" if options[:field_escape]
      end
      sql << " LINES TERMINATED BY #{literal(line_term)}" if line_term
    end

    sql << " STORED AS #{options[:stored_as]}" if options[:stored_as]
    sql << " LOCATION #{literal(options[:location])}" if options[:location]
    sql
  end

  def drop_schema_sql(schema, options)
    "DROP SCHEMA #{'IF EXISTS ' if options[:if_exists]}#{quote_identifier(schema)}"
  end

  # Impala folds identifiers to lowercase, quoted or not, and is actually
  # case insensitive, so don't use an identifier input or output method.
  def identifier_input_method_default
    nil
  end

  def identifier_output_method_default
    nil
  end

  # Hash mapping table name to schema name for all tables and views in the
  # :search_path schemas (a comma separated string or an array).  Schemas
  # are visited last to first, so earlier entries in the search path win.
  # The result is cached after the first call.
  def search_path_table_schemas
    @search_path_table_schemas ||= begin
      search_path = opts[:search_path]
      search_path = search_path.split(',') if search_path.is_a?(String)
      search_path.reverse_each.with_object({}) do |schema, found|
        _tables(:schema=>schema).each { |table| found[table.to_s] = schema.to_s }
      end
    end
  end

  # Use the 'Table Type:' row of DESCRIBE FORMATTED to differentiate
  # tables from views: true unless the reported type contains VIEW.
  # Returns nil (so the name is treated as a view) when the output has no
  # such row.
  def is_valid_table?(t)
    type_row = describe(t, :formatted=>true).find { |row| row[:name].to_s.strip == 'Table Type:' }
    return unless type_row
    type_row[:type].to_s.strip !~ /VIEW/
  end

  def load_data_sql(path, table, options)
    "LOAD DATA INPATH #{literal(path)}#{' OVERWRITE' if options[:overwrite]} INTO TABLE #{literal(table)}"
  end

  # Metadata queries on JDBC use uppercase keys, so set the identifier
  # output method to downcase so that metadata queries work correctly.
  def metadata_dataset
    @metadata_dataset ||= begin
      ds = dataset
      ds.identifier_input_method = identifier_input_method_default
      ds.identifier_output_method = :downcase
      ds
    end
  end

  # Impala doesn't support date columns yet, so use timestamp until date
  # is natively supported.
  def type_literal_generic_date(column)
    :timestamp
  end

  # Impala uses double instead of "double precision" for floating point
  # values.
  def type_literal_generic_float(column)
    :double
  end

  # Impala uses decimal instead of numeric for arbitrary precision
  # numeric values.
  def type_literal_generic_numeric(column)
    size = column[:size]
    size ? "decimal(#{Array(size).join(', ')})" : :decimal
  end

  # Use char or varchar if given a size, otherwise use string.
  # Using a size is not recommended, as Impala doesn't implicitly
  # cast string values to char or varchar, and doesn't implicitly
  # cast from different sizes of varchar.
  def type_literal_generic_string(column)
    size = column[:size]
    return :string unless size
    "#{'var' unless column[:fixed]}char(#{size})"
  end
end
335
+
336
# Dataset behavior shared by the native Impala adapter and the jdbc/hive2
# adapter.
module DatasetMethods
  BACKTICK = '`'.freeze
  APOS = "'".freeze
  STRING_ESCAPE_RE = /([\\'])/
  STRING_ESCAPE_REPLACE = '\\\\\1'.freeze
  BOOL_TRUE = 'true'.freeze
  BOOL_FALSE = 'false'.freeze
  CONSTANT_LITERAL_MAP = {:CURRENT_TIMESTAMP=>'now()'.freeze}.freeze
  PAREN_OPEN = Dataset::PAREN_OPEN
  PAREN_CLOSE = Dataset::PAREN_CLOSE
  SPACE = Dataset::SPACE
  NOT = 'NOT '.freeze
  REGEXP = ' REGEXP '.freeze
  # Name of the marker column added by #except to tell the two datasets apart.
  EXCEPT_SOURCE_COLUMN = :__source__

  Dataset.def_sql_method(self, :select, %w'with select distinct columns from join where group having compounds order limit')

  # Handle string concatenation using the concat string function.
  # Don't use the ESCAPE syntax when using LIKE/NOT LIKE, as
  # Impala doesn't support escaping LIKE metacharacters.
  # Support regexps on Impala using the REGEXP operator.
  # For case insensitive regexps, convert both values to uppercase first.
  def complex_expression_sql_append(sql, op, args)
    case op
    when :'||'
      literal_append(sql, Sequel.function(:concat, *args))
    when :LIKE, :'NOT LIKE'
      sql << PAREN_OPEN
      literal_append(sql, args.at(0))
      sql << SPACE << op.to_s << SPACE
      literal_append(sql, args.at(1))
      sql << PAREN_CLOSE
    when :~, :'!~', :'~*', :'!~*'
      case_insensitive = (op == :'~*' || op == :'!~*')
      negated = (op == :'!~' || op == :'!~*')
      args = args.map { |a| Sequel.function(:upper, a) } if case_insensitive
      sql << NOT if negated
      sql << PAREN_OPEN
      literal_append(sql, args.at(0))
      sql << REGEXP
      literal_append(sql, args.at(1))
      sql << PAREN_CLOSE
    else
      super
    end
  end

  # Use now() for current timestamp, as Impala doesn't support
  # CURRENT_TIMESTAMP.  Other constants are emitted as their name.
  def constant_sql_append(sql, constant)
    sql << CONSTANT_LITERAL_MAP.fetch(constant, constant.to_s)
  end

  # Use the addition operator combined with interval types to
  # handle date arithmetic when using the date_arithmetic
  # extension.  The expression is always cast to a timestamp, even when
  # there is no interval to add.
  def date_add_sql_append(sql, da)
    intervals = []
    each_valid_interval_unit(da.interval, Sequel::SQL::DateAdd::DatasetMethods::DEF_DURATION_UNITS) do |value, sql_unit|
      intervals << Sequel.lit("INTERVAL #{value} #{sql_unit}")
    end

    base = Sequel.cast(da.expr, Time)
    if intervals.empty?
      literal_append(sql, base)
    else
      complex_expression_sql_append(sql, :+, [base] + intervals)
    end
  end

  # DELETE is emulated on Impala and doesn't return the number of
  # modified rows, so always return nil.
  def delete
    super
    nil
  end

  # Emulate DELETE using INSERT OVERWRITE selecting all columns from
  # the table, with a reversed condition used for WHERE.  Without a WHERE
  # condition, WHERE false is used so that no rows are kept.
  def delete_sql
    sources = opts[:from]
    sql = "INSERT OVERWRITE "
    source_list_append(sql, sources)
    sql << " SELECT * FROM "
    source_list_append(sql, sources)
    if condition = opts[:where]
      sql << " WHERE NOT ("
      literal_append(sql, condition)
      sql << ")"
    else
      sql << " WHERE false"
    end
    sql
  end

  # Implicitly qualify tables if using the :search_path database option.
  def from(*)
    ds = super
    sources = ds.opts[:from]
    # The :from option is nil when no sources were given, so guard against
    # calling map! on nil.
    sources.map! { |t| db.implicit_qualify(t) } if sources
    ds
  end

  # Implicitly qualify tables if using the :search_path database option.
  def join_table(type, table, expr=nil, options=OPTS, &block)
    super(type, db.implicit_qualify(table), expr, options, &block)
  end

  # Emulate TRUNCATE by using INSERT OVERWRITE selecting all columns
  # from the table, with WHERE false.
  def truncate_sql
    ds = clone
    ds.opts.delete(:where)
    ds.delete_sql
  end

  # Don't remove an order, because that breaks things when offsets
  # are used, as Impala requires an order when using an offset.
  def empty?
    get(Sequel::SQL::AliasedExpression.new(1, :one)).nil?
  end

  # Emulate EXCEPT using a UNION ALL and keeping only the rows that appear
  # solely in the first dataset.  EXCEPT ALL and :from_self=>false are not
  # supported and raise InvalidOperation.
  def except(other, opts=OPTS)
    raise(InvalidOperation, "EXCEPT ALL not supported") if opts[:all]
    raise(InvalidOperation, "The :from_self=>false option to except is not supported") if opts[:from_self] == false
    cols = columns
    # Tag rows of the other dataset with 2 and rows of this dataset with 1;
    # a group with a single row whose tag is 1 exists only in this dataset.
    rhs = other.from_self.select_group(*other.columns).select_append(Sequel.expr(2).as(EXCEPT_SOURCE_COLUMN))
    from_self.
      select_group(*cols).
      select_append(Sequel.expr(1).as(EXCEPT_SOURCE_COLUMN)).
      union(rhs, all: true).
      select_group(*cols).
      having{{count{}.* => 1, min(EXCEPT_SOURCE_COLUMN) => 1}}.
      from_self(opts)
  end

  # Use INSERT OVERWRITE instead of INSERT INTO when inserting into this dataset:
  #
  #   DB[:table].insert_overwrite.insert(DB[:other])
  #   # INSERT OVERWRITE table SELECT * FROM other
  def insert_overwrite
    clone(:insert_overwrite=>true)
  end

  # Impala does not support INSERT DEFAULT VALUES.
  def insert_supports_empty_values?
    false
  end

  # Emulate INTERSECT using a UNION ALL and checking for values in both
  # tables.  INTERSECT ALL and :from_self=>false are not supported and
  # raise InvalidOperation.
  def intersect(other, opts=OPTS)
    raise(InvalidOperation, "INTERSECT ALL not supported") if opts[:all]
    raise(InvalidOperation, "The :from_self=>false option to intersect is not supported") if opts[:from_self] == false
    cols = columns
    from_self.
      select_group(*cols).
      union(other.from_self.select_group(*other.columns), all: true).
      select_group(*cols).
      having{count{}.* > 1}.
      from_self(opts)
  end

  # Impala supports non-recursive common table expressions.
  def supports_cte?(type=:select)
    true
  end

  # Impala doesn't support derived column lists when aliasing
  # tables.
  def supports_derived_column_lists?
    false
  end

  # Impala doesn't support EXCEPT or INTERSECT, but support is emulated for them.
  # However, EXCEPT ALL and INTERSECT ALL are not emulated.
  def supports_intersect_except_all?
    false
  end

  # Impala only supports IS NULL, not IS TRUE or IS FALSE.
  def supports_is_true?
    false
  end

  # Impala doesn't support IN when used with multiple columns.
  def supports_multiple_column_in?
    false
  end

  # Impala supports regexps using the REGEXP operator.
  def supports_regexp?
    true
  end

  # Impala supports window functions.
  def supports_window_functions?
    true
  end

  # Create a parquet file from this dataset.  +table+ should
  # be the table name to create.  To specify a path for the
  # parquet file, use the :location option.
  #
  # Examples:
  #
  #   DB[:t].to_parquet(:p)
  #   # CREATE TABLE `p` STORED AS parquet AS
  #   # SELECT * FROM `t`
  #
  #   DB[:t].to_parquet(:p, :location=>'/a/b')
  #   # CREATE TABLE `p` STORED AS parquet LOCATION '/a/b' AS
  #   # SELECT * FROM `t`
  def to_parquet(table, options=OPTS)
    db.create_table(table, options.merge(:as=>self, :stored_as=>:parquet))
  end

  # UPDATE is emulated on Impala, and returns nil instead of the number of
  # modified rows.
  def update(values=OPTS)
    super
    nil
  end

  # Emulate UPDATE using INSERT OVERWRITE AS SELECT.  For all columns used
  # in the given +values+, use a CASE statement.  In the CASE statement,
  # set the value to the new value if the row matches WHERE conditions of
  # the current dataset, otherwise use the existing value.
  def update_sql(values)
    sources = opts[:from]
    sql = "INSERT OVERWRITE "
    source_list_append(sql, sources)
    sql << " SELECT "

    # Without a WHERE clause every row matches, so the condition is true.
    filter = opts[:where]
    condition = filter ? Sequel.lit(literal(filter)) : true

    select_all.columns.each_with_index do |column, index|
      sql << ', ' unless index.zero?

      if values.has_key?(column)
        literal_append(sql, Sequel.case({condition=>values[column]}, column).as(column))
      else
        quote_identifier_append(sql, column)
      end
    end

    sql << " FROM "
    source_list_append(sql, sources)
    sql
  end

  private

  # Impala doesn't handle the DEFAULT keyword used in inserts, as all default
  # values in Impala are NULL, so just insert NULL into the last column.
  def insert_empty_columns_values
    [[columns.last], [nil]]
  end

  def literal_true
    BOOL_TRUE
  end

  def literal_false
    BOOL_FALSE
  end

  # Emit OVERWRITE instead of INTO when #insert_overwrite was used.
  def insert_into_sql(sql)
    sql << (@opts[:insert_overwrite] ? ' OVERWRITE ' : ' INTO ')
    identifier_append(sql, unaliased_identifier(@opts[:from].first))
  end

  # Double backslashes in all strings, and escape all apostrophes with
  # backslashes.
  def literal_string_append(sql, s)
    escaped = s.to_s.gsub(STRING_ESCAPE_RE, STRING_ESCAPE_REPLACE)
    sql << APOS << escaped << APOS
  end

  # Impala doesn't support escaping of identifiers, so you can't use backtick in
  # an identifier name.
  def quoted_identifier_append(sql, name)
    sql << BACKTICK << name.to_s << BACKTICK
  end

  # Don't include a LIMIT clause if there is no FROM clause.  In general,
  # such queries can only return 1 row.
  def select_limit_sql(sql)
    return unless opts[:from]
    super
  end
end
634
+ end
635
+ end