sequel-impala 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG +3 -0
  3. data/LICENSE +462 -0
  4. data/README.rdoc +39 -0
  5. data/Rakefile +39 -0
  6. data/lib/driver/commons-logging-1.2.jar +0 -0
  7. data/lib/driver/hadoop-common-2.6.0.jar +0 -0
  8. data/lib/driver/hadoop-core-2.6.0.jar +0 -0
  9. data/lib/driver/hive-exec-1.1.0.jar +0 -0
  10. data/lib/driver/hive-jdbc-1.1.0.jar +0 -0
  11. data/lib/driver/hive-metastore-1.1.0.jar +0 -0
  12. data/lib/driver/hive-service-1.1.0.jar +0 -0
  13. data/lib/driver/httpclient-4.3.jar +0 -0
  14. data/lib/driver/httpcore-4.3.jar +0 -0
  15. data/lib/driver/libfb303-0.9.0.jar +0 -0
  16. data/lib/driver/slf4j-api-1.7.5.jar +0 -0
  17. data/lib/impala.rb +47 -0
  18. data/lib/impala/connection.rb +117 -0
  19. data/lib/impala/cursor.rb +157 -0
  20. data/lib/impala/protocol.rb +8 -0
  21. data/lib/impala/protocol/beeswax_constants.rb +15 -0
  22. data/lib/impala/protocol/beeswax_service.rb +766 -0
  23. data/lib/impala/protocol/beeswax_types.rb +193 -0
  24. data/lib/impala/protocol/cli_service_constants.rb +60 -0
  25. data/lib/impala/protocol/cli_service_types.rb +1452 -0
  26. data/lib/impala/protocol/facebook_service.rb +706 -0
  27. data/lib/impala/protocol/fb303_constants.rb +15 -0
  28. data/lib/impala/protocol/fb303_types.rb +25 -0
  29. data/lib/impala/protocol/hive_metastore_constants.rb +53 -0
  30. data/lib/impala/protocol/hive_metastore_types.rb +698 -0
  31. data/lib/impala/protocol/impala_hive_server2_service.rb +29 -0
  32. data/lib/impala/protocol/impala_service.rb +377 -0
  33. data/lib/impala/protocol/impala_service_constants.rb +13 -0
  34. data/lib/impala/protocol/impala_service_types.rb +90 -0
  35. data/lib/impala/protocol/status_constants.rb +13 -0
  36. data/lib/impala/protocol/status_types.rb +46 -0
  37. data/lib/impala/protocol/t_c_l_i_service.rb +948 -0
  38. data/lib/impala/protocol/thrift_hive_metastore.rb +4707 -0
  39. data/lib/impala/version.rb +3 -0
  40. data/lib/jdbc/hive2.rb +46 -0
  41. data/lib/sequel/adapters/impala.rb +123 -0
  42. data/lib/sequel/adapters/jdbc/hive2.rb +26 -0
  43. data/lib/sequel/adapters/shared/impala.rb +635 -0
  44. data/lib/sequel/extensions/csv_to_parquet.rb +112 -0
  45. data/spec/database_test.rb +56 -0
  46. data/spec/dataset_test.rb +1268 -0
  47. data/spec/files/bad_down_migration/001_create_alt_basic.rb +4 -0
  48. data/spec/files/bad_down_migration/002_create_alt_advanced.rb +4 -0
  49. data/spec/files/bad_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  50. data/spec/files/bad_timestamped_migrations/1273253851_create_nodes.rb +9 -0
  51. data/spec/files/bad_timestamped_migrations/1273253853_3_create_users.rb +3 -0
  52. data/spec/files/bad_up_migration/001_create_alt_basic.rb +4 -0
  53. data/spec/files/bad_up_migration/002_create_alt_advanced.rb +3 -0
  54. data/spec/files/convert_to_timestamp_migrations/001_create_sessions.rb +9 -0
  55. data/spec/files/convert_to_timestamp_migrations/002_create_nodes.rb +9 -0
  56. data/spec/files/convert_to_timestamp_migrations/003_3_create_users.rb +4 -0
  57. data/spec/files/convert_to_timestamp_migrations/1273253850_create_artists.rb +9 -0
  58. data/spec/files/convert_to_timestamp_migrations/1273253852_create_albums.rb +9 -0
  59. data/spec/files/duplicate_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  60. data/spec/files/duplicate_timestamped_migrations/1273253853_create_nodes.rb +9 -0
  61. data/spec/files/duplicate_timestamped_migrations/1273253853_create_users.rb +4 -0
  62. data/spec/files/integer_migrations/001_create_sessions.rb +9 -0
  63. data/spec/files/integer_migrations/002_create_nodes.rb +9 -0
  64. data/spec/files/integer_migrations/003_3_create_users.rb +4 -0
  65. data/spec/files/interleaved_timestamped_migrations/1273253849_create_sessions.rb +9 -0
  66. data/spec/files/interleaved_timestamped_migrations/1273253850_create_artists.rb +9 -0
  67. data/spec/files/interleaved_timestamped_migrations/1273253851_create_nodes.rb +9 -0
  68. data/spec/files/interleaved_timestamped_migrations/1273253852_create_albums.rb +9 -0
  69. data/spec/files/interleaved_timestamped_migrations/1273253853_3_create_users.rb +4 -0
  70. data/spec/files/reversible_migrations/001_reversible.rb +5 -0
  71. data/spec/files/reversible_migrations/002_reversible.rb +5 -0
  72. data/spec/files/reversible_migrations/003_reversible.rb +5 -0
  73. data/spec/files/reversible_migrations/004_reversible.rb +5 -0
  74. data/spec/files/reversible_migrations/005_reversible.rb +10 -0
  75. data/spec/files/timestamped_migrations/1273253849_create_sessions.rb +9 -0
  76. data/spec/files/timestamped_migrations/1273253851_create_nodes.rb +9 -0
  77. data/spec/files/timestamped_migrations/1273253853_3_create_users.rb +4 -0
  78. data/spec/impala_test.rb +285 -0
  79. data/spec/migrator_test.rb +240 -0
  80. data/spec/plugin_test.rb +91 -0
  81. data/spec/prepared_statement_test.rb +327 -0
  82. data/spec/schema_test.rb +356 -0
  83. data/spec/spec_helper.rb +15 -0
  84. data/spec/timezone_test.rb +86 -0
  85. data/spec/type_test.rb +99 -0
  86. metadata +239 -0
@@ -0,0 +1,3 @@
1
# Top-level namespace that carries the version constant.
module Impala
  # Version string exposed as Impala::VERSION.
  VERSION = "0.4.3"
end
@@ -0,0 +1,46 @@
1
# Warn when this file is loaded on a Ruby where JRUBY_VERSION is either
# undefined or nil.
unless defined?(JRUBY_VERSION) && !JRUBY_VERSION.nil?
  warn 'jdbc-hive2 is only for use with JRuby'
end

module Jdbc
  # Helpers for locating and loading the jar files that make up the
  # Hive2 JDBC driver.
  module Hive2
    DRIVER_VERSION = '1.1.0'
    VERSION = "#{DRIVER_VERSION}.0"

    # Returns a new array with the relative paths of the driver jars,
    # in the order in which load_driver loads them.
    def self.driver_jar
      %w[
        libfb303-0.9.0
        slf4j-api-1.7.5
        hadoop-common-2.6.0
        hadoop-core-2.6.0
        commons-logging-1.2
        hive-exec-1.1.0
        hive-jdbc-1.1.0
        hive-metastore-1.1.0
        hive-service-1.1.0
        httpcore-4.3
        httpclient-4.3
      ].map{|jar_name| "driver/#{jar_name}.jar"}
    end

    # Calls +method+ (:load by default) on this module once per driver
    # jar, passing the jar path as the only argument.
    def self.load_driver(method = :load)
      driver_jar.each{|jar| send(method, jar)}
    end

    # Fully qualified Java class name of the JDBC driver.
    def self.driver_name
      'org.apache.hive.jdbc.HiveDriver'
    end

    # Backwards-compatible behavior: on JRuby, require the driver jars as
    # soon as this file is loaded when the jdbc.driver.autoload Java system
    # property is true.
    if defined?(JRUBY_VERSION) &&
        Java::JavaLang::Boolean.get_boolean('jdbc.driver.autoload')
      warn "autoloading jdbc driver on require 'jdbc/hive2'" if $VERBOSE
      load_driver :require
    end
  end
end
@@ -0,0 +1,123 @@
1
+ require 'impala'
2
+ require 'sequel/adapters/shared/impala'
3
+
4
module Sequel
  module Impala
    # Database class for the :impala adapter scheme. Connections are created
    # with ::Impala.connect, and SQL is run through the connection's
    # +execute+ method, which returns a cursor.
    class Database < Sequel::Database
      include DatabaseMethods

      # Exception classes used by Impala.
      ImpalaExceptions = [
        ::Impala::Error,
        ::Impala::Protocol::Beeswax::BeeswaxException,
        IOError
      ].freeze

      set_adapter_scheme :impala

      # Connect to the Impala server.  Currently, only the :host and :port options
      # are respected, and they default to 'localhost' and 21000, respectively.
      def connect(server)
        opts = server_opts(server)
        ::Impala.connect(opts[:host]||'localhost', (opts[:port]||21000).to_i)
      end

      # The exception classes that are rescued in #execute and passed to
      # raise_error.
      def database_error_classes
        ImpalaExceptions
      end

      # Close the given connection.
      def disconnect_connection(c)
        c.close
      end

      # Run the SQL on a checked out connection, logging it.  If a block is
      # given, yield the resulting cursor to it.  The cursor is always closed
      # before returning (if it was created and is still open), and this
      # method returns nil.
      def execute(sql, opts=OPTS)
        synchronize(opts[:server]) do |c|
          begin
            cursor = log_yield(sql){c.execute(sql)}
            yield cursor if block_given?
            nil
          rescue *ImpalaExceptions => e
            raise_error(e)
          ensure
            # cursor is nil if c.execute raised before assignment.
            cursor.close if cursor && cursor.open?
          end
        end
      end

      private

      def connection_execute_method
        :query
      end

      # Impala raises IOError if it detects a problem on the connection, and
      # in most cases that results in an unusable connection, so treat it as a
      # disconnect error so Sequel will reconnect.
      def disconnect_error?(exception, opts)
        exception.is_a?(IOError) || super
      end

      # Use DESCRIBE to get the column names and types for the table.
      # Returns an array of [column_name, column_info_hash] pairs.  Every
      # column is reported with a nil default and as not being a primary key.
      def schema_parse_table(table_name, opts)
        m = output_identifier_meth(opts[:dataset])

        table = if opts[:schema]
          Sequel.qualify(opts[:schema], table_name)
        else
          Sequel.identifier(table_name)
        end

        describe(table, opts).map do |row|
          # Keep the raw DESCRIBE type as :db_type, and store the schema
          # column type derived from it as :type.
          row[:db_type] = row[:type]
          row[:type] = schema_column_type(row[:db_type])
          row[:default] = nil
          row[:primary_key] = false
          [m.call(row.delete(:name)), row]
        end
      end
    end

    # Dataset class for the :impala adapter scheme.
    class Dataset < Sequel::Dataset
      include DatasetMethods

      Database::DatasetClass = self

      APOS = "'".freeze

      # Characters that must be backslash-escaped inside string literals,
      # mapped to their escaped form.
      STRING_ESCAPES = {
        "\\" => "\\\\".freeze,
        "'" => "\\'".freeze,
        "\n" => "\\n".freeze,
        "\r" => "\\r".freeze,
        "\0" => "\\0".freeze,
        "\b" => "\\b".freeze,
        # \Z is the escape for ASCII 26 (Ctrl-Z, DOS end-of-file), so the
        # key must be "\x1A".  Using "\04" (ASCII 4) here would turn byte
        # 0x04 into 0x1A and leave a real 0x1A unescaped.
        "\x1A" => "\\Z".freeze,
        # Impala is supposed to support this, but using it
        # breaks things to the point of returning bad data.
        # If you don't do this, the tabs in the input
        # get converted to spaces, but that's better than the
        # alternative.
        # "\t" => "\\t".freeze,
      }.freeze
      STRING_ESCAPE_RE = /(#{Regexp.union(STRING_ESCAPES.keys)})/

      # Execute the SQL and yield each row returned by the cursor.  Sets
      # @columns from the cursor's columns, and registers the database's
      # to_application_timestamp method as the cursor's typecast for
      # 'timestamp' values.
      def fetch_rows(sql)
        execute(sql) do |cursor|
          @columns = cursor.columns.map!{|c| output_identifier(c)}
          cursor.typecast_map['timestamp'] = db.method(:to_application_timestamp)
          cursor.each do |row|
            yield row
          end
        end
      end

      private

      # Unlike the jdbc/hive2 driver, the impala driver requires you escape
      # some values in string literals to get correct results, but not the
      # tab character or things break.
      def literal_string_append(sql, s)
        sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE){|m| STRING_ESCAPES[m]} << APOS
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'sequel/adapters/shared/impala'
2
+
3
# Ask Sequel's JDBC adapter to load the Hive2 driver class, passing :Hive2
# as the name of the driver gem constant.
Sequel::JDBC.load_driver('org.apache.hive.jdbc.HiveDriver', :Hive2)

module Sequel
  module JDBC
    # Register the setup hook for the :hive2 JDBC subadapter.  The hook
    # extends the database with the Hive2 database methods, sets its dataset
    # class, and returns the Java driver class.
    Sequel.synchronize do
      DATABASE_SETUP[:hive2] = proc do |database|
        database.extend(Sequel::JDBC::Hive2::DatabaseMethods)
        database.dataset_class = Sequel::JDBC::Hive2::Dataset
        org.apache.hive.jdbc.HiveDriver
      end
    end

    module Hive2
      # Database methods for jdbc/hive2 connections; the Impala-specific
      # behavior comes from the shared Sequel::Impala::DatabaseMethods.
      module DatabaseMethods
        extend Sequel::Database::ResetIdentifierMangling
        include Sequel::Impala::DatabaseMethods
      end

      # Dataset class for jdbc/hive2 connections, combining the generic JDBC
      # dataset with the shared Impala dataset methods.
      class Dataset < JDBC::Dataset
        include Sequel::Impala::DatasetMethods
      end
    end
  end
end
@@ -0,0 +1,635 @@
1
module Sequel
  module Impala
    # Database methods shared by the adapters that talk to Impala.
    module DatabaseMethods
      # Do not use a composite primary key, foreign keys, or an
      # index when creating a join table, as Impala doesn't support those.
      def create_join_table(hash, options=OPTS)
        keys = hash.keys.sort_by(&:to_s)
        create_table(join_table_name(hash, options), options) do
          keys.each do |key|
            Integer key
          end
        end
      end

      # Create a database/schema in Impala.
      #
      # Options:
      # :if_not_exists :: Don't raise an error if the schema already exists.
      # :location :: Set the file system location to store the data for tables
      #              in the created schema.
      #
      # Examples:
      #
      #   create_schema(:s)
      #   # CREATE SCHEMA `s`
      #
      #   create_schema(:s, :if_not_exists=>true)
      #   # CREATE SCHEMA IF NOT EXISTS `s`
      #
      #   create_schema(:s, :location=>'/a/b')
      #   # CREATE SCHEMA `s` LOCATION '/a/b'
      def create_schema(schema, options=OPTS)
        run(create_schema_sql(schema, options))
      end

      # Set the database_type for this database to :impala.
      def database_type
        :impala
      end

      # Return the DESCRIBE output for the table, showing table
      # columns, types, and comments.  If the :formatted option
      # is given, use DESCRIBE FORMATTED and return a lot more
      # information about the table.  Both of these return arrays
      # of hashes.
      #
      # If the :dataset option is given, a naked version of that dataset is
      # used to run the query; otherwise a clone of the default dataset is
      # used.
      #
      # Examples:
      #
      #   describe(:t)
      #   # DESCRIBE `t`
      #
      #   describe(:t, :formatted=>true)
      #   # DESCRIBE FORMATTED `t`
      def describe(table, opts=OPTS)
        if ds = opts[:dataset]
          ds = ds.naked
        else
          ds = dataset.clone
          ds.identifier_input_method = identifier_input_method
        end
        ds.identifier_output_method = nil
        # The trailing space lives inside 'FORMATTED ' so the generated SQL
        # has exactly one space before the table placeholder in both cases.
        ds.with_sql("DESCRIBE #{'FORMATTED ' if opts[:formatted]}?", table).all
      end

      # Drop a database/schema from Impala.
      #
      # Options:
      # :if_exists :: Don't raise an error if the schema doesn't exist.
      #
      # Examples:
      #
      #   drop_schema(:s)
      #   # DROP SCHEMA `s`
      #
      #   drop_schema(:s, :if_exists=>true)
      #   # DROP SCHEMA IF EXISTS `s`
      def drop_schema(schema, options=OPTS)
        run(drop_schema_sql(schema, options))
      end

      # Implicitly qualify the table if using the :search_path option.
      # This will look at all of the tables and views in the schemas,
      # and if an unqualified table is used and appears in one of the
      # schemas, it will be implicitly qualified with the given schema
      # name.
      #
      # Symbols that are already qualified, and objects of any type not
      # handled below, are returned unchanged.
      def implicit_qualify(table)
        return table unless opts[:search_path]

        case table
        when Symbol
          s, t, a = Sequel.split_symbol(table)
          if s
            return table
          end
          t = implicit_qualify(t)
          a ? Sequel.as(t, a) : t
        when String
          if schema = search_path_table_schemas[table]
            Sequel.qualify(schema, table)
          else
            table
          end
        when SQL::Identifier
          implicit_qualify(table.value.to_s)
        when SQL::AliasedExpression
          # Qualify the aliased expression itself and keep the alias.
          # Recursing on +table+ would never terminate.
          SQL::AliasedExpression.new(implicit_qualify(table.expression), table.alias)
        else
          table
        end
      end

      # Load data from HDFS into Impala.
      #
      # Options:
      # :overwrite :: Overwrite the existing table instead of appending to it.
      #
      # Examples:
      #
      #   load_data('/user/foo', :bar)
      #   LOAD DATA INPATH '/user/foo' INTO TABLE `bar`
      #
      #   load_data('/user/foo', :bar, :overwrite=>true)
      #   LOAD DATA INPATH '/user/foo' OVERWRITE INTO TABLE `bar`
      def load_data(path, table, options=OPTS)
        run(load_data_sql(path, table, options))
      end

      # Don't use PRIMARY KEY or AUTOINCREMENT on Impala, as Impala doesn't
      # support either.
      def serial_primary_key_options
        {:type=>Integer}
      end

      # Impala supports CREATE TABLE IF NOT EXISTS.
      def supports_create_table_if_not_exists?
        true
      end

      # Impala does not support foreign keys.
      def supports_foreign_key_parsing?
        false
      end

      # Impala does not support indexes.
      def supports_index_parsing?
        false
      end

      # Check that the tables returned by the JDBC driver are actually valid
      # tables and not views. The Hive2 JDBC driver returns views when listing
      # tables and nothing when listing views.
      def tables(opts=OPTS)
        _tables(opts).select{|t| is_valid_table?(t)}
      end

      # Impala doesn't support transactions, so instead of issuing a
      # transaction, just checkout a connection.  This ensures the same
      # connection is used for the transaction block, but as Impala
      # doesn't support transactions, you can't rollback.
      def transaction(opts=OPTS)
        synchronize(opts[:server]) do |c|
          yield c
        end
      end

      # Determine the available views by listing all tables via JDBC (which
      # includes both tables and views), and removing all valid tables.
      def views(opts=OPTS)
        _tables(opts).reject{|t| is_valid_table?(t)}
      end

      private

      # Return the names reported by SHOW TABLES (restricted to the :schema
      # option, if given), passed through the output identifier method.
      def _tables(opts)
        m = output_identifier_meth
        metadata_dataset.with_sql("SHOW TABLES#{" IN #{quote_identifier(opts[:schema])}" if opts[:schema]}").
          select_map(:name).map do |table|
            m.call(table)
          end
      end

      # Impala uses ADD COLUMNS instead of ADD COLUMN.  As its use of
      # ADD COLUMNS implies, it supports adding multiple columns at once,
      # but this adapter doesn't offer an API for that.
      def alter_table_add_column_sql(table, op)
        "ADD COLUMNS (#{column_definition_sql(op)})"
      end

      # Impala uses CHANGE instead of having separate RENAME syntax
      # for renaming columns.  As CHANGE requires a type, look up the
      # type from the database schema.  Raises Error if the column's
      # database type cannot be determined.
      def alter_table_rename_column_sql(table, op)
        old_name = op[:name]
        opts = schema(table).find{|x| x.first == old_name}
        opts = opts ? opts.last : {}
        unless opts[:db_type]
          raise Error, "cannot determine database type to use for CHANGE COLUMN operation"
        end
        new_col = op.merge(:type=>opts[:db_type], :name=>op[:new_name])
        "CHANGE #{quote_identifier(old_name)} #{column_definition_sql(new_col)}"
      end

      # Use CHANGE with the same column name to change the column's type.
      def alter_table_set_column_type_sql(table, op)
        "CHANGE #{quote_identifier(op[:name])} #{column_definition_sql(op)}"
      end

      # Add COMMENT when defining the column, if :comment is present.
      def column_definition_comment_sql(sql, column)
        sql << " COMMENT #{literal(column[:comment])}" if column[:comment]
      end

      # Only the comment is added after the column name and type.
      def column_definition_order
        [:comment]
      end

      def create_schema_sql(schema, options)
        "CREATE SCHEMA #{'IF NOT EXISTS ' if options[:if_not_exists]}#{quote_identifier(schema)}#{" LOCATION #{literal(options[:location])}" if options[:location]}"
      end

      # Support using table parameters for CREATE TABLE AS, necessary for
      # creating parquet files from datasets.
      def create_table_as_sql(name, sql, options)
        "#{create_table_prefix_sql(name, options)}#{create_table_parameters_sql(options)} AS #{sql}"
      end

      # Supports the :external and :if_not_exists options.
      def create_table_prefix_sql(name, options)
        "CREATE #{'EXTERNAL ' if options[:external]}TABLE#{' IF NOT EXISTS' if options[:if_not_exists]} #{quote_schema_table(name)}"
      end

      # Append the table parameters to the default CREATE TABLE SQL.
      def create_table_sql(name, generator, options)
        sql = super
        sql << create_table_parameters_sql(options)
        sql
      end

      # Build the SQL fragment for the table-level options :comment,
      # :field_term, :field_escape (only used together with :field_term),
      # :line_term, :stored_as, and :location.  Returns an empty string
      # if none of them are given.
      def create_table_parameters_sql(options)
        sql = ""
        sql << " COMMENT #{literal(options[:comment])}" if options[:comment]
        if options[:field_term] || options[:line_term]
          sql << " ROW FORMAT DELIMITED"
          if options[:field_term]
            sql << " FIELDS TERMINATED BY #{literal(options[:field_term])}"
            sql << " ESCAPED BY #{literal(options[:field_escape])}" if options[:field_escape]
          end
          if options[:line_term]
            sql << " LINES TERMINATED BY #{literal(options[:line_term])}"
          end
        end
        sql << " STORED AS #{options[:stored_as]}" if options[:stored_as]
        sql << " LOCATION #{literal(options[:location])}" if options[:location]
        sql
      end

      def drop_schema_sql(schema, options)
        "DROP SCHEMA #{'IF EXISTS ' if options[:if_exists]}#{quote_identifier(schema)}"
      end

      # Impala folds identifiers to lowercase, quoted or not, and is actually
      # case insensitive, so don't use an identifier input or output method.
      def identifier_input_method_default
        nil
      end
      def identifier_output_method_default
        nil
      end

      # Hash mapping table name strings to schema name strings for all
      # tables in the :search_path schemas.  The search path is walked in
      # reverse so that schemas listed earlier take precedence.  The result
      # is cached after the first call.
      def search_path_table_schemas
        @search_path_table_schemas ||= begin
          search_path = opts[:search_path]
          search_path = search_path.split(',') if search_path.is_a?(String)
          table_schemas = {}
          search_path.reverse_each do |schema|
            _tables(:schema=>schema).each do |table|
              table_schemas[table.to_s] = schema.to_s
            end
          end
          table_schemas
        end
      end

      # Use the 'Table Type:' row of the DESCRIBE FORMATTED output to
      # differentiate tables from views: anything whose type mentions VIEW
      # is not a valid table.  Returns nil if there is no such row.
      def is_valid_table?(t)
        rows = describe(t, :formatted=>true)
        if row = rows.find{|r| r[:name].to_s.strip == 'Table Type:'}
          row[:type].to_s.strip !~ /VIEW/
        end
      end

      def load_data_sql(path, table, options)
        "LOAD DATA INPATH #{literal(path)}#{' OVERWRITE' if options[:overwrite]} INTO TABLE #{literal(table)}"
      end

      # Metadata queries on JDBC use uppercase keys, so set the identifier
      # output method to downcase so that metadata queries work correctly.
      def metadata_dataset
        @metadata_dataset ||= (
          ds = dataset;
          ds.identifier_input_method = identifier_input_method_default;
          ds.identifier_output_method = :downcase;
          ds
        )
      end

      # Impala doesn't support date columns yet, so use timestamp until date
      # is natively supported.
      def type_literal_generic_date(column)
        :timestamp
      end

      # Impala uses double instead of "double precision" for floating point
      # values.
      def type_literal_generic_float(column)
        :double
      end

      # Impala uses decimal instead of numeric for arbitrary precision
      # numeric values.
      def type_literal_generic_numeric(column)
        column[:size] ? "decimal(#{Array(column[:size]).join(', ')})" : :decimal
      end

      # Use char or varchar if given a size, otherwise use string.
      # Using a size is not recommended, as Impala doesn't implicitly
      # cast string values to char or varchar, and doesn't implicitly
      # cast from different sizes of varchar.
      def type_literal_generic_string(column)
        if size = column[:size]
          "#{'var' unless column[:fixed]}char(#{size})"
        else
          :string
        end
      end
    end

    # Dataset methods shared by the adapters that talk to Impala.
    module DatasetMethods
      BACKTICK = '`'.freeze
      APOS = "'".freeze
      STRING_ESCAPE_RE = /([\\'])/
      STRING_ESCAPE_REPLACE = '\\\\\1'.freeze
      BOOL_TRUE = 'true'.freeze
      BOOL_FALSE = 'false'.freeze
      CONSTANT_LITERAL_MAP = {:CURRENT_TIMESTAMP=>'now()'.freeze}.freeze
      PAREN_OPEN = Dataset::PAREN_OPEN
      PAREN_CLOSE = Dataset::PAREN_CLOSE
      SPACE = Dataset::SPACE
      NOT = 'NOT '.freeze
      REGEXP = ' REGEXP '.freeze
      EXCEPT_SOURCE_COLUMN = :__source__

      Dataset.def_sql_method(self, :select, %w'with select distinct columns from join where group having compounds order limit')

      # Handle string concatenation using the concat string function.
      # Don't use the ESCAPE syntax when using LIKE/NOT LIKE, as
      # Impala doesn't support escaping LIKE metacharacters.
      # Support regexps on Impala using the REGEXP operator.
      # For case insensitive regexps, convert both values to uppercase first.
      def complex_expression_sql_append(sql, op, args)
        case op
        when :'||'
          literal_append(sql, Sequel.function(:concat, *args))
        when :LIKE, :'NOT LIKE'
          sql << PAREN_OPEN
          literal_append(sql, args.at(0))
          sql << SPACE << op.to_s << SPACE
          literal_append(sql, args.at(1))
          sql << PAREN_CLOSE
        when :~, :'!~', :'~*', :'!~*'
          if op == :'~*' || op == :'!~*'
            args = args.map{|a| Sequel.function(:upper, a)}
          end
          sql << NOT if op == :'!~' || op == :'!~*'
          sql << PAREN_OPEN
          literal_append(sql, args.at(0))
          sql << REGEXP
          literal_append(sql, args.at(1))
          sql << PAREN_CLOSE
        else
          super
        end
      end

      # Use now() for current timestamp, as Impala doesn't support
      # CURRENT_TIMESTAMP.
      def constant_sql_append(sql, constant)
        sql << CONSTANT_LITERAL_MAP.fetch(constant, constant.to_s)
      end

      # Use the addition operator combined with interval types to
      # handle date arithmetic when using the date_arithmetic
      # extension.
      def date_add_sql_append(sql, da)
        h = da.interval
        expr = da.expr
        intervals = []
        each_valid_interval_unit(h, Sequel::SQL::DateAdd::DatasetMethods::DEF_DURATION_UNITS) do |value, sql_unit|
          intervals << Sequel.lit("INTERVAL #{value} #{sql_unit}")
        end
        if intervals.empty?
          return literal_append(sql, Sequel.cast(expr, Time))
        else
          intervals.unshift(Sequel.cast(expr, Time))
          return complex_expression_sql_append(sql, :+, intervals)
        end
      end

      # DELETE is emulated on Impala and doesn't return the number of
      # modified rows.
      def delete
        super
        nil
      end

      # Emulate DELETE using INSERT OVERWRITE selecting all columns from
      # the table, with a reversed condition used for WHERE.
      def delete_sql
        sql = "INSERT OVERWRITE "
        source_list_append(sql, opts[:from])
        sql << " SELECT * FROM "
        source_list_append(sql, opts[:from])
        if where = opts[:where]
          sql << " WHERE NOT ("
          literal_append(sql, where)
          sql << ")"
        else
          sql << " WHERE false"
        end
        sql
      end

      # Implicitly qualify tables if using the :search_path database option.
      def from(*)
        ds = super
        # :from is nil when no sources are given, so there is nothing
        # to qualify in that case.
        if sources = ds.opts[:from]
          sources.map!{|t| db.implicit_qualify(t)}
        end
        ds
      end

      # Implicitly qualify tables if using the :search_path database option.
      def join_table(type, table, expr=nil, options=OPTS, &block)
        super(type, db.implicit_qualify(table), expr, options, &block)
      end

      # Emulate TRUNCATE by using INSERT OVERWRITE selecting all columns
      # from the table, with WHERE false.
      def truncate_sql
        ds = clone
        ds.opts.delete(:where)
        ds.delete_sql
      end

      # Don't remove an order, because that breaks things when offsets
      # are used, as Impala requires an order when using an offset.
      def empty?
        get(Sequel::SQL::AliasedExpression.new(1, :one)).nil?
      end

      # Emulate EXCEPT using a UNION ALL and checking for values in only the first table.
      # Rows from the receiver are tagged with 1 and rows from +other+ with 2
      # in the EXCEPT_SOURCE_COLUMN column, and only groups that occur once
      # and come from the receiver are kept.
      def except(other, opts=OPTS)
        raise(InvalidOperation, "EXCEPT ALL not supported") if opts[:all]
        raise(InvalidOperation, "The :from_self=>false option to except is not supported") if opts[:from_self] == false
        cols = columns
        rhs = other.from_self.select_group(*other.columns).select_append(Sequel.expr(2).as(EXCEPT_SOURCE_COLUMN))
        from_self.
          select_group(*cols).
          select_append(Sequel.expr(1).as(EXCEPT_SOURCE_COLUMN)).
          union(rhs, all: true).
          select_group(*cols).
          having{{count{}.* => 1, min(EXCEPT_SOURCE_COLUMN) => 1}}.
          from_self(opts)
      end

      # Use INSERT OVERWRITE instead of INSERT INTO when inserting into this dataset:
      #
      #   DB[:table].insert_overwrite.insert(DB[:other])
      #   # INSERT OVERWRITE table SELECT * FROM other
      def insert_overwrite
        clone(:insert_overwrite=>true)
      end

      # Impala does not support INSERT DEFAULT VALUES.
      def insert_supports_empty_values?
        false
      end

      # Emulate INTERSECT using a UNION ALL and checking for values in both tables.
      def intersect(other, opts=OPTS)
        raise(InvalidOperation, "INTERSECT ALL not supported") if opts[:all]
        raise(InvalidOperation, "The :from_self=>false option to intersect is not supported") if opts[:from_self] == false
        cols = columns
        from_self.
          select_group(*cols).
          union(other.from_self.select_group(*other.columns), all: true).
          select_group(*cols).
          having{count{}.* > 1}.
          from_self(opts)
      end

      # Impala supports non-recursive common table expressions.
      def supports_cte?(type=:select)
        true
      end

      # Impala doesn't support derived column lists when aliasing
      # tables.
      def supports_derived_column_lists?
        false
      end

      # Impala doesn't support EXCEPT or INTERSECT, but support is emulated for them.
      # However, EXCEPT ALL and INTERSECT ALL are not emulated.
      def supports_intersect_except_all?
        false
      end

      # Impala only supports IS NULL, not IS TRUE or IS FALSE.
      def supports_is_true?
        false
      end

      # Impala doesn't support IN when used with multiple columns.
      def supports_multiple_column_in?
        false
      end

      # Impala supports regexps using the REGEXP operator.
      def supports_regexp?
        true
      end

      # Impala supports window functions.
      def supports_window_functions?
        true
      end

      # Create a parquet file from this dataset.  +table+ should
      # be the table name to create.  To specify a path for the
      # parquet file, use the :location option.
      #
      # Examples:
      #
      #   DB[:t].to_parquet(:p)
      #   # CREATE TABLE `p` STORED AS parquet AS
      #   # SELECT * FROM `t`
      #
      #   DB[:t].to_parquet(:p, :location=>'/a/b')
      #   # CREATE TABLE `p` STORED AS parquet LOCATION '/a/b' AS
      #   # SELECT * FROM `t`
      def to_parquet(table, options=OPTS)
        db.create_table(table, options.merge(:as=>self, :stored_as=>:parquet))
      end

      # UPDATE is emulated on Impala, and returns nil instead of the number of
      # modified rows
      def update(values=OPTS)
        super
        nil
      end

      # Emulate UPDATE using INSERT OVERWRITE AS SELECT.  For all columns used
      # in the given +values+, use a CASE statement.  In the CASE statement,
      # set the value to the new value if the row matches WHERE conditions of
      # the current dataset, otherwise use the existing value.
      def update_sql(values)
        sql = "INSERT OVERWRITE "
        source_list_append(sql, opts[:from])
        sql << " SELECT "
        comma = false

        # Literalize the WHERE condition once so it can be used as the CASE
        # condition for every updated column.  Without a WHERE clause, every
        # row is updated.
        if where = opts[:where]
          where = Sequel.lit(literal(where))
        else
          where = true
        end

        select_all.columns.each do |c|
          if comma
            sql << comma
          else
            comma = ', '
          end

          if values.has_key?(c)
            new_value = values[c]
            literal_append(sql, Sequel.case({where=>new_value}, c).as(c))
          else
            quote_identifier_append(sql, c)
          end
        end
        sql << " FROM "
        source_list_append(sql, opts[:from])
        sql
      end

      private

      # Impala doesn't handle the DEFAULT keyword used in inserts, as all default
      # values in Impala are NULL, so just use a NULL value.
      def insert_empty_columns_values
        [[columns.last], [nil]]
      end

      def literal_true
        BOOL_TRUE
      end

      def literal_false
        BOOL_FALSE
      end

      # Use OVERWRITE instead of INTO if the dataset was created by
      # insert_overwrite.
      def insert_into_sql(sql)
        sql << (@opts[:insert_overwrite] ? ' OVERWRITE ' : ' INTO ')
        identifier_append(sql, unaliased_identifier(@opts[:from].first))
      end

      # Double backslashes in all strings, and escape all apostrophes with
      # backslashes.
      def literal_string_append(sql, s)
        sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE, STRING_ESCAPE_REPLACE) << APOS
      end

      # Impala doesn't support escaping of identifiers, so you can't use backtick in
      # an identifier name.
      def quoted_identifier_append(sql, name)
        sql << BACKTICK << name.to_s << BACKTICK
      end

      # Don't include a LIMIT clause if there is no FROM clause.  In general,
      # such queries can only return 1 row.
      def select_limit_sql(sql)
        return unless opts[:from]
        super
      end
    end
  end
end