sequel_impala 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +50 -0
- data/LICENSE +463 -0
- data/README.md +45 -0
- data/Rakefile +39 -0
- data/lib/driver/commons-collections-3.2.1.jar +0 -0
- data/lib/driver/commons-configuration-1.10.jar +0 -0
- data/lib/driver/commons-logging-1.2.jar +0 -0
- data/lib/driver/hadoop-auth-2.9.0.jar +0 -0
- data/lib/driver/hadoop-common-2.9.0.jar +0 -0
- data/lib/driver/hadoop-core-2.6.0.jar +0 -0
- data/lib/driver/hive-exec-1.1.0.jar +0 -0
- data/lib/driver/hive-jdbc-1.1.0.jar +0 -0
- data/lib/driver/hive-metastore-1.1.0.jar +0 -0
- data/lib/driver/hive-service-1.1.0.jar +0 -0
- data/lib/driver/httpclient-4.3.jar +0 -0
- data/lib/driver/httpcore-4.3.jar +0 -0
- data/lib/driver/libfb303-0.9.0.jar +0 -0
- data/lib/driver/log4j-1.2.17.jar +0 -0
- data/lib/driver/slf4j-api-1.7.5.jar +0 -0
- data/lib/driver/stax2-api-3.1.4.jar +0 -0
- data/lib/driver/woodstox-core-asl-4.4.1.jar +0 -0
- data/lib/impala.rb +55 -0
- data/lib/impala/connection.rb +180 -0
- data/lib/impala/cursor.rb +200 -0
- data/lib/impala/progress_reporter.rb +40 -0
- data/lib/impala/protocol.rb +8 -0
- data/lib/impala/protocol/beeswax_constants.rb +15 -0
- data/lib/impala/protocol/beeswax_service.rb +747 -0
- data/lib/impala/protocol/beeswax_types.rb +193 -0
- data/lib/impala/protocol/exec_stats_constants.rb +13 -0
- data/lib/impala/protocol/exec_stats_types.rb +133 -0
- data/lib/impala/protocol/facebook_service.rb +706 -0
- data/lib/impala/protocol/fb303_constants.rb +15 -0
- data/lib/impala/protocol/fb303_types.rb +25 -0
- data/lib/impala/protocol/hive_metastore_constants.rb +53 -0
- data/lib/impala/protocol/hive_metastore_types.rb +698 -0
- data/lib/impala/protocol/impala_hive_server2_service.rb +137 -0
- data/lib/impala/protocol/impala_service.rb +443 -0
- data/lib/impala/protocol/impala_service_constants.rb +13 -0
- data/lib/impala/protocol/impala_service_types.rb +192 -0
- data/lib/impala/protocol/status_constants.rb +13 -0
- data/lib/impala/protocol/status_types.rb +46 -0
- data/lib/impala/protocol/t_c_l_i_service.rb +1108 -0
- data/lib/impala/protocol/t_c_l_i_service_constants.rb +72 -0
- data/lib/impala/protocol/t_c_l_i_service_types.rb +1802 -0
- data/lib/impala/protocol/thrift_hive_metastore.rb +4707 -0
- data/lib/impala/protocol/types_constants.rb +13 -0
- data/lib/impala/protocol/types_types.rb +332 -0
- data/lib/impala/sasl_transport.rb +117 -0
- data/lib/impala/thrift_patch.rb +31 -0
- data/lib/impala/version.rb +3 -0
- data/lib/jdbc/hive2.rb +52 -0
- data/lib/jdbc/impala.rb +50 -0
- data/lib/rbhive.rb +8 -0
- data/lib/rbhive/connection.rb +150 -0
- data/lib/rbhive/explain_result.rb +46 -0
- data/lib/rbhive/result_set.rb +37 -0
- data/lib/rbhive/schema_definition.rb +86 -0
- data/lib/rbhive/t_c_l_i_connection.rb +466 -0
- data/lib/rbhive/t_c_l_i_result_set.rb +3 -0
- data/lib/rbhive/t_c_l_i_schema_definition.rb +87 -0
- data/lib/rbhive/table_schema.rb +122 -0
- data/lib/rbhive/version.rb +3 -0
- data/lib/sequel/adapters/impala.rb +220 -0
- data/lib/sequel/adapters/jdbc/hive2.rb +36 -0
- data/lib/sequel/adapters/jdbc/impala.rb +38 -0
- data/lib/sequel/adapters/rbhive.rb +177 -0
- data/lib/sequel/adapters/shared/impala.rb +808 -0
- data/lib/sequel/extensions/csv_to_parquet.rb +166 -0
- data/lib/thrift/facebook_service.rb +700 -0
- data/lib/thrift/fb303_constants.rb +9 -0
- data/lib/thrift/fb303_types.rb +19 -0
- data/lib/thrift/hive_metastore_constants.rb +41 -0
- data/lib/thrift/hive_metastore_types.rb +630 -0
- data/lib/thrift/hive_service_constants.rb +13 -0
- data/lib/thrift/hive_service_types.rb +72 -0
- data/lib/thrift/queryplan_constants.rb +13 -0
- data/lib/thrift/queryplan_types.rb +261 -0
- data/lib/thrift/sasl_client_transport.rb +161 -0
- data/lib/thrift/serde_constants.rb +92 -0
- data/lib/thrift/serde_types.rb +7 -0
- data/lib/thrift/t_c_l_i_service.rb +1054 -0
- data/lib/thrift/t_c_l_i_service_constants.rb +72 -0
- data/lib/thrift/t_c_l_i_service_types.rb +1768 -0
- data/lib/thrift/thrift_hive.rb +508 -0
- data/lib/thrift/thrift_hive_metastore.rb +3856 -0
- data/spec/database_test.rb +56 -0
- data/spec/dataset_test.rb +1268 -0
- data/spec/files/bad_down_migration/001_create_alt_basic.rb +4 -0
- data/spec/files/bad_down_migration/002_create_alt_advanced.rb +4 -0
- data/spec/files/bad_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/bad_timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/bad_timestamped_migrations/1273253853_3_create_users.rb +3 -0
- data/spec/files/bad_up_migration/001_create_alt_basic.rb +4 -0
- data/spec/files/bad_up_migration/002_create_alt_advanced.rb +3 -0
- data/spec/files/convert_to_timestamp_migrations/001_create_sessions.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/002_create_nodes.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/003_3_create_users.rb +4 -0
- data/spec/files/convert_to_timestamp_migrations/1273253850_create_artists.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/1273253852_create_albums.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253853_create_nodes.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253853_create_users.rb +4 -0
- data/spec/files/integer_migrations/001_create_sessions.rb +9 -0
- data/spec/files/integer_migrations/002_create_nodes.rb +9 -0
- data/spec/files/integer_migrations/003_3_create_users.rb +4 -0
- data/spec/files/interleaved_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253850_create_artists.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253852_create_albums.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253853_3_create_users.rb +4 -0
- data/spec/files/reversible_migrations/001_reversible.rb +5 -0
- data/spec/files/reversible_migrations/002_reversible.rb +5 -0
- data/spec/files/reversible_migrations/003_reversible.rb +5 -0
- data/spec/files/reversible_migrations/004_reversible.rb +5 -0
- data/spec/files/reversible_migrations/005_reversible.rb +10 -0
- data/spec/files/timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/timestamped_migrations/1273253853_3_create_users.rb +4 -0
- data/spec/impala_test.rb +290 -0
- data/spec/migrator_test.rb +240 -0
- data/spec/plugin_test.rb +91 -0
- data/spec/prepared_statement_test.rb +327 -0
- data/spec/schema_test.rb +356 -0
- data/spec/spec_helper.rb +19 -0
- data/spec/timezone_test.rb +86 -0
- data/spec/type_test.rb +99 -0
- metadata +294 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
require 'sequel/adapters/shared/impala'

Sequel::JDBC.load_driver('com.cloudera.impala.jdbc41.Driver', :Impala)

module Sequel
  module JDBC
    Sequel.synchronize do
      # Register the :impala adapter scheme with Sequel's JDBC subsystem.
      DATABASE_SETUP[:impala] = proc do |db|
        db.extend(Sequel::JDBC::Impala::DatabaseMethods)
        db.extend_datasets(Sequel::Impala::DatasetMethods)

        # Disconnect explicitly when the process exits; exiting without
        # disconnecting has been known to cause problems with this driver.
        at_exit{db.disconnect}

        # Return the driver class, as the DATABASE_SETUP proc contract requires.
        com.cloudera.impala.jdbc41.Driver
      end
    end

    module Impala
      module DatabaseMethods
        include Sequel::Impala::DatabaseMethods

        # Treat wrapped and unwrapped java.net.SocketExceptions raised by the
        # Thrift transport as disconnect errors, so Sequel reconnects.
        def disconnect_error?(exception, opts)
          super || exception.message =~ /\A(Java::JavaSql::SQLException: )?org\.apache\.thrift\.transport\.TTransportException: java\.net\.SocketException/
        end

        # Ignore SQLExceptions raised while closing a connection, since the
        # connection is being discarded anyway.
        def disconnect_connection(c)
          super
        rescue java.sql.SQLException
          nil
        end
      end
    end
  end
end
|
|
38
|
+
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
require 'rbhive'
require 'sequel/adapters/shared/impala'

module Sequel
  module Rbhive
    class Database < Sequel::Database
      include Impala::DatabaseMethods

      # Logger that discards all messages, used when no :hive_logger
      # option is given.
      NullLogger = Object.new
      def NullLogger.info(str)
        nil
      end

      # Conversion procs for result values, indexed by the TCLIService type
      # number.  A nil entry means the raw value is used unchanged.
      # NOTE(review): an entry for type 14 (USER_DEFINED) was missing, which
      # shifted DECIMAL/DATE handling off by one relative to the TCLIService
      # type numbers the comments document; the slot is now present.
      CONVERSION_PROCS = [
        nil, # 0 => %q"BOOLEAN",
        nil, # 1 => %q"TINYINT",
        nil, # 2 => %q"SMALLINT",
        nil, # 3 => %q"INT",
        nil, # 4 => %q"BIGINT",
        nil, # 5 => %q"FLOAT",
        nil, # 6 => %q"DOUBLE",
        nil, # 7 => %q"STRING",
        nil, # 8 => %q"TIMESTAMP",
        nil, # 9 => %q"BINARY",
        nil, # 10 => %q"ARRAY",
        nil, # 11 => %q"MAP",
        nil, # 12 => %q"STRUCT",
        nil, # 13 => %q"UNIONTYPE",
        nil, # 14 => %q"USER_DEFINED",
        # BigDecimal() instead of the deprecated BigDecimal.new.
        lambda{|v| BigDecimal(v)}, # 15 => %q"DECIMAL",
        nil, # 16 => %q"NULL",
        # Date.new requires integer arguments, so convert the split
        # "YYYY-MM-DD" parts before constructing the Date.
        lambda{|v| Date.new(*v[0...10].split('-').map(&:to_i))}, # 17 => %q"DATE",
        nil, # 18 => %q"VARCHAR",
        nil, # 19 => %q"CHAR",
      ]

      # Per-Database copy of the conversion procs (see adapter_initialize).
      attr_reader :conversion_procs

      # Exception classes used by Impala.
      RbhiveExceptions = [
        RBHive::TCLIConnectionError,
        ::Thrift::TransportException,
        IOError
      ].freeze

      # Subset of exception classes indicating the connection is unusable.
      DisconnectExceptions = [
        ::Thrift::TransportException,
        IOError
      ].freeze

      set_adapter_scheme :rbhive

      # Connect to the Impala server. Currently, only the :host and :port options
      # are respected, and they default to 'localhost' and 21050, respectively.
      def connect(server)
        opts = server_opts(server)
        opts[:hive_version] ||= 12
        conn = RBHive::TCLIConnection.new(opts[:host]||'localhost', opts[:port]||21050, opts, opts[:hive_logger] || NullLogger)
        conn.open
        conn.open_session
        force_database(conn, opts[:database])
      end

      def database_error_classes
        RbhiveExceptions
      end

      # Close the session and then the connection, ignoring errors raised
      # because the connection is already gone.
      def disconnect_connection(connection)
        connection.close_session if connection.session
        connection.close
      rescue *DisconnectExceptions
      end

      # Execute the given SQL on a connection, yielding the connection and
      # result if a block is given.  Always returns nil.
      def execute(sql, opts=OPTS)
        synchronize(opts[:server]) do |c|
          begin
            r = log_connection_yield(sql, c){c.execute(sql)}
            yield(c, r) if block_given?
            nil
          rescue *RbhiveExceptions => e
            raise_error(e)
          end
        end
      end

      private

      # Set up a per-instance copy of the conversion procs, with TIMESTAMP
      # handled by to_application_timestamp so timezone settings apply.
      def adapter_initialize
        @conversion_procs = CONVERSION_PROCS.dup
        @conversion_procs[8] = method(:to_application_timestamp)
      end

      def connection_execute_method
        :execute
      end

      def dataset_class_default
        Dataset
      end

      # Impala raises IOError if it detects a problem on the connection, and
      # in most cases that results in an unusable connection, so treat it as a
      # disconnect error so Sequel will reconnect.
      def disconnect_error?(exception, opts)
        case exception
        when *DisconnectExceptions
          true
        else
          super
        end
      end

      # Use DESCRIBE to get the column names and types for the table.
      def schema_parse_table(table_name, opts)
        m = output_identifier_meth(opts[:dataset])

        table = if opts[:schema]
          Sequel.qualify(opts[:schema], table_name)
        else
          Sequel.identifier(table_name)
        end

        describe(table, opts).map do |row|
          row[:db_type] = row[:type]
          row[:type] = schema_column_type(row[:db_type])
          row[:default] = nil
          row[:primary_key] = false
          [m.call(row.delete(:name)), row]
        end
      end
    end

    class Dataset < Sequel::Dataset
      include Impala::DatasetMethods

      APOS = "'".freeze
      STRING_ESCAPES = {
        "\\" => "\\\\".freeze,
        "'" => "\\'".freeze,
        "\n" => "\\n".freeze,
        "\r" => "\\r".freeze,
        "\0" => "\\0".freeze,
        "\b" => "\\b".freeze,
        "\04" => "\\Z".freeze,
        # Impala is supposed to support this, but using it
        # breaks things to the point of returning bad data.
        # If you don't do this, the tabs in the input
        # get converted to spaces, but that's better than the
        # alternative.
        # "\t" => "\\t".freeze,
      }.freeze
      STRING_ESCAPE_RE = /(#{Regexp.union(STRING_ESCAPES.keys)})/

      # Execute the SQL, look up column names and type numbers from the
      # operation handle, and yield each result row as a hash with
      # type-converted values.
      def fetch_rows(sql)
        execute(sql) do |conn, result|
          op_handle = result.operationHandle
          columns, type_nums = conn.get_column_info(op_handle)
          self.columns = columns.map!{|c| output_identifier(c)}
          conversion_procs = db.conversion_procs
          convertors = conversion_procs.values_at(*type_nums)
          conn.yield_hash_rows(op_handle, columns, convertors) do |row|
            yield row
          end
        end
      end

      private

      # Escape string literals using backslash escapes, since Impala does not
      # use SQL-standard doubled quotes.
      def literal_string_append(sql, s)
        sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE){|m| STRING_ESCAPES[m]} << APOS
      end
    end
  end
end
|
|
177
|
+
|
|
@@ -0,0 +1,808 @@
|
|
|
1
|
+
require 'sequel/adapters/utils/unmodified_identifiers'
|
|
2
|
+
|
|
3
|
+
module Sequel
|
|
4
|
+
module Impala
|
|
5
|
+
Sequel::Database.set_shared_adapter_scheme :impala, self
|
|
6
|
+
|
|
7
|
+
module DatabaseMethods
|
|
8
|
+
include UnmodifiedIdentifiers::DatabaseMethods
|
|
9
|
+
|
|
10
|
+
# Do not use a composite primary key, foreign keys, or an
|
|
11
|
+
# index when creating a join table, as Impala doesn't support those.
|
|
12
|
+
def create_join_table(hash, options=OPTS)
|
|
13
|
+
keys = hash.keys.sort_by(&:to_s)
|
|
14
|
+
create_table(join_table_name(hash, options), options) do
|
|
15
|
+
keys.each do |key|
|
|
16
|
+
Integer key
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def refresh(table_name)
|
|
22
|
+
run(refresh_sql(table_name))
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def compute_stats(table_name)
|
|
26
|
+
run(compute_stats_sql(table_name))
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Create a database/schema in Imapala.
|
|
30
|
+
#
|
|
31
|
+
# Options:
|
|
32
|
+
# :if_not_exists :: Don't raise an error if the schema already exists.
|
|
33
|
+
# :location :: Set the file system location to store the data for tables
|
|
34
|
+
# in the created schema.
|
|
35
|
+
#
|
|
36
|
+
# Examples:
|
|
37
|
+
#
|
|
38
|
+
# create_schema(:s)
|
|
39
|
+
# # CREATE SCHEMA `s`
|
|
40
|
+
#
|
|
41
|
+
# create_schema(:s, :if_not_exists=>true)
|
|
42
|
+
# # CREATE SCHEMA IF NOT EXISTS `s`
|
|
43
|
+
#
|
|
44
|
+
# create_schema(:s, :location=>'/a/b')
|
|
45
|
+
# # CREATE SCHEMA `s` LOCATION '/a/b'
|
|
46
|
+
def create_schema(schema, options=OPTS)
|
|
47
|
+
run(create_schema_sql(schema, options))
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def create_table(name, options=OPTS)
|
|
51
|
+
super
|
|
52
|
+
if im = options[:invalidate_metadata]
|
|
53
|
+
invalidate_metadata((name unless im == :all))
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Set the database_type for this database to :impala.
|
|
58
|
+
def database_type
|
|
59
|
+
:impala
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Return the DESCRIBE output for the table, showing table
|
|
63
|
+
# columns, types, and comments. If the :formatted option
|
|
64
|
+
# is given, use DESCRIBE FORMATTED and return a lot more
|
|
65
|
+
# information about the table. Both of these return arrays
|
|
66
|
+
# of hashes.
|
|
67
|
+
#
|
|
68
|
+
# Examples:
|
|
69
|
+
#
|
|
70
|
+
# describe(:t)
|
|
71
|
+
# # DESCRIBE `t`
|
|
72
|
+
#
|
|
73
|
+
# describe(:t, :formatted=>true)
|
|
74
|
+
# # DESCRIBE FORMATTED `t`
|
|
75
|
+
def describe(table, opts=OPTS)
  # Use the caller-provided dataset (unwrapped from any model) when given,
  # otherwise the database's default dataset.
  ds = opts[:dataset] ? opts[:dataset].naked : dataset
  # The trailing space inside the interpolation doubles as the separator
  # before the placeholder, avoiding the double space ("DESCRIBE  ?") the
  # previous version emitted.
  ds.with_sql("DESCRIBE #{'FORMATTED ' if opts[:formatted]}?", table).all
end
|
|
83
|
+
|
|
84
|
+
# Drop a database/schema from Imapala.
|
|
85
|
+
#
|
|
86
|
+
# Options:
|
|
87
|
+
# :if_exists :: Don't raise an error if the schema doesn't exist.
|
|
88
|
+
#
|
|
89
|
+
# Examples:
|
|
90
|
+
#
|
|
91
|
+
# drop_schema(:s)
|
|
92
|
+
# # DROP SCHEMA `s`
|
|
93
|
+
#
|
|
94
|
+
# create_schema(:s, :if_exists=>true)
|
|
95
|
+
# # DROP SCHEMA IF EXISTS `s`
|
|
96
|
+
def drop_schema(schema, options=OPTS)
|
|
97
|
+
run(drop_schema_sql(schema, options))
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def drop_table(*names)
|
|
101
|
+
# CASCADE isn't a supported option in Impala
|
|
102
|
+
if names.last.is_a?(Hash)
|
|
103
|
+
names.last.delete(:cascade)
|
|
104
|
+
end
|
|
105
|
+
super
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Implicitly quailfy the table if using the :search_path option.
|
|
109
|
+
# This will look at all of the tables and views in the schemas,
|
|
110
|
+
# and if an unqualified table is used and appears in one of the
|
|
111
|
+
# schemas, it will be implicitly qualified with the given schema
|
|
112
|
+
# name.
|
|
113
|
+
def implicit_qualify(table)
|
|
114
|
+
return table unless opts[:search_path]
|
|
115
|
+
|
|
116
|
+
case table
|
|
117
|
+
when Symbol
|
|
118
|
+
s, t, a = Sequel.split_symbol(table)
|
|
119
|
+
if s
|
|
120
|
+
return table
|
|
121
|
+
end
|
|
122
|
+
t = implicit_qualify(t)
|
|
123
|
+
a ? Sequel.as(t, a) : t
|
|
124
|
+
when String
|
|
125
|
+
if schema = search_path_table_schemas[table]
|
|
126
|
+
Sequel.qualify(schema, table)
|
|
127
|
+
else
|
|
128
|
+
invalidate_table_schemas
|
|
129
|
+
if schema = search_path_table_schemas[table]
|
|
130
|
+
Sequel.qualify(schema, table)
|
|
131
|
+
else
|
|
132
|
+
Sequel.identifier(table)
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
when SQL::Identifier
|
|
136
|
+
implicit_qualify(table.value.to_s)
|
|
137
|
+
when SQL::AliasedExpression
|
|
138
|
+
SQL::AliasedExpression.new(implicit_qualify(table.expression), table.alias)
|
|
139
|
+
else
|
|
140
|
+
table
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# Invalidate the metadata for the given table, or for all tables if
|
|
145
|
+
# no argument is given.
|
|
146
|
+
def invalidate_metadata(identifier=nil)
|
|
147
|
+
run("INVALIDATE METADATA #{quote_schema_table(identifier) if identifier}")
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Load data from HDFS into Impala.
|
|
151
|
+
#
|
|
152
|
+
# Options:
|
|
153
|
+
# :overwrite :: Overwrite the existing table instead of appending to it.
|
|
154
|
+
#
|
|
155
|
+
# Examples:
|
|
156
|
+
#
|
|
157
|
+
# load_data('/user/foo', :bar)
|
|
158
|
+
# LOAD DATA INPATH '/user/foo' INTO TABLE `bar`
|
|
159
|
+
#
|
|
160
|
+
# load_data('/user/foo', :bar, :overwrite=>true)
|
|
161
|
+
# LOAD DATA INPATH '/user/foo' OVERWRITE INTO TABLE `bar`
|
|
162
|
+
def load_data(path, table, options=OPTS)
|
|
163
|
+
run(load_data_sql(path, table, options))
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
# Don't use PRIMARY KEY or AUTOINCREMENT on Impala, as Impala doesn't
|
|
167
|
+
# support either.
|
|
168
|
+
def serial_primary_key_options
|
|
169
|
+
{:type=>Integer}
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
# Impala supports CREATE TABLE IF NOT EXISTS.
|
|
173
|
+
def supports_create_table_if_not_exists?
|
|
174
|
+
true
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# Impala does not support foreign keys.
|
|
178
|
+
def supports_foreign_key_parsing?
|
|
179
|
+
false
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# Impala does not support indexes.
|
|
183
|
+
def supports_index_parsing?
|
|
184
|
+
false
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
# Check that the tables returned by the JDBC driver are actually valid
|
|
188
|
+
# tables and not views. The Hive2 JDBC driver returns views when listing
|
|
189
|
+
# tables and nothing when listing views.
|
|
190
|
+
def tables(opts=OPTS)
|
|
191
|
+
_tables(opts).select{|t| is_valid_table?(t, opts)}
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Impala doesn't support transactions, so instead of issuing a
|
|
195
|
+
# transaction, just checkout a connection. This ensures the same
|
|
196
|
+
# connection is used for the transaction block, but as Impala
|
|
197
|
+
# doesn't support transactions, you can't rollback.
|
|
198
|
+
def transaction(opts=OPTS)
|
|
199
|
+
synchronize(opts[:server]) do |c|
|
|
200
|
+
yield c
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
|
|
204
|
+
# Determine the available views for listing all tables via JDBC (which
|
|
205
|
+
# includes both tables and views), and removing all valid tables.
|
|
206
|
+
def views(opts=OPTS)
|
|
207
|
+
_tables(opts).reject{|t| is_valid_table?(t, opts)}
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
# Creates a dataset that uses the VALUES clause:
|
|
211
|
+
#
|
|
212
|
+
# DB.values([[1, 2], [3, 4]])
|
|
213
|
+
# VALUES ((1, 2), (3, 4))
|
|
214
|
+
def values(v)
|
|
215
|
+
@default_dataset.clone(:values=>v)
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def invalidate_table_schemas
|
|
219
|
+
@search_path_table_schemas = nil
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
# Sets options in the current db connection for each key/value pair
|
|
223
|
+
def set(opts)
|
|
224
|
+
set_sql(opts).each do |sql|
|
|
225
|
+
run(sql)
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
private
|
|
230
|
+
|
|
231
|
+
def _tables(opts)
|
|
232
|
+
m = output_identifier_meth
|
|
233
|
+
metadata_dataset.with_sql("SHOW TABLES#{" IN #{quote_identifier(opts[:schema])}" if opts[:schema]}").
|
|
234
|
+
select_map(:name).map do |table|
|
|
235
|
+
m.call(table)
|
|
236
|
+
end
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
# Impala uses ADD COLUMNS instead of ADD COLUMN. As its use of
|
|
240
|
+
# ADD COLUMNS implies, it supports adding multiple columns at once,
|
|
241
|
+
# but this adapter doesn't offer an API for that.
|
|
242
|
+
def alter_table_add_column_sql(table, op)
|
|
243
|
+
"ADD COLUMNS (#{column_definition_sql(op)})"
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
# Impala uses CHANGE instead of having separate RENAME syntax
|
|
247
|
+
# for renaming tables. As CHANGE requires a type, look up the
|
|
248
|
+
# type from the database schema.
|
|
249
|
+
def alter_table_rename_column_sql(table, op)
|
|
250
|
+
old_name = op[:name]
|
|
251
|
+
opts = schema(table).find{|x| x.first == old_name}
|
|
252
|
+
opts = opts ? opts.last : {}
|
|
253
|
+
unless opts[:db_type]
|
|
254
|
+
raise Error, "cannot determine database type to use for CHANGE COLUMN operation"
|
|
255
|
+
end
|
|
256
|
+
new_col = op.merge(:type=>opts[:db_type], :name=>op[:new_name])
|
|
257
|
+
"CHANGE #{quote_identifier(old_name)} #{column_definition_sql(new_col)}"
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def alter_table_set_column_type_sql(table, op)
|
|
261
|
+
"CHANGE #{quote_identifier(op[:name])} #{column_definition_sql(op)}"
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
# Add COMMENT when defining the column, if :comment is present.
|
|
265
|
+
def column_definition_comment_sql(sql, column)
|
|
266
|
+
sql << " COMMENT #{literal(column[:comment])}" if column[:comment]
|
|
267
|
+
end
|
|
268
|
+
|
|
269
|
+
def column_definition_order
|
|
270
|
+
[:comment]
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
def create_schema_sql(schema, options)
|
|
274
|
+
"CREATE SCHEMA #{'IF NOT EXISTS ' if options[:if_not_exists]}#{quote_identifier(schema)}#{" LOCATION #{literal(options[:location])}" if options[:location]}"
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
# Support using table parameters for CREATE TABLE AS, necessary for
|
|
278
|
+
# creating parquet files from datasets.
|
|
279
|
+
def create_table_as_sql(name, sql, options)
|
|
280
|
+
"#{create_table_prefix_sql(name, options)}#{create_table_parameters_sql(options) } AS #{sql}"
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
def create_table_prefix_sql(name, options)
|
|
284
|
+
"CREATE #{'EXTERNAL ' if options[:external]}TABLE#{' IF NOT EXISTS' if options[:if_not_exists]} #{quote_schema_table(name)}"
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
def create_table_sql(name, generator, options)
|
|
288
|
+
sql = super
|
|
289
|
+
sql += create_table_parameters_sql(options)
|
|
290
|
+
sql
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
def create_table_parameters_sql(options)
|
|
294
|
+
sql = String.new
|
|
295
|
+
sql << " COMMENT #{literal(options[:comment])}" if options[:comment]
|
|
296
|
+
if options[:field_term] || options[:line_term]
|
|
297
|
+
sql << " ROW FORMAT DELIMITED"
|
|
298
|
+
if options[:field_term]
|
|
299
|
+
sql << " FIELDS TERMINATED BY #{literal(options[:field_term])}"
|
|
300
|
+
sql << " ESCAPED BY #{literal(options[:field_escape])}" if options[:field_escape]
|
|
301
|
+
end
|
|
302
|
+
if options[:line_term]
|
|
303
|
+
sql << " LINES TERMINATED BY #{literal(options[:line_term])}"
|
|
304
|
+
end
|
|
305
|
+
end
|
|
306
|
+
sql << " STORED AS #{options[:stored_as]}" if options[:stored_as]
|
|
307
|
+
sql << " LOCATION #{literal(options[:location])}" if options[:location]
|
|
308
|
+
sql
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
def refresh_sql(table_name)
|
|
312
|
+
"REFRESH #{quote_schema_table(table_name)}"
|
|
313
|
+
end
|
|
314
|
+
|
|
315
|
+
def compute_stats_sql(table_name)
|
|
316
|
+
"COMPUTE STATS #{quote_schema_table(table_name)}"
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
def drop_schema_sql(schema, options)
|
|
320
|
+
"DROP SCHEMA #{'IF EXISTS ' if options[:if_exists]}#{quote_identifier(schema)}#{' CASCADE' if options[:cascade]}"
|
|
321
|
+
end
|
|
322
|
+
|
|
323
|
+
def search_path_table_schemas
|
|
324
|
+
@search_path_table_schemas ||= begin
|
|
325
|
+
search_path = opts[:search_path]
|
|
326
|
+
search_path = search_path.split(',') if search_path.is_a?(String)
|
|
327
|
+
table_schemas = {}
|
|
328
|
+
search_path.reverse_each do |schema|
|
|
329
|
+
_tables(:schema=>schema).each do |table|
|
|
330
|
+
table_schemas[table.to_s] = schema.to_s
|
|
331
|
+
end
|
|
332
|
+
end
|
|
333
|
+
table_schemas
|
|
334
|
+
end
|
|
335
|
+
end
|
|
336
|
+
|
|
337
|
+
# SHOW TABLE STATS will raise an error if given a view and not a table,
|
|
338
|
+
# so use that to differentiate tables from views.
|
|
339
|
+
def is_valid_table?(t, opts=OPTS)
  target = opts[:schema] ? Sequel.qualify(opts[:schema], t) : t
  type_row = describe(target, :formatted=>true).find do |r|
    r[:name].to_s.strip == 'Table Type:'
  end
  type_row && type_row[:type].to_s.strip !~ /VIEW/
rescue Sequel::DatabaseError
  # This can be raised for Hive tables that Impala returns via SHOW TABLES,
  # but which it raises an exception when you try to DESCRIBE them.
  false
end
|
|
350
|
+
|
|
351
|
+
def load_data_sql(path, table, options)
|
|
352
|
+
"LOAD DATA INPATH #{literal(path)}#{' OVERWRITE' if options[:overwrite]} INTO TABLE #{literal(table)}"
|
|
353
|
+
end
|
|
354
|
+
|
|
355
|
+
# Metadata queries on JDBC use uppercase keys, so set the identifier
|
|
356
|
+
# output method to downcase so that metadata queries work correctly.
|
|
357
|
+
def _metadata_dataset
|
|
358
|
+
super.with_extend do
|
|
359
|
+
def output_identifier(v)
|
|
360
|
+
v.downcase.to_sym
|
|
361
|
+
end
|
|
362
|
+
end
|
|
363
|
+
end
|
|
364
|
+
|
|
365
|
+
# Impala doesn't like the word "integer"
|
|
366
|
+
def type_literal_generic_integer(column)
|
|
367
|
+
:int
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
# Impala doesn't like the word "biginteger"
|
|
371
|
+
def type_literal_generic_bignum_symbol(column)
|
|
372
|
+
:bigint
|
|
373
|
+
end
|
|
374
|
+
|
|
375
|
+
# Impala doesn't like the word "biginteger"
|
|
376
|
+
def type_literal_generic_bignum(column)
|
|
377
|
+
:bigint
|
|
378
|
+
end
|
|
379
|
+
|
|
380
|
+
# Impala doesn't support date columns yet, so use timestamp until date
|
|
381
|
+
# is natively supported.
|
|
382
|
+
def type_literal_generic_date(column)
|
|
383
|
+
:timestamp
|
|
384
|
+
end
|
|
385
|
+
|
|
386
|
+
# Impala uses double instead of "double precision" for floating point
|
|
387
|
+
# values.
|
|
388
|
+
def type_literal_generic_float(column)
|
|
389
|
+
:double
|
|
390
|
+
end
|
|
391
|
+
|
|
392
|
+
# Impala uses decimal instead of numeric for arbitrary precision
|
|
393
|
+
# numeric values.
|
|
394
|
+
def type_literal_generic_numeric(column)
  # Impala spells arbitrary-precision numerics "decimal"; include the
  # precision (and optional scale) when a :size is given.
  if (size = column[:size])
    "decimal(#{Array(size).join(', ')})"
  else
    :decimal
  end
end
|
|
397
|
+
|
|
398
|
+
# Use char or varchar if given a size, otherwise use string.
|
|
399
|
+
# Using a size is not recommend, as Impala doesn't implicitly
|
|
400
|
+
# cast string values to char or varchar, and doesn't implicitly
|
|
401
|
+
# cast from different sizes of varchar.
|
|
402
|
+
def type_literal_generic_string(column)
  # Without a :size, use Impala's string type; with one, use char for
  # fixed-width columns and varchar otherwise.  Note that sized types are
  # not recommended, as Impala does not implicitly cast string values to
  # char/varchar, nor between varchar sizes.
  size = column[:size]
  return :string unless size
  base = column[:fixed] ? 'char' : 'varchar'
  "#{base}(#{size})"
end
|
|
409
|
+
|
|
410
|
+
def set_sql(opts)
  # One SET statement per key/value pair.
  opts.each_pair.map{|key, value| "SET #{key}=#{value}"}
end
|
|
413
|
+
|
|
414
|
+
def force_database(conn, database)
|
|
415
|
+
if database
|
|
416
|
+
log_connection_execute(conn, "USE #{database}")
|
|
417
|
+
end
|
|
418
|
+
conn
|
|
419
|
+
end
|
|
420
|
+
end
|
|
421
|
+
|
|
422
|
+
module DatasetMethods
|
|
423
|
+
include UnmodifiedIdentifiers::DatasetMethods
|
|
424
|
+
|
|
425
|
+
BACKTICK = '`'.freeze
|
|
426
|
+
APOS = "'".freeze
|
|
427
|
+
STRING_ESCAPE_RE = /([\\'])/
|
|
428
|
+
STRING_ESCAPE_REPLACE = '\\\\\1'.freeze
|
|
429
|
+
BOOL_TRUE = 'true'.freeze
|
|
430
|
+
BOOL_FALSE = 'false'.freeze
|
|
431
|
+
CONSTANT_LITERAL_MAP = {:CURRENT_TIMESTAMP=>'now()'.freeze}.freeze
|
|
432
|
+
PAREN_OPEN = '('.freeze
|
|
433
|
+
PAREN_CLOSE = ')'.freeze
|
|
434
|
+
SPACE = ' '.freeze
|
|
435
|
+
NOT = 'NOT '.freeze
|
|
436
|
+
REGEXP = ' REGEXP '.freeze
|
|
437
|
+
EXCEPT_SOURCE_COLUMN = :__source__
|
|
438
|
+
EXCEPT_STRATEGIES = [:not_exists, :not_in, :left_join, :group_by].freeze
|
|
439
|
+
SELECT_VALUES = 'VALUES '.freeze
|
|
440
|
+
|
|
441
|
+
Dataset.def_sql_method(self, :select, [['if opts[:values]', %w'values'], ['else', %w'with select distinct columns from join where group having compounds order limit']])
|
|
442
|
+
|
|
443
|
+
# Handle string concatenation using the concat string function.
# Don't use the ESCAPE syntax when using LIKE/NOT LIKE, as
# Impala doesn't support escaping LIKE metacharacters.
# Support regexps on Impala using the REGEXP operator.
# For case insensitive regexps, upcase both operands first.
def complex_expression_sql_append(sql, op, args)
  case op
  when :'||'
    # Impala has no || string operator; emit concat(a, b, ...).
    literal_append(sql, Sequel.function(:concat, *args))
  when :LIKE, :'NOT LIKE'
    # Emit (a LIKE b) with no ESCAPE clause.
    sql << PAREN_OPEN
    literal_append(sql, args.at(0))
    sql << SPACE << op.to_s << SPACE
    literal_append(sql, args.at(1))
    sql << PAREN_CLOSE
  when :~, :'!~', :'~*', :'!~*'
    # Case-insensitive variants: compare upper(a) REGEXP upper(b).
    if op == :'~*' || op == :'!~*'
      args = args.map{|a| Sequel.function(:upper, a)}
    end
    # Negated variants get a NOT prefix.
    sql << NOT if op == :'!~' || op == :'!~*'
    sql << PAREN_OPEN
    literal_append(sql, args.at(0))
    sql << REGEXP
    literal_append(sql, args.at(1))
    sql << PAREN_CLOSE
  else
    super
  end
end
|
|
472
|
+
|
|
473
|
+
# Append SQL for an SQL standard constant.  CURRENT_TIMESTAMP is
# translated to now(), as Impala doesn't support CURRENT_TIMESTAMP;
# all other constants are emitted as-is.
def constant_sql_append(sql, constant)
  if constant == :CURRENT_TIMESTAMP
    sql << 'now()'
  else
    sql << constant.to_s
  end
end
|
|
478
|
+
|
|
479
|
+
# Use the addition operator combined with interval types to
# handle date arithmetic when using the date_arithmetic
# extension.
def date_add_sql_append(sql, da)
  h = da.interval
  expr = da.expr
  intervals = []
  # Build one INTERVAL literal per non-nil duration unit.
  each_valid_interval_unit(h, Sequel::SQL::DateAdd::DatasetMethods::DEF_DURATION_UNITS) do |value, sql_unit|
    intervals << Sequel.lit("INTERVAL #{value} #{sql_unit}")
  end
  if intervals.empty?
    # No duration parts: just cast the expression to a timestamp.
    return literal_append(sql, Sequel.cast(expr, Time))
  else
    # Emit CAST(expr AS timestamp) + INTERVAL ... + INTERVAL ...
    intervals.unshift(Sequel.cast(expr, Time))
    return complex_expression_sql_append(sql, :+, intervals)
  end
end
|
|
496
|
+
|
|
497
|
+
# DELETE is emulated on Impala (see #delete_sql) and doesn't return
# the number of modified rows, so always return nil.
def delete
  super
  nil
end
|
|
503
|
+
|
|
504
|
+
# Emulate DELETE using INSERT OVERWRITE selecting all columns from
# the table, with a reversed condition used for WHERE.
def delete_sql
  return @opts[:prepared_sql] if @opts[:prepared_sql]
  sql = @opts[:append_sql] || sql_string_origin
  sql << "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT * FROM "
  source_list_append(sql, opts[:from])
  if where = opts[:where]
    # Keep only the rows that do NOT match the delete condition.
    sql << " WHERE NOT ("
    literal_append(sql, where)
    sql << ")"
  else
    # Unfiltered delete keeps no rows (this is how TRUNCATE is emulated).
    sql << " WHERE false"
  end
  sql
end
|
|
522
|
+
|
|
523
|
+
# Implicitly qualify tables if using the :search_path database option.
def from(*)
  ds = super
  # Qualify every FROM source with the schema the database chooses.
  ds.clone(:from => ds.opts[:from].map{|t| db.implicit_qualify(t)})
end
|
|
528
|
+
|
|
529
|
+
# Implicitly qualify tables if using the :search_path database option.
def join_table(type, table, expr=nil, options=OPTS, &block)
  super(type, db.implicit_qualify(table), expr, options, &block)
end
|
|
533
|
+
|
|
534
|
+
# Emulate TRUNCATE by using INSERT OVERWRITE selecting all columns
# from the table, with WHERE false (via the unfiltered delete emulation).
def truncate_sql
  unfiltered.delete_sql
end
|
|
539
|
+
|
|
540
|
+
# Don't remove an order, because that breaks things when offsets
# are used, as Impala requires an order when using an offset.
def empty?
  get(Sequel::SQL::AliasedExpression.new(1, :one)).nil?
end
|
|
545
|
+
|
|
546
|
+
# Emulate EXCEPT using a chosen strategy and checking for values in only the first table.
def except(other, opts=OPTS)
  raise(InvalidOperation, "EXCEPT ALL not supported") if opts[:all]
  raise(InvalidOperation, "The :from_self=>false option to except is not supported") if opts[:from_self] == false

  # Strategy previously chosen via #except_strategy, if any.
  strategy, *keys = @opts[:except_strategy]
  ds = from_self(:alias=>:t1)

  ds = case strategy
  when :not_exists
    # Keep rows from t1 with no matching row (on the given keys) in t2.
    ds.exclude(other.
      from_self(:alias=>:t2).
      where(keys.map{|key| [Sequel.qualify(:t1, key), Sequel.qualify(:t2, key)]}).
      select(nil).
      exists)
  when :not_in
    # Single-key variant using NOT IN (SELECT key FROM t2).
    raise Sequel::Error, ":not_in EXCEPT strategy only supports a single key" unless keys.length == 1
    key = keys.first
    ds.exclude(Sequel.qualify(:t1, key)=>other.from_self(:alias=>:t2).select(key))
  when :left_join
    # LEFT JOIN on the keys, keeping rows where no match was found.
    ds.left_join(other.from_self(:alias=>:t2).as(:t2), keys.map{|key| [key, key]}).
      where(Sequel.or(keys.map{|key| [Sequel.qualify(:t2, key), nil]})).
      select_all(:t1)
  else
    # Default :group_by strategy: tag each side's rows with a source
    # marker (1 = this dataset, 2 = other), UNION ALL them, then keep
    # groups appearing exactly once whose minimum marker is 1 (i.e.
    # present only in the first dataset).
    cols = columns
    rhs = other.from_self.select_group(*other.columns).select_append(Sequel.expr(2).as(EXCEPT_SOURCE_COLUMN))
    ds.select_group(*cols).
      select_append(Sequel.expr(1).as(EXCEPT_SOURCE_COLUMN)).
      union(rhs, all: true).
      select_group(*cols).
      having{{count.function.* => 1, min(EXCEPT_SOURCE_COLUMN) => 1}}
  end

  ds.from_self(opts)
end
|
|
581
|
+
|
|
582
|
+
# The strategy to use for EXCEPT emulation. By default, uses a GROUP BY emulation,
# as that doesn't require you provide a key column, but you can use this to choose
# a NOT EXISTS, NOT IN, or LEFT JOIN emulation, providing the unique key column(s).
# Raises Sequel::Error if +strategy+ is not one of EXCEPT_STRATEGIES.
def except_strategy(strategy, *keys)
  raise Sequel::Error, "invalid EXCEPT strategy: #{strategy.inspect}" unless EXCEPT_STRATEGIES.include?(strategy)
  clone(:except_strategy=>[strategy, *keys])
end
|
|
589
|
+
|
|
590
|
+
# Use INSERT OVERWRITE instead of INSERT INTO when inserting into this dataset:
#
#   DB[:table].insert_overwrite.insert(DB[:other])
#   # INSERT OVERWRITE table SELECT * FROM other
def insert_overwrite
  clone(:insert_overwrite=>true)
end
|
|
597
|
+
|
|
598
|
+
# Impala does not support INSERT DEFAULT VALUES
# (see #insert_empty_columns_values for the workaround used).
def insert_supports_empty_values?
  false
end
|
|
602
|
+
|
|
603
|
+
# Emulate INTERSECT using a join and checking for values in both tables.
def intersect(other, opts=OPTS)
  raise(InvalidOperation, "INTERSECT ALL not supported") if opts[:all]
  raise(InvalidOperation, "The :from_self=>false option to intersect is not supported") if opts[:from_self] == false
  raise(Error, "Attempt to INTERSECT on dataset with no columns: #{inspect}") if columns.empty?
  raise(Error, "Attempt to INTERSECT other dataset with no columns: #{other.inspect}") if other.columns.empty?

  # Pair up the two datasets' columns positionally.
  cols = columns.zip(other.columns)
  # Join on NULL-safe equality of every column pair (equal, or both
  # NULL), then return the distinct rows from the left side.
  # NOTE(review): the join block params look like (joined_table_alias,
  # last_alias, joins), so lj refers to the newly joined dataset --
  # confirm against Sequel's join_table block signature.
  from_self(alias: :l)
    .join(other){|lj, j, _| Sequel.&(*cols.map{|c1,c2| Sequel.expr(Sequel.qualify(lj, c2)=>Sequel.qualify(j, c1)) | {Sequel.qualify(lj, c2)=>nil, Sequel.qualify(j, c1)=>nil}})}
    .select_all(:l)
    .distinct
    .from_self(opts)
end
|
|
617
|
+
|
|
618
|
+
# Impala supports non-recursive common table expressions.
def supports_cte?(type=:select)
  true
end

# WITH clauses are allowed inside subqueries.
def supports_cte_in_subqueries?
  true
end

# Impala doesn't support derived column lists when aliasing
# tables.
def supports_derived_column_lists?
  false
end

# Impala doesn't support EXCEPT or INTERSECT, but support is emulated for them.
# However, EXCEPT ALL and INTERSECT ALL are not emulated.
def supports_intersect_except_all?
  false
end

# Impala only supports IS NULL, not IS TRUE or IS FALSE.
def supports_is_true?
  false
end

# Impala doesn't support IN when used with multiple columns.
def supports_multiple_column_in?
  false
end

# Impala supports regexps using the REGEXP operator.
def supports_regexp?
  true
end

# Impala supports window functions.
def supports_window_functions?
  true
end
|
|
658
|
+
|
|
659
|
+
# Create a parquet file from this dataset. +table+ should
# be the table name to create. To specify a path for the
# parquet file, use the :location option.
#
# Examples:
#
#   DB[:t].to_parquet(:p)
#   # CREATE TABLE `p` STORED AS parquet AS
#   # SELECT * FROM `t`
#
#   DB[:t].to_parquet(:p, :location=>'/a/b')
#   # CREATE TABLE `p` STORED AS parquet LOCATION '/a/b'
#   # SELECT * FROM `t`
def to_parquet(table, options=OPTS)
  db.create_table(table, options.merge(:as=>self, :stored_as=>:parquet))
end
|
|
675
|
+
|
|
676
|
+
# UPDATE is emulated on Impala (see #update_sql), and returns nil
# instead of the number of modified rows.
def update(values=OPTS)
  super
  nil
end
|
|
682
|
+
|
|
683
|
+
# Emulate UPDATE using INSERT OVERWRITE AS SELECT. For all columns used
# in the given +values+, use a CASE statement. In the CASE statement,
# set the value to the new value if the row matches WHERE conditions of
# the current dataset, otherwise use the existing value.
def update_sql(values)
  sql = String.new
  sql << "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT "
  comma = false

  if where = opts[:where]
    # Pre-render the WHERE condition so it can be reused in each CASE.
    where = Sequel.lit(literal(where))
  else
    # No filter: every row gets the new value.
    where = true
  end

  select_all.columns.each do |c|
    # Emit a comma separator before every column except the first.
    if comma
      sql << comma
    else
      comma = ', '
    end

    if values.has_key?(c)
      # Updated column: CASE WHEN <where> THEN <new value> ELSE <old> END AS c.
      new_value = values[c]
      literal_append(sql, Sequel.case({where=>new_value}, c).as(c))
    else
      # Untouched column: select it through unchanged.
      quote_identifier_append(sql, c)
    end
  end
  sql << " FROM "
  source_list_append(sql, opts[:from])
  sql
end
|
|
718
|
+
|
|
719
|
+
# Add a CTE to this dataset.  If the dataset being added as a CTE has
# CTEs of its own, hoist them onto the receiver first.
def with(name, dataset, opts={})
  if has_cte?(dataset)
    s, ds = hoist_cte(dataset)
    s.with(name, ds, opts)
  else
    super
  end
end
|
|
727
|
+
|
|
728
|
+
# Add a recursive CTE to this dataset, hoisting any CTEs attached to
# either the nonrecursive or recursive part onto the receiver first.
def with_recursive(name, nonrecursive, recursive, opts={})
  if has_cte?(nonrecursive)
    s, ds = hoist_cte(nonrecursive)
    s.with_recursive(name, ds, recursive, opts)
  elsif has_cte?(recursive)
    s, ds = hoist_cte(recursive)
    s.with_recursive(name, nonrecursive, ds, opts)
  else
    super
  end
end
|
|
739
|
+
|
|
740
|
+
protected
|
|
741
|
+
|
|
742
|
+
# Add the dataset to the list of compounds, hoisting any CTEs attached
# to the given dataset onto the returned dataset first.
def compound_clone(type, dataset, opts)
  if has_cte?(dataset)
    s, ds = hoist_cte(dataset)
    s.compound_clone(type, ds, opts)
  else
    super
  end
end
|
|
751
|
+
|
|
752
|
+
private
|
|
753
|
+
|
|
754
|
+
# Whether the given object is a Sequel dataset with its own WITH clause.
def has_cte?(ds)
  ds.is_a?(Dataset) && ds.opts[:with]
end
|
|
757
|
+
|
|
758
|
+
# Impala doesn't handle the DEFAULT keyword used in inserts, as all default
# values in Impala are NULL, so just insert a NULL into the last column.
def insert_empty_columns_values
  [[columns.last], [nil]]
end
|
|
763
|
+
|
|
764
|
+
# SQL fragment for the true boolean literal.
def literal_true
  BOOL_TRUE
end

# SQL fragment for the false boolean literal.
def literal_false
  BOOL_FALSE
end
|
|
771
|
+
|
|
772
|
+
# Append INSERT INTO (or INSERT OVERWRITE, when #insert_overwrite was
# used) followed by the unaliased table name.
def insert_into_sql(sql)
  sql << (@opts[:insert_overwrite] ? ' OVERWRITE ' : ' INTO ')
  identifier_append(sql, unaliased_identifier(@opts[:from].first))
end
|
|
776
|
+
|
|
777
|
+
# Append a single-quoted string literal to +sql+, backslash-escaping
# every backslash and apostrophe in the value.
def literal_string_append(sql, s)
  escaped = s.to_s.gsub(/([\\'])/) { "\\#{$1}" }
  sql << "'" << escaped << "'"
end
|
|
782
|
+
|
|
783
|
+
# Multi-row inserts use a single statement with a VALUES list.
def multi_insert_sql_strategy
  :values
end
|
|
786
|
+
|
|
787
|
+
# Quote an identifier with backticks.  Impala doesn't support escaping
# inside quoted identifiers, so an identifier name must not itself
# contain a backtick.
def quoted_identifier_append(sql, name)
  sql << '`' << name.to_s << '`'
end
|
|
792
|
+
|
|
793
|
+
# Don't include a LIMIT clause if there is no FROM clause. In general,
# such queries can only return 1 row.
def select_limit_sql(sql)
  return unless opts[:from]
  super
end
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
# Support VALUES clause instead of the SELECT clause to return rows.
def select_values_sql(sql)
  sql << SELECT_VALUES
  expression_list_append(sql, opts[:values])
end
|
|
806
|
+
end
|
|
807
|
+
end
|
|
808
|
+
end
|