sequel-impala 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG +3 -0
- data/LICENSE +462 -0
- data/README.rdoc +39 -0
- data/Rakefile +39 -0
- data/lib/driver/commons-logging-1.2.jar +0 -0
- data/lib/driver/hadoop-common-2.6.0.jar +0 -0
- data/lib/driver/hadoop-core-2.6.0.jar +0 -0
- data/lib/driver/hive-exec-1.1.0.jar +0 -0
- data/lib/driver/hive-jdbc-1.1.0.jar +0 -0
- data/lib/driver/hive-metastore-1.1.0.jar +0 -0
- data/lib/driver/hive-service-1.1.0.jar +0 -0
- data/lib/driver/httpclient-4.3.jar +0 -0
- data/lib/driver/httpcore-4.3.jar +0 -0
- data/lib/driver/libfb303-0.9.0.jar +0 -0
- data/lib/driver/slf4j-api-1.7.5.jar +0 -0
- data/lib/impala.rb +47 -0
- data/lib/impala/connection.rb +117 -0
- data/lib/impala/cursor.rb +157 -0
- data/lib/impala/protocol.rb +8 -0
- data/lib/impala/protocol/beeswax_constants.rb +15 -0
- data/lib/impala/protocol/beeswax_service.rb +766 -0
- data/lib/impala/protocol/beeswax_types.rb +193 -0
- data/lib/impala/protocol/cli_service_constants.rb +60 -0
- data/lib/impala/protocol/cli_service_types.rb +1452 -0
- data/lib/impala/protocol/facebook_service.rb +706 -0
- data/lib/impala/protocol/fb303_constants.rb +15 -0
- data/lib/impala/protocol/fb303_types.rb +25 -0
- data/lib/impala/protocol/hive_metastore_constants.rb +53 -0
- data/lib/impala/protocol/hive_metastore_types.rb +698 -0
- data/lib/impala/protocol/impala_hive_server2_service.rb +29 -0
- data/lib/impala/protocol/impala_service.rb +377 -0
- data/lib/impala/protocol/impala_service_constants.rb +13 -0
- data/lib/impala/protocol/impala_service_types.rb +90 -0
- data/lib/impala/protocol/status_constants.rb +13 -0
- data/lib/impala/protocol/status_types.rb +46 -0
- data/lib/impala/protocol/t_c_l_i_service.rb +948 -0
- data/lib/impala/protocol/thrift_hive_metastore.rb +4707 -0
- data/lib/impala/version.rb +3 -0
- data/lib/jdbc/hive2.rb +46 -0
- data/lib/sequel/adapters/impala.rb +123 -0
- data/lib/sequel/adapters/jdbc/hive2.rb +26 -0
- data/lib/sequel/adapters/shared/impala.rb +635 -0
- data/lib/sequel/extensions/csv_to_parquet.rb +112 -0
- data/spec/database_test.rb +56 -0
- data/spec/dataset_test.rb +1268 -0
- data/spec/files/bad_down_migration/001_create_alt_basic.rb +4 -0
- data/spec/files/bad_down_migration/002_create_alt_advanced.rb +4 -0
- data/spec/files/bad_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/bad_timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/bad_timestamped_migrations/1273253853_3_create_users.rb +3 -0
- data/spec/files/bad_up_migration/001_create_alt_basic.rb +4 -0
- data/spec/files/bad_up_migration/002_create_alt_advanced.rb +3 -0
- data/spec/files/convert_to_timestamp_migrations/001_create_sessions.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/002_create_nodes.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/003_3_create_users.rb +4 -0
- data/spec/files/convert_to_timestamp_migrations/1273253850_create_artists.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/1273253852_create_albums.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253853_create_nodes.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253853_create_users.rb +4 -0
- data/spec/files/integer_migrations/001_create_sessions.rb +9 -0
- data/spec/files/integer_migrations/002_create_nodes.rb +9 -0
- data/spec/files/integer_migrations/003_3_create_users.rb +4 -0
- data/spec/files/interleaved_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253850_create_artists.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253852_create_albums.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253853_3_create_users.rb +4 -0
- data/spec/files/reversible_migrations/001_reversible.rb +5 -0
- data/spec/files/reversible_migrations/002_reversible.rb +5 -0
- data/spec/files/reversible_migrations/003_reversible.rb +5 -0
- data/spec/files/reversible_migrations/004_reversible.rb +5 -0
- data/spec/files/reversible_migrations/005_reversible.rb +10 -0
- data/spec/files/timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/timestamped_migrations/1273253853_3_create_users.rb +4 -0
- data/spec/impala_test.rb +285 -0
- data/spec/migrator_test.rb +240 -0
- data/spec/plugin_test.rb +91 -0
- data/spec/prepared_statement_test.rb +327 -0
- data/spec/schema_test.rb +356 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/timezone_test.rb +86 -0
- data/spec/type_test.rb +99 -0
- metadata +239 -0
data/lib/jdbc/hive2.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# Warn when loaded outside JRuby: the bundled jars are only usable on the
# JVM. (Referencing JRUBY_VERSION raises NameError outside JRuby, which
# the rescue modifier turns into true.)
warn 'jdbc-hive2 is only for use with JRuby' if (JRUBY_VERSION.nil? rescue true)

module Jdbc
  # Mirrors the conventions of the jdbc-* gem family: exposes the Hive2
  # JDBC driver jars shipped with this gem and knows how to load them
  # onto the JRuby classpath.
  module Hive2
    # Version of the bundled Hive JDBC driver jars.
    DRIVER_VERSION = '1.1.0'

    # Gem-style version string derived from the driver version.
    VERSION = DRIVER_VERSION + '.0'

    # All jar files (relative to this gem's lib directory) that must be
    # on the classpath for the Hive2 JDBC driver to function.
    def self.driver_jar
      [
        'driver/libfb303-0.9.0.jar',
        'driver/slf4j-api-1.7.5.jar',
        'driver/hadoop-common-2.6.0.jar',
        'driver/hadoop-core-2.6.0.jar',
        'driver/commons-logging-1.2.jar',
        'driver/hive-exec-1.1.0.jar',
        'driver/hive-jdbc-1.1.0.jar',
        'driver/hive-metastore-1.1.0.jar',
        'driver/hive-service-1.1.0.jar',
        'driver/httpcore-4.3.jar',
        'driver/httpclient-4.3.jar',
      ]
    end

    # Pull every driver jar onto the classpath. +method+ is the Kernel
    # method used to do so (:load by default, :require when autoloading).
    def self.load_driver(method = :load)
      driver_jar.each { |jar| send(method, jar) }
    end

    # Fully-qualified Java class name of the Hive2 JDBC driver.
    def self.driver_name
      'org.apache.hive.jdbc.HiveDriver'
    end

    # Backwards-compatible behavior: when running under JRuby with the
    # jdbc.driver.autoload system property set, load the driver as soon
    # as this file is required.
    if defined?(JRUBY_VERSION) &&
       (Java::JavaLang::Boolean.get_boolean('jdbc.driver.autoload'))
      warn "autoloading jdbc driver on require 'jdbc/hive2'" if $VERBOSE
      load_driver :require
    end
  end
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'impala'
|
2
|
+
require 'sequel/adapters/shared/impala'
|
3
|
+
|
4
|
+
module Sequel
|
5
|
+
module Impala
|
6
|
+
class Database < Sequel::Database
|
7
|
+
include DatabaseMethods
|
8
|
+
|
9
|
+
# Exception classes used by Impala.
|
10
|
+
ImpalaExceptions = [
|
11
|
+
::Impala::Error,
|
12
|
+
::Impala::Protocol::Beeswax::BeeswaxException,
|
13
|
+
IOError
|
14
|
+
].freeze
|
15
|
+
|
16
|
+
set_adapter_scheme :impala
|
17
|
+
|
18
|
+
# Connect to the Impala server. Currently, only the :host and :port options
|
19
|
+
# are respected, and they default to 'localhost' and 21000, respectively.
|
20
|
+
def connect(server)
|
21
|
+
opts = server_opts(server)
|
22
|
+
::Impala.connect(opts[:host]||'localhost', (opts[:port]||21000).to_i)
|
23
|
+
end
|
24
|
+
|
25
|
+
def database_error_classes
|
26
|
+
ImpalaExceptions
|
27
|
+
end
|
28
|
+
|
29
|
+
def disconnect_connection(c)
|
30
|
+
c.close
|
31
|
+
end
|
32
|
+
|
33
|
+
def execute(sql, opts=OPTS)
|
34
|
+
synchronize(opts[:server]) do |c|
|
35
|
+
begin
|
36
|
+
cursor = log_yield(sql){c.execute(sql)}
|
37
|
+
yield cursor if block_given?
|
38
|
+
nil
|
39
|
+
rescue *ImpalaExceptions => e
|
40
|
+
raise_error(e)
|
41
|
+
ensure
|
42
|
+
cursor.close if cursor && cursor.open?
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
def connection_execute_method
|
50
|
+
:query
|
51
|
+
end
|
52
|
+
|
53
|
+
# Impala raises IOError if it detects a problem on the connection, and
|
54
|
+
# in most cases that results in an unusable connection, so treat it as a
|
55
|
+
# disconnect error so Sequel will reconnect.
|
56
|
+
def disconnect_error?(exception, opts)
|
57
|
+
exception.is_a?(IOError) || super
|
58
|
+
end
|
59
|
+
|
60
|
+
# Use DESCRIBE to get the column names and types for the table.
|
61
|
+
def schema_parse_table(table_name, opts)
|
62
|
+
m = output_identifier_meth(opts[:dataset])
|
63
|
+
|
64
|
+
table = if opts[:schema]
|
65
|
+
Sequel.qualify(opts[:schema], table_name)
|
66
|
+
else
|
67
|
+
Sequel.identifier(table_name)
|
68
|
+
end
|
69
|
+
|
70
|
+
describe(table, opts).map do |row|
|
71
|
+
row[:db_type] = row[:type]
|
72
|
+
row[:type] = schema_column_type(row[:db_type])
|
73
|
+
row[:default] = nil
|
74
|
+
row[:primary_key] = false
|
75
|
+
[m.call(row.delete(:name)), row]
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
class Dataset < Sequel::Dataset
|
81
|
+
include DatasetMethods
|
82
|
+
|
83
|
+
Database::DatasetClass = self
|
84
|
+
|
85
|
+
APOS = "'".freeze
|
86
|
+
STRING_ESCAPES = {
|
87
|
+
"\\" => "\\\\".freeze,
|
88
|
+
"'" => "\\'".freeze,
|
89
|
+
"\n" => "\\n".freeze,
|
90
|
+
"\r" => "\\r".freeze,
|
91
|
+
"\0" => "\\0".freeze,
|
92
|
+
"\b" => "\\b".freeze,
|
93
|
+
"\04" => "\\Z".freeze,
|
94
|
+
# Impala is supposed to support this, but using it
|
95
|
+
# breaks things to the point of returning bad data.
|
96
|
+
# If you don't do this, the tabs in the input
|
97
|
+
# get converted to spaces, but that's better than the
|
98
|
+
# alternative.
|
99
|
+
# "\t" => "\\t".freeze,
|
100
|
+
}.freeze
|
101
|
+
STRING_ESCAPE_RE = /(#{Regexp.union(STRING_ESCAPES.keys)})/
|
102
|
+
|
103
|
+
def fetch_rows(sql)
|
104
|
+
execute(sql) do |cursor|
|
105
|
+
@columns = cursor.columns.map!{|c| output_identifier(c)}
|
106
|
+
cursor.typecast_map['timestamp'] = db.method(:to_application_timestamp)
|
107
|
+
cursor.each do |row|
|
108
|
+
yield row
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
# Unlike the jdbc/hive2 driver, the impala driver requires you escape
|
116
|
+
# some values in string literals to get correct results, but not the
|
117
|
+
# tab character or things break.
|
118
|
+
def literal_string_append(sql, s)
|
119
|
+
sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE){|m| STRING_ESCAPES[m]} << APOS
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'sequel/adapters/shared/impala'
|
2
|
+
|
3
|
+
Sequel::JDBC.load_driver('org.apache.hive.jdbc.HiveDriver', :Hive2)
|
4
|
+
|
5
|
+
module Sequel
|
6
|
+
module JDBC
|
7
|
+
Sequel.synchronize do
|
8
|
+
DATABASE_SETUP[:hive2] = proc do |db|
|
9
|
+
db.extend(Sequel::JDBC::Hive2::DatabaseMethods)
|
10
|
+
db.dataset_class = Sequel::JDBC::Hive2::Dataset
|
11
|
+
org.apache.hive.jdbc.HiveDriver
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
module Hive2
|
16
|
+
module DatabaseMethods
|
17
|
+
extend Sequel::Database::ResetIdentifierMangling
|
18
|
+
include Sequel::Impala::DatabaseMethods
|
19
|
+
end
|
20
|
+
|
21
|
+
class Dataset < JDBC::Dataset
|
22
|
+
include Sequel::Impala::DatasetMethods
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,635 @@
|
|
1
|
+
module Sequel
|
2
|
+
module Impala
|
3
|
+
module DatabaseMethods
|
4
|
+
# Do not use a composite primary key, foreign keys, or an
|
5
|
+
# index when creating a join table, as Impala doesn't support those.
|
6
|
+
def create_join_table(hash, options=OPTS)
|
7
|
+
keys = hash.keys.sort_by(&:to_s)
|
8
|
+
create_table(join_table_name(hash, options), options) do
|
9
|
+
keys.each do |key|
|
10
|
+
Integer key
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
# Create a database/schema in Imapala.
|
16
|
+
#
|
17
|
+
# Options:
|
18
|
+
# :if_not_exists :: Don't raise an error if the schema already exists.
|
19
|
+
# :location :: Set the file system location to store the data for tables
|
20
|
+
# in the created schema.
|
21
|
+
#
|
22
|
+
# Examples:
|
23
|
+
#
|
24
|
+
# create_schema(:s)
|
25
|
+
# # CREATE SCHEMA `s`
|
26
|
+
#
|
27
|
+
# create_schema(:s, :if_not_exists=>true)
|
28
|
+
# # CREATE SCHEMA IF NOT EXISTS `s`
|
29
|
+
#
|
30
|
+
# create_schema(:s, :location=>'/a/b')
|
31
|
+
# # CREATE SCHEMA `s` LOCATION '/a/b'
|
32
|
+
def create_schema(schema, options=OPTS)
|
33
|
+
run(create_schema_sql(schema, options))
|
34
|
+
end
|
35
|
+
|
36
|
+
# Set the database_type for this database to :impala.
|
37
|
+
def database_type
|
38
|
+
:impala
|
39
|
+
end
|
40
|
+
|
41
|
+
# Return the DESCRIBE output for the table, showing table
|
42
|
+
# columns, types, and comments. If the :formatted option
|
43
|
+
# is given, use DESCRIBE FORMATTED and return a lot more
|
44
|
+
# information about the table. Both of these return arrays
|
45
|
+
# of hashes.
|
46
|
+
#
|
47
|
+
# Examples:
|
48
|
+
#
|
49
|
+
# describe(:t)
|
50
|
+
# # DESCRIBE `t`
|
51
|
+
#
|
52
|
+
# describe(:t, :formatted=>true)
|
53
|
+
# # DESCRIBE FORMATTED `t`
|
54
|
+
def describe(table, opts=OPTS)
|
55
|
+
if ds = opts[:dataset]
|
56
|
+
ds = ds.naked
|
57
|
+
else
|
58
|
+
ds = dataset.clone
|
59
|
+
ds.identifier_input_method = identifier_input_method
|
60
|
+
end
|
61
|
+
ds.identifier_output_method = nil
|
62
|
+
ds.with_sql("DESCRIBE #{'FORMATTED ' if opts[:formatted]} ?", table).all
|
63
|
+
end
|
64
|
+
|
65
|
+
# Drop a database/schema from Imapala.
|
66
|
+
#
|
67
|
+
# Options:
|
68
|
+
# :if_exists :: Don't raise an error if the schema doesn't exist.
|
69
|
+
#
|
70
|
+
# Examples:
|
71
|
+
#
|
72
|
+
# drop_schema(:s)
|
73
|
+
# # DROP SCHEMA `s`
|
74
|
+
#
|
75
|
+
# create_schema(:s, :if_exists=>true)
|
76
|
+
# # DROP SCHEMA IF EXISTS `s`
|
77
|
+
def drop_schema(schema, options=OPTS)
|
78
|
+
run(drop_schema_sql(schema, options))
|
79
|
+
end
|
80
|
+
|
81
|
+
# Implicitly quailfy the table if using the :search_path option.
|
82
|
+
# This will look at all of the tables and views in the schemas,
|
83
|
+
# and if an unqualified table is used and appears in one of the
|
84
|
+
# schemas, it will be implicitly qualified with the given schema
|
85
|
+
# name.
|
86
|
+
# Implicitly qualify the table if using the :search_path option.
# Looks at all of the tables and views in the search-path schemas; if
# an unqualified table is used and appears in one of those schemas, it
# is implicitly qualified with that schema name. Already-qualified
# tables and unrecognized expression types pass through unchanged.
def implicit_qualify(table)
  return table unless opts[:search_path]

  case table
  when Symbol
    s, t, a = Sequel.split_symbol(table)
    if s
      # Already schema-qualified, leave it alone.
      return table
    end
    t = implicit_qualify(t)
    a ? Sequel.as(t, a) : t
  when String
    if schema = search_path_table_schemas[table]
      Sequel.qualify(schema, table)
    else
      table
    end
  when SQL::Identifier
    implicit_qualify(table.value.to_s)
  when SQL::AliasedExpression
    # Qualify the wrapped expression while preserving the alias.
    # (The previous code recursed on the AliasedExpression itself,
    # causing infinite recursion, and referenced an undefined local
    # +v+ for the alias.)
    SQL::AliasedExpression.new(implicit_qualify(table.expression), table.alias)
  else
    table
  end
end
|
111
|
+
|
112
|
+
# Load data from HDFS into Impala.
|
113
|
+
#
|
114
|
+
# Options:
|
115
|
+
# :overwrite :: Overwrite the existing table instead of appending to it.
|
116
|
+
#
|
117
|
+
# Examples:
|
118
|
+
#
|
119
|
+
# load_data('/user/foo', :bar)
|
120
|
+
# LOAD DATA INPATH '/user/foo' INTO TABLE `bar`
|
121
|
+
#
|
122
|
+
# load_data('/user/foo', :bar, :overwrite=>true)
|
123
|
+
# LOAD DATA INPATH '/user/foo' OVERWRITE INTO TABLE `bar`
|
124
|
+
def load_data(path, table, options=OPTS)
|
125
|
+
run(load_data_sql(path, table, options))
|
126
|
+
end
|
127
|
+
|
128
|
+
# Don't use PRIMARY KEY or AUTOINCREMENT on Impala, as Impala doesn't
|
129
|
+
# support either.
|
130
|
+
def serial_primary_key_options
|
131
|
+
{:type=>Integer}
|
132
|
+
end
|
133
|
+
|
134
|
+
# Impala supports CREATE TABLE IF NOT EXISTS.
|
135
|
+
def supports_create_table_if_not_exists?
|
136
|
+
true
|
137
|
+
end
|
138
|
+
|
139
|
+
# Impala does not support foreign keys.
|
140
|
+
def supports_foreign_key_parsing?
|
141
|
+
false
|
142
|
+
end
|
143
|
+
|
144
|
+
# Impala does not support indexes.
|
145
|
+
def supports_index_parsing?
|
146
|
+
false
|
147
|
+
end
|
148
|
+
|
149
|
+
# Check that the tables returned by the JDBC driver are actually valid
|
150
|
+
# tables and not views. The Hive2 JDBC driver returns views when listing
|
151
|
+
# tables and nothing when listing views.
|
152
|
+
def tables(opts=OPTS)
|
153
|
+
_tables(opts).select{|t| is_valid_table?(t)}
|
154
|
+
end
|
155
|
+
|
156
|
+
# Impala doesn't support transactions, so instead of issuing a
|
157
|
+
# transaction, just checkout a connection. This ensures the same
|
158
|
+
# connection is used for the transaction block, but as Impala
|
159
|
+
# doesn't support transactions, you can't rollback.
|
160
|
+
def transaction(opts=OPTS)
|
161
|
+
synchronize(opts[:server]) do |c|
|
162
|
+
yield c
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
# Determine the available views for listing all tables via JDBC (which
|
167
|
+
# includes both tables and views), and removing all valid tables.
|
168
|
+
def views(opts=OPTS)
|
169
|
+
_tables(opts).reject{|t| is_valid_table?(t)}
|
170
|
+
end
|
171
|
+
|
172
|
+
private
|
173
|
+
|
174
|
+
def _tables(opts)
|
175
|
+
m = output_identifier_meth
|
176
|
+
metadata_dataset.with_sql("SHOW TABLES#{" IN #{quote_identifier(opts[:schema])}" if opts[:schema]}").
|
177
|
+
select_map(:name).map do |table|
|
178
|
+
m.call(table)
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
# Impala uses ADD COLUMNS instead of ADD COLUMN. As its use of
|
183
|
+
# ADD COLUMNS implies, it supports adding multiple columns at once,
|
184
|
+
# but this adapter doesn't offer an API for that.
|
185
|
+
def alter_table_add_column_sql(table, op)
|
186
|
+
"ADD COLUMNS (#{column_definition_sql(op)})"
|
187
|
+
end
|
188
|
+
|
189
|
+
# Impala uses CHANGE instead of having separate RENAME syntax
|
190
|
+
# for renaming tables. As CHANGE requires a type, look up the
|
191
|
+
# type from the database schema.
|
192
|
+
def alter_table_rename_column_sql(table, op)
|
193
|
+
old_name = op[:name]
|
194
|
+
opts = schema(table).find{|x| x.first == old_name}
|
195
|
+
opts = opts ? opts.last : {}
|
196
|
+
unless opts[:db_type]
|
197
|
+
raise Error, "cannot determine database type to use for CHANGE COLUMN operation"
|
198
|
+
end
|
199
|
+
new_col = op.merge(:type=>opts[:db_type], :name=>op[:new_name])
|
200
|
+
"CHANGE #{quote_identifier(old_name)} #{column_definition_sql(new_col)}"
|
201
|
+
end
|
202
|
+
|
203
|
+
def alter_table_set_column_type_sql(table, op)
|
204
|
+
"CHANGE #{quote_identifier(op[:name])} #{column_definition_sql(op)}"
|
205
|
+
end
|
206
|
+
|
207
|
+
# Add COMMENT when defining the column, if :comment is present.
|
208
|
+
def column_definition_comment_sql(sql, column)
|
209
|
+
sql << " COMMENT #{literal(column[:comment])}" if column[:comment]
|
210
|
+
end
|
211
|
+
|
212
|
+
def column_definition_order
|
213
|
+
[:comment]
|
214
|
+
end
|
215
|
+
|
216
|
+
def create_schema_sql(schema, options)
|
217
|
+
"CREATE SCHEMA #{'IF NOT EXISTS ' if options[:if_not_exists]}#{quote_identifier(schema)}#{" LOCATION #{literal(options[:location])}" if options[:location]}"
|
218
|
+
end
|
219
|
+
|
220
|
+
# Support using table parameters for CREATE TABLE AS, necessary for
|
221
|
+
# creating parquet files from datasets.
|
222
|
+
def create_table_as_sql(name, sql, options)
|
223
|
+
"#{create_table_prefix_sql(name, options)}#{create_table_parameters_sql(options) } AS #{sql}"
|
224
|
+
end
|
225
|
+
|
226
|
+
def create_table_prefix_sql(name, options)
|
227
|
+
"CREATE #{'EXTERNAL ' if options[:external]}TABLE#{' IF NOT EXISTS' if options[:if_not_exists]} #{quote_schema_table(name)}"
|
228
|
+
end
|
229
|
+
|
230
|
+
def create_table_sql(name, generator, options)
|
231
|
+
sql = super
|
232
|
+
sql << create_table_parameters_sql(options)
|
233
|
+
sql
|
234
|
+
end
|
235
|
+
|
236
|
+
# Build the trailing table-parameter clauses for a CREATE TABLE
# statement from +options+: COMMENT, ROW FORMAT DELIMITED (with
# FIELDS/ESCAPED/LINES sub-clauses), STORED AS, and LOCATION.
# Returns an empty string when no relevant option is given; otherwise
# each clause is prefixed with a single space.
def create_table_parameters_sql(options)
  clauses = []
  clauses << "COMMENT #{literal(options[:comment])}" if options[:comment]

  field_term = options[:field_term]
  line_term = options[:line_term]
  if field_term || line_term
    clauses << "ROW FORMAT DELIMITED"
    if field_term
      clauses << "FIELDS TERMINATED BY #{literal(field_term)}"
      clauses << "ESCAPED BY #{literal(options[:field_escape])}" if options[:field_escape]
    end
    clauses << "LINES TERMINATED BY #{literal(line_term)}" if line_term
  end

  clauses << "STORED AS #{options[:stored_as]}" if options[:stored_as]
  clauses << "LOCATION #{literal(options[:location])}" if options[:location]

  clauses.map { |clause| " #{clause}" }.join
end
|
253
|
+
|
254
|
+
def drop_schema_sql(schema, options)
|
255
|
+
"DROP SCHEMA #{'IF EXISTS ' if options[:if_exists]}#{quote_identifier(schema)}"
|
256
|
+
end
|
257
|
+
|
258
|
+
# Impala folds identifiers to lowercase, quoted or not, and is actually
|
259
|
+
# case insensitive, so don't use an identifier input or output method.
|
260
|
+
def identifier_input_method_default
|
261
|
+
nil
|
262
|
+
end
|
263
|
+
def identifier_output_method_default
|
264
|
+
nil
|
265
|
+
end
|
266
|
+
|
267
|
+
def search_path_table_schemas
|
268
|
+
@search_path_table_schemas ||= begin
|
269
|
+
search_path = opts[:search_path]
|
270
|
+
search_path = search_path.split(',') if search_path.is_a?(String)
|
271
|
+
table_schemas = {}
|
272
|
+
search_path.reverse_each do |schema|
|
273
|
+
_tables(:schema=>schema).each do |table|
|
274
|
+
table_schemas[table.to_s] = schema.to_s
|
275
|
+
end
|
276
|
+
end
|
277
|
+
table_schemas
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
# SHOW TABLE STATS will raise an error if given a view and not a table,
|
282
|
+
# so use that to differentiate tables from views.
|
283
|
+
# Differentiate tables from views using the DESCRIBE FORMATTED output.
# Returns false when the 'Table Type:' row's type contains VIEW, a
# truthy value when it does not, and nil when no 'Table Type:' row can
# be found in the output.
def is_valid_table?(t)
  rows = describe(t, :formatted=>true)
  if row = rows.find{|r| r[:name].to_s.strip == 'Table Type:'}
    # A type matching /VIEW/ (e.g. a virtual view) marks a view.
    row[:type].to_s.strip !~ /VIEW/
  end
end
|
289
|
+
|
290
|
+
def load_data_sql(path, table, options)
|
291
|
+
"LOAD DATA INPATH #{literal(path)}#{' OVERWRITE' if options[:overwrite]} INTO TABLE #{literal(table)}"
|
292
|
+
end
|
293
|
+
|
294
|
+
# Metadata queries on JDBC use uppercase keys, so set the identifier
|
295
|
+
# output method to downcase so that metadata queries work correctly.
|
296
|
+
def metadata_dataset
|
297
|
+
@metadata_dataset ||= (
|
298
|
+
ds = dataset;
|
299
|
+
ds.identifier_input_method = identifier_input_method_default;
|
300
|
+
ds.identifier_output_method = :downcase;
|
301
|
+
ds
|
302
|
+
)
|
303
|
+
end
|
304
|
+
|
305
|
+
# Impala doesn't support date columns yet, so use timestamp until date
|
306
|
+
# is natively supported.
|
307
|
+
def type_literal_generic_date(column)
|
308
|
+
:timestamp
|
309
|
+
end
|
310
|
+
|
311
|
+
# Impala uses double instead of "double precision" for floating point
|
312
|
+
# values.
|
313
|
+
def type_literal_generic_float(column)
|
314
|
+
:double
|
315
|
+
end
|
316
|
+
|
317
|
+
# Impala uses decimal instead of numeric for arbitrary precision
|
318
|
+
# numeric values.
|
319
|
+
def type_literal_generic_numeric(column)
|
320
|
+
column[:size] ? "decimal(#{Array(column[:size]).join(', ')})" : :decimal
|
321
|
+
end
|
322
|
+
|
323
|
+
# Use char or varchar if given a size, otherwise use string.
|
324
|
+
# Using a size is not recommend, as Impala doesn't implicitly
|
325
|
+
# cast string values to char or varchar, and doesn't implicitly
|
326
|
+
# cast from different sizes of varchar.
|
327
|
+
def type_literal_generic_string(column)
|
328
|
+
if size = column[:size]
|
329
|
+
"#{'var' unless column[:fixed]}char(#{size})"
|
330
|
+
else
|
331
|
+
:string
|
332
|
+
end
|
333
|
+
end
|
334
|
+
end
|
335
|
+
|
336
|
+
module DatasetMethods
|
337
|
+
BACKTICK = '`'.freeze
|
338
|
+
APOS = "'".freeze
|
339
|
+
STRING_ESCAPE_RE = /([\\'])/
|
340
|
+
STRING_ESCAPE_REPLACE = '\\\\\1'.freeze
|
341
|
+
BOOL_TRUE = 'true'.freeze
|
342
|
+
BOOL_FALSE = 'false'.freeze
|
343
|
+
CONSTANT_LITERAL_MAP = {:CURRENT_TIMESTAMP=>'now()'.freeze}.freeze
|
344
|
+
PAREN_OPEN = Dataset::PAREN_OPEN
|
345
|
+
PAREN_CLOSE = Dataset::PAREN_CLOSE
|
346
|
+
SPACE = Dataset::SPACE
|
347
|
+
NOT = 'NOT '.freeze
|
348
|
+
REGEXP = ' REGEXP '.freeze
|
349
|
+
EXCEPT_SOURCE_COLUMN = :__source__
|
350
|
+
|
351
|
+
Dataset.def_sql_method(self, :select, %w'with select distinct columns from join where group having compounds order limit')
|
352
|
+
|
353
|
+
# Handle string concatenation using the concat string function.
|
354
|
+
# Don't use the ESCAPE syntax when using LIKE/NOT LIKE, as
|
355
|
+
# Impala doesn't support escaping LIKE metacharacters.
|
356
|
+
# Support regexps on Impala using the REGEXP operator.
|
357
|
+
# For cast insensitive regexps, cast both values to uppercase first.
|
358
|
+
def complex_expression_sql_append(sql, op, args)
|
359
|
+
case op
|
360
|
+
when :'||'
|
361
|
+
literal_append(sql, Sequel.function(:concat, *args))
|
362
|
+
when :LIKE, :'NOT LIKE'
|
363
|
+
sql << PAREN_OPEN
|
364
|
+
literal_append(sql, args.at(0))
|
365
|
+
sql << SPACE << op.to_s << SPACE
|
366
|
+
literal_append(sql, args.at(1))
|
367
|
+
sql << PAREN_CLOSE
|
368
|
+
when :~, :'!~', :'~*', :'!~*'
|
369
|
+
if op == :'~*' || op == :'!~*'
|
370
|
+
args = args.map{|a| Sequel.function(:upper, a)}
|
371
|
+
end
|
372
|
+
sql << NOT if op == :'!~' || op == :'!~*'
|
373
|
+
sql << PAREN_OPEN
|
374
|
+
literal_append(sql, args.at(0))
|
375
|
+
sql << REGEXP
|
376
|
+
literal_append(sql, args.at(1))
|
377
|
+
sql << PAREN_CLOSE
|
378
|
+
else
|
379
|
+
super
|
380
|
+
end
|
381
|
+
end
|
382
|
+
|
383
|
+
# Use now() for current timestamp, as Impala doesn't support
|
384
|
+
# CURRENT_TIMESTAMP.
|
385
|
+
def constant_sql_append(sql, constant)
|
386
|
+
sql << CONSTANT_LITERAL_MAP.fetch(constant, constant.to_s)
|
387
|
+
end
|
388
|
+
|
389
|
+
# Use the addition operator combined with interval types to
|
390
|
+
# handle date arithmetic when using the date_arithmetic
|
391
|
+
# extension.
|
392
|
+
def date_add_sql_append(sql, da)
|
393
|
+
h = da.interval
|
394
|
+
expr = da.expr
|
395
|
+
intervals = []
|
396
|
+
each_valid_interval_unit(h, Sequel::SQL::DateAdd::DatasetMethods::DEF_DURATION_UNITS) do |value, sql_unit|
|
397
|
+
intervals << Sequel.lit("INTERVAL #{value} #{sql_unit}")
|
398
|
+
end
|
399
|
+
if intervals.empty?
|
400
|
+
return literal_append(sql, Sequel.cast(expr, Time))
|
401
|
+
else
|
402
|
+
intervals.unshift(Sequel.cast(expr, Time))
|
403
|
+
return complex_expression_sql_append(sql, :+, intervals)
|
404
|
+
end
|
405
|
+
end
|
406
|
+
|
407
|
+
# DELETE is emulated on Impala and doesn't return the number of
|
408
|
+
# modified rows.
|
409
|
+
def delete
|
410
|
+
super
|
411
|
+
nil
|
412
|
+
end
|
413
|
+
|
414
|
+
# Emulate DELETE using INSERT OVERWRITE selecting all columns from
|
415
|
+
# the table, with a reversed condition used for WHERE.
|
416
|
+
# Emulate DELETE using INSERT OVERWRITE, selecting all rows from the
# table that do NOT match this dataset's WHERE clause; rows matching
# the filter are thereby dropped. With no WHERE clause, 'WHERE false'
# selects nothing, overwriting the table with an empty result set.
def delete_sql
  sql = "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT * FROM "
  source_list_append(sql, opts[:from])
  if where = opts[:where]
    # Keep only the rows the DELETE would not have removed.
    sql << " WHERE NOT ("
    literal_append(sql, where)
    sql << ")"
  else
    sql << " WHERE false"
  end
  sql
end
|
430
|
+
|
431
|
+
# Implicitly qualify tables if using the :search_path database option.
|
432
|
+
def from(*)
|
433
|
+
ds = super
|
434
|
+
ds.opts[:from].map!{|t| db.implicit_qualify(t)}
|
435
|
+
ds
|
436
|
+
end
|
437
|
+
|
438
|
+
# Implicitly qualify tables if using the :search_path database option.
|
439
|
+
def join_table(type, table, expr=nil, options=OPTS, &block)
|
440
|
+
super(type, db.implicit_qualify(table), expr, options, &block)
|
441
|
+
end
|
442
|
+
|
443
|
+
# Emulate TRUNCATE by using INSERT OVERWRITE selecting all columns
|
444
|
+
# from the table, with WHERE false.
|
445
|
+
def truncate_sql
|
446
|
+
ds = clone
|
447
|
+
ds.opts.delete(:where)
|
448
|
+
ds.delete_sql
|
449
|
+
end
|
450
|
+
|
451
|
+
# Don't remove an order, because that breaks things when offsets
|
452
|
+
# are used, as Impala requires an order when using an offset.
|
453
|
+
def empty?
|
454
|
+
get(Sequel::SQL::AliasedExpression.new(1, :one)).nil?
|
455
|
+
end
|
456
|
+
|
457
|
+
# Emulate INTERSECT using a UNION ALL and checking for values in only the first table.
|
458
|
+
def except(other, opts=OPTS)
|
459
|
+
raise(InvalidOperation, "EXCEPT ALL not supported") if opts[:all]
|
460
|
+
raise(InvalidOperation, "The :from_self=>false option to except is not supported") if opts[:from_self] == false
|
461
|
+
cols = columns
|
462
|
+
rhs = other.from_self.select_group(*other.columns).select_append(Sequel.expr(2).as(EXCEPT_SOURCE_COLUMN))
|
463
|
+
from_self.
|
464
|
+
select_group(*cols).
|
465
|
+
select_append(Sequel.expr(1).as(EXCEPT_SOURCE_COLUMN)).
|
466
|
+
union(rhs, all: true).
|
467
|
+
select_group(*cols).
|
468
|
+
having{{count{}.* => 1, min(EXCEPT_SOURCE_COLUMN) => 1}}.
|
469
|
+
from_self(opts)
|
470
|
+
end
|
471
|
+
|
472
|
+
# Use INSERT OVERWRITE instead of INSERT INTO when inserting into this dataset:
|
473
|
+
#
|
474
|
+
# DB[:table].insert_overwrite.insert(DB[:other])
|
475
|
+
# # INSERT OVERWRITE table SELECT * FROM other
|
476
|
+
def insert_overwrite
|
477
|
+
clone(:insert_overwrite=>true)
|
478
|
+
end
|
479
|
+
|
480
|
+
# Impala does not support INSERT DEFAULT VALUES.
|
481
|
+
def insert_supports_empty_values?
|
482
|
+
false
|
483
|
+
end
|
484
|
+
|
485
|
+
# Emulate INTERSECT using a UNION ALL and checking for values in both tables.
|
486
|
+
def intersect(other, opts=OPTS)
|
487
|
+
raise(InvalidOperation, "INTERSECT ALL not supported") if opts[:all]
|
488
|
+
raise(InvalidOperation, "The :from_self=>false option to intersect is not supported") if opts[:from_self] == false
|
489
|
+
cols = columns
|
490
|
+
from_self.
|
491
|
+
select_group(*cols).
|
492
|
+
union(other.from_self.select_group(*other.columns), all: true).
|
493
|
+
select_group(*cols).
|
494
|
+
having{count{}.* > 1}.
|
495
|
+
from_self(opts)
|
496
|
+
end
|
497
|
+
|
498
|
+
# Impala supports non-recursive common table expressions.
|
499
|
+
def supports_cte?(type=:select)
|
500
|
+
true
|
501
|
+
end
|
502
|
+
|
503
|
+
# Impala doesn't support derived column lists when aliasing
|
504
|
+
# tables.
|
505
|
+
def supports_derived_column_lists?
|
506
|
+
false
|
507
|
+
end
|
508
|
+
|
509
|
+
# Impala doesn't support EXCEPT or INTERSECT, but support is emulated for them.
|
510
|
+
# However, EXCEPT ALL and INTERSECT ALL are not emulated.
|
511
|
+
def supports_intersect_except_all?
|
512
|
+
false
|
513
|
+
end
|
514
|
+
|
515
|
+
# Impala only support IS NULL, not IS TRUE or IS FALSE.
|
516
|
+
def supports_is_true?
|
517
|
+
false
|
518
|
+
end
|
519
|
+
|
520
|
+
# Impala doesn't support IN when used with multiple columns.
|
521
|
+
def supports_multiple_column_in?
|
522
|
+
false
|
523
|
+
end
|
524
|
+
|
525
|
+
# Impala supports regexps using the REGEXP operator.
|
526
|
+
def supports_regexp?
|
527
|
+
true
|
528
|
+
end
|
529
|
+
|
530
|
+
# Impala supports window functions.
|
531
|
+
def supports_window_functions?
|
532
|
+
true
|
533
|
+
end
|
534
|
+
|
535
|
+
# Create a parquet file from this dataset. +table+ should
|
536
|
+
# be the table name to create. To specify a path for the
|
537
|
+
# parquet file, use the :location option.
|
538
|
+
#
|
539
|
+
# Examples:
|
540
|
+
#
|
541
|
+
# DB[:t].to_parquet(:p)
|
542
|
+
# # CREATE TABLE `p` STORED AS parquet AS
|
543
|
+
# # SELECT * FROM `t`
|
544
|
+
#
|
545
|
+
# DB[:t].to_parquet(:p, :location=>'/a/b')
|
546
|
+
# # CREATE TABLE `p` STORED AS parquet LOCATION '/a/b'
|
547
|
+
# # SELECT * FROM `t`
|
548
|
+
def to_parquet(table, options=OPTS)
|
549
|
+
db.create_table(table, options.merge(:as=>self, :stored_as=>:parquet))
|
550
|
+
end
|
551
|
+
|
552
|
+
# UPDATE is emulated on Impala, and returns nil instead of the number of
|
553
|
+
# modified rows
|
554
|
+
def update(values=OPTS)
|
555
|
+
super
|
556
|
+
nil
|
557
|
+
end
|
558
|
+
|
559
|
+
# Emulate UPDATE using INSERT OVERWRITE AS SELECT. For all columns used
|
560
|
+
# in the given +values+, use a CASE statement. In the CASE statement,
|
561
|
+
# set the value to the new value if the row matches WHERE conditions of
|
562
|
+
# the current dataset, otherwise use the existing value.
|
563
|
+
# Emulate UPDATE using INSERT OVERWRITE ... SELECT. For each column
# assigned in +values+, emit a CASE expression yielding the new value
# for rows matching this dataset's WHERE clause and the existing
# column value otherwise; all other columns are selected through
# unchanged.
def update_sql(values)
  sql = "INSERT OVERWRITE "
  source_list_append(sql, opts[:from])
  sql << " SELECT "
  comma = false

  if where = opts[:where]
    # Render the filter once up front so the same SQL fragment can be
    # reused inside every column's CASE expression.
    where = Sequel.lit(literal(where))
  else
    # No filter: every row receives the new values.
    where = true
  end

  select_all.columns.each do |c|
    if comma
      sql << comma
    else
      # First column emits no separator; use ', ' for the rest.
      comma = ', '
    end

    if values.has_key?(c)
      new_value = values[c]
      # CASE WHEN <where> THEN <new_value> ELSE <c> END AS <c>
      literal_append(sql, Sequel.case({where=>new_value}, c).as(c))
    else
      quote_identifier_append(sql, c)
    end
  end
  sql << " FROM "
  source_list_append(sql, opts[:from])
  sql
end
|
593
|
+
|
594
|
+
private
|
595
|
+
|
596
|
+
# Impala doesn't handle the DEFAULT keyword used in inserts, as all default
|
597
|
+
# values in Impala are NULL, so just use a NULL value.
|
598
|
+
def insert_empty_columns_values
|
599
|
+
[[columns.last], [nil]]
|
600
|
+
end
|
601
|
+
|
602
|
+
def literal_true
|
603
|
+
BOOL_TRUE
|
604
|
+
end
|
605
|
+
|
606
|
+
def literal_false
|
607
|
+
BOOL_FALSE
|
608
|
+
end
|
609
|
+
|
610
|
+
def insert_into_sql(sql)
|
611
|
+
sql << (@opts[:insert_overwrite] ? ' OVERWRITE ' : ' INTO ')
|
612
|
+
identifier_append(sql, unaliased_identifier(@opts[:from].first))
|
613
|
+
end
|
614
|
+
|
615
|
+
# Double backslashes in all strings, and escape all apostrophes with
|
616
|
+
# backslashes.
|
617
|
+
def literal_string_append(sql, s)
|
618
|
+
sql << APOS << s.to_s.gsub(STRING_ESCAPE_RE, STRING_ESCAPE_REPLACE) << APOS
|
619
|
+
end
|
620
|
+
|
621
|
+
# Impala doesn't support esacping of identifiers, so you can't use backtick in
|
622
|
+
# an identifier name.
|
623
|
+
def quoted_identifier_append(sql, name)
|
624
|
+
sql << BACKTICK << name.to_s << BACKTICK
|
625
|
+
end
|
626
|
+
|
627
|
+
# Don't include a LIMIT clause if there is no FROM clause. In general,
|
628
|
+
# such queries can only return 1 row.
|
629
|
+
def select_limit_sql(sql)
|
630
|
+
return unless opts[:from]
|
631
|
+
super
|
632
|
+
end
|
633
|
+
end
|
634
|
+
end
|
635
|
+
end
|