sequel_impala 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +50 -0
- data/LICENSE +463 -0
- data/README.md +45 -0
- data/Rakefile +39 -0
- data/lib/driver/commons-collections-3.2.1.jar +0 -0
- data/lib/driver/commons-configuration-1.10.jar +0 -0
- data/lib/driver/commons-logging-1.2.jar +0 -0
- data/lib/driver/hadoop-auth-2.9.0.jar +0 -0
- data/lib/driver/hadoop-common-2.9.0.jar +0 -0
- data/lib/driver/hadoop-core-2.6.0.jar +0 -0
- data/lib/driver/hive-exec-1.1.0.jar +0 -0
- data/lib/driver/hive-jdbc-1.1.0.jar +0 -0
- data/lib/driver/hive-metastore-1.1.0.jar +0 -0
- data/lib/driver/hive-service-1.1.0.jar +0 -0
- data/lib/driver/httpclient-4.3.jar +0 -0
- data/lib/driver/httpcore-4.3.jar +0 -0
- data/lib/driver/libfb303-0.9.0.jar +0 -0
- data/lib/driver/log4j-1.2.17.jar +0 -0
- data/lib/driver/slf4j-api-1.7.5.jar +0 -0
- data/lib/driver/stax2-api-3.1.4.jar +0 -0
- data/lib/driver/woodstox-core-asl-4.4.1.jar +0 -0
- data/lib/impala.rb +55 -0
- data/lib/impala/connection.rb +180 -0
- data/lib/impala/cursor.rb +200 -0
- data/lib/impala/progress_reporter.rb +40 -0
- data/lib/impala/protocol.rb +8 -0
- data/lib/impala/protocol/beeswax_constants.rb +15 -0
- data/lib/impala/protocol/beeswax_service.rb +747 -0
- data/lib/impala/protocol/beeswax_types.rb +193 -0
- data/lib/impala/protocol/exec_stats_constants.rb +13 -0
- data/lib/impala/protocol/exec_stats_types.rb +133 -0
- data/lib/impala/protocol/facebook_service.rb +706 -0
- data/lib/impala/protocol/fb303_constants.rb +15 -0
- data/lib/impala/protocol/fb303_types.rb +25 -0
- data/lib/impala/protocol/hive_metastore_constants.rb +53 -0
- data/lib/impala/protocol/hive_metastore_types.rb +698 -0
- data/lib/impala/protocol/impala_hive_server2_service.rb +137 -0
- data/lib/impala/protocol/impala_service.rb +443 -0
- data/lib/impala/protocol/impala_service_constants.rb +13 -0
- data/lib/impala/protocol/impala_service_types.rb +192 -0
- data/lib/impala/protocol/status_constants.rb +13 -0
- data/lib/impala/protocol/status_types.rb +46 -0
- data/lib/impala/protocol/t_c_l_i_service.rb +1108 -0
- data/lib/impala/protocol/t_c_l_i_service_constants.rb +72 -0
- data/lib/impala/protocol/t_c_l_i_service_types.rb +1802 -0
- data/lib/impala/protocol/thrift_hive_metastore.rb +4707 -0
- data/lib/impala/protocol/types_constants.rb +13 -0
- data/lib/impala/protocol/types_types.rb +332 -0
- data/lib/impala/sasl_transport.rb +117 -0
- data/lib/impala/thrift_patch.rb +31 -0
- data/lib/impala/version.rb +3 -0
- data/lib/jdbc/hive2.rb +52 -0
- data/lib/jdbc/impala.rb +50 -0
- data/lib/rbhive.rb +8 -0
- data/lib/rbhive/connection.rb +150 -0
- data/lib/rbhive/explain_result.rb +46 -0
- data/lib/rbhive/result_set.rb +37 -0
- data/lib/rbhive/schema_definition.rb +86 -0
- data/lib/rbhive/t_c_l_i_connection.rb +466 -0
- data/lib/rbhive/t_c_l_i_result_set.rb +3 -0
- data/lib/rbhive/t_c_l_i_schema_definition.rb +87 -0
- data/lib/rbhive/table_schema.rb +122 -0
- data/lib/rbhive/version.rb +3 -0
- data/lib/sequel/adapters/impala.rb +220 -0
- data/lib/sequel/adapters/jdbc/hive2.rb +36 -0
- data/lib/sequel/adapters/jdbc/impala.rb +38 -0
- data/lib/sequel/adapters/rbhive.rb +177 -0
- data/lib/sequel/adapters/shared/impala.rb +808 -0
- data/lib/sequel/extensions/csv_to_parquet.rb +166 -0
- data/lib/thrift/facebook_service.rb +700 -0
- data/lib/thrift/fb303_constants.rb +9 -0
- data/lib/thrift/fb303_types.rb +19 -0
- data/lib/thrift/hive_metastore_constants.rb +41 -0
- data/lib/thrift/hive_metastore_types.rb +630 -0
- data/lib/thrift/hive_service_constants.rb +13 -0
- data/lib/thrift/hive_service_types.rb +72 -0
- data/lib/thrift/queryplan_constants.rb +13 -0
- data/lib/thrift/queryplan_types.rb +261 -0
- data/lib/thrift/sasl_client_transport.rb +161 -0
- data/lib/thrift/serde_constants.rb +92 -0
- data/lib/thrift/serde_types.rb +7 -0
- data/lib/thrift/t_c_l_i_service.rb +1054 -0
- data/lib/thrift/t_c_l_i_service_constants.rb +72 -0
- data/lib/thrift/t_c_l_i_service_types.rb +1768 -0
- data/lib/thrift/thrift_hive.rb +508 -0
- data/lib/thrift/thrift_hive_metastore.rb +3856 -0
- data/spec/database_test.rb +56 -0
- data/spec/dataset_test.rb +1268 -0
- data/spec/files/bad_down_migration/001_create_alt_basic.rb +4 -0
- data/spec/files/bad_down_migration/002_create_alt_advanced.rb +4 -0
- data/spec/files/bad_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/bad_timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/bad_timestamped_migrations/1273253853_3_create_users.rb +3 -0
- data/spec/files/bad_up_migration/001_create_alt_basic.rb +4 -0
- data/spec/files/bad_up_migration/002_create_alt_advanced.rb +3 -0
- data/spec/files/convert_to_timestamp_migrations/001_create_sessions.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/002_create_nodes.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/003_3_create_users.rb +4 -0
- data/spec/files/convert_to_timestamp_migrations/1273253850_create_artists.rb +9 -0
- data/spec/files/convert_to_timestamp_migrations/1273253852_create_albums.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253853_create_nodes.rb +9 -0
- data/spec/files/duplicate_timestamped_migrations/1273253853_create_users.rb +4 -0
- data/spec/files/integer_migrations/001_create_sessions.rb +9 -0
- data/spec/files/integer_migrations/002_create_nodes.rb +9 -0
- data/spec/files/integer_migrations/003_3_create_users.rb +4 -0
- data/spec/files/interleaved_timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253850_create_artists.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253852_create_albums.rb +9 -0
- data/spec/files/interleaved_timestamped_migrations/1273253853_3_create_users.rb +4 -0
- data/spec/files/reversible_migrations/001_reversible.rb +5 -0
- data/spec/files/reversible_migrations/002_reversible.rb +5 -0
- data/spec/files/reversible_migrations/003_reversible.rb +5 -0
- data/spec/files/reversible_migrations/004_reversible.rb +5 -0
- data/spec/files/reversible_migrations/005_reversible.rb +10 -0
- data/spec/files/timestamped_migrations/1273253849_create_sessions.rb +9 -0
- data/spec/files/timestamped_migrations/1273253851_create_nodes.rb +9 -0
- data/spec/files/timestamped_migrations/1273253853_3_create_users.rb +4 -0
- data/spec/impala_test.rb +290 -0
- data/spec/migrator_test.rb +240 -0
- data/spec/plugin_test.rb +91 -0
- data/spec/prepared_statement_test.rb +327 -0
- data/spec/schema_test.rb +356 -0
- data/spec/spec_helper.rb +19 -0
- data/spec/timezone_test.rb +86 -0
- data/spec/type_test.rb +99 -0
- metadata +294 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
|
|
3
|
+
module RBHive
  # Wraps a HiveServer2 (TCLI) result-set schema. Derives symbolized,
  # de-duplicated column names from the Thrift schema object and coerces
  # raw column values (strings from the wire) into Ruby types.
  class TCLISchemaDefinition
    attr_reader :schema

    # Fall back to hand-rolled values on old Rubies lacking the constants.
    NAN = Float::NAN rescue 0.0 / 0.0
    INFINITY = Float::INFINITY rescue 1.0 / 0.0

    # Conversion method sent to the raw value for each primitive Hive type.
    TYPES = {
      :boolean => :to_s,
      :string => :to_s,
      :float => :to_f,
      :double => :to_f,
      :int => :to_i,
      :bigint => :to_i,
      :smallint => :to_i,
      :tinyint => :to_i,
    }

    # schema::      Thrift schema object responding to #columns
    # example_row:: a sample row (responding to #colVals) used to detect
    #               partition columns missing from the schema; may be nil
    def initialize(schema, example_row)
      @schema = schema
      @example_row = example_row ? example_row.colVals : []
    end

    # Symbolized column names, memoized.
    def column_names
      @column_names ||= begin
        names = @schema.columns.map { |col| col.columnName }

        # In rare cases Hive can return two identical column names
        # (consider SELECT a.foo, b.foo...) with no disambiguation.
        # As a (far from ideal) solution we detect this edge case and
        # rename them a.foo => foo_1, b.foo => foo_2; otherwise one of
        # the columns would be trampled during Hash mapping.
        seen = Hash.new(0)
        names.map! do |name|
          seen[name] += 1
          seen[name] > 1 ? "#{name}---|---#{seen[name]}" : name
        end
        # Second pass: the first occurrence of a duplicated name was left
        # untouched above; tag it with suffix 1 now.
        names.map! { |name| seen[name] > 1 ? "#{name}---|---1" : name }
        names.map! { |name| name.gsub('---|---', '_').to_sym }

        # Hive doesn't return schema data for partition columns on
        # SELECT * queries; pad with :_p1, :_p2, ... so names line up
        # with the values in each row and avoid collisions.
        counter = 0
        names.push(:"_p#{counter += 1}") while names.length < @example_row.length
        names
      end
    end

    # Hash of column name => primitive type symbol, memoized.
    def column_type_map
      @column_type_map ||= column_names.each_with_object({}) do |name, map|
        definition = @schema.columns.find { |col| col.columnName.to_sym == name }
        # If the column isn't in the schema (eg partitions in SELECT *
        # queries) assume it is a string.
        type = TYPE_NAMES[definition.typeDesc.types.first.primitiveEntry.type].downcase rescue nil
        map[name] = definition && type ? type.to_sym : :string
      end
    end

    # Convert a Thrift row into a Hash of column name => coerced value.
    def coerce_row(row)
      raw_values = row.colVals.map { |cell| cell.get_value.value }
      column_names.zip(raw_values).each_with_object({}) do |(name, value), out|
        out[name] = coerce_column(name, value)
      end
    end

    # Coerce a single raw value according to its column's declared type.
    def coerce_column(column_name, value)
      type = column_type_map[column_name]
      unless type == :string
        return INFINITY if value == "Infinity"
        return NAN if value == "NaN"
      end
      return coerce_complex_value(value) if type.to_s =~ /^array/
      conversion = TYPES[type]
      conversion ? value.send(conversion) : value
    end

    # Project a coerced row Hash back into column order as an Array.
    def coerce_row_to_array(row)
      column_names.map { |name| row[name] }
    end

    # Parse a JSON-encoded complex (array) value; nil-ish inputs map to nil.
    def coerce_complex_value(value)
      return nil if value.nil? || value.length == 0 || value == 'null'
      JSON.parse(value)
    end
  end
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
module RBHive
  # Builder DSL describing a Hive table: columns, partitions, storage
  # format and (optionally) a SerDe. Renders CREATE TABLE and
  # ALTER TABLE ... COLUMNS DDL strings.
  class TableSchema
    attr_accessor :name
    attr_reader :columns, :partitions

    # name::    table name (backtick-quoted in generated DDL)
    # comment:: optional table-level comment
    # options:: :location, :field_sep, :line_sep, :collection_sep, :stored_as
    # The block, if given, is instance_eval'd so it can call #column,
    # #partition and #serde directly.
    def initialize(name, comment = nil, options = {}, &blk)
      @name = name
      @comment = comment
      @location = options[:location] || nil
      @field_sep = options[:field_sep] || "\t"
      @line_sep = options[:line_sep] || "\n"
      @collection_sep = options[:collection_sep] || "|"
      @stored_as = options[:stored_as] || :textfile
      @columns = []
      @partitions = []
      @serde_name = nil
      @serde_properties = {}
      instance_eval(&blk) if blk
    end

    # Declare a regular column.
    def column(name, type, comment = nil)
      @columns << Column.new(name, type, comment)
    end

    # Declare a partition column.
    def partition(name, type, comment = nil)
      @partitions << Column.new(name, type, comment)
    end

    # Use a custom SerDe (with optional properties) instead of the
    # delimited row format.
    def serde(name, properties = {})
      @serde_name = name
      @serde_properties = properties
    end

    # Full CREATE [EXTERNAL] TABLE statement.
    # NOTE(review): whitespace inside this literal is emitted verbatim
    # into the DDL; continuation-line indentation could not be recovered
    # from the source view — confirm against the original gem.
    def create_table_statement
      %[CREATE #{external}TABLE #{table_statement}
ROW FORMAT #{row_format_statement}
STORED AS #{stored_as}
#{location}]
    end

    # Storage format keyword, e.g. TEXTFILE.
    def stored_as
      @stored_as.to_s.upcase
    end

    # SerDe clause when one was configured, delimited clause otherwise.
    def row_format_statement
      @serde_name ? serde_statement : delimited_statement
    end

    # DELIMITED row format clause using the configured separators.
    def delimited_statement
      %(DELIMITED
FIELDS TERMINATED BY '#{@field_sep}'
COLLECTION ITEMS TERMINATED BY '#{@collection_sep}'
LINES TERMINATED BY '#{@line_sep}')
    end

    # SERDE clause plus any SERDEPROPERTIES.
    def serde_statement
      %(SERDE '#{@serde_name}'\n#{serde_properties_statement})
    end

    # WITH SERDEPROPERTIES (...) clause, or '' when no properties set.
    def serde_properties_statement
      return '' if @serde_properties.empty?
      pairs = @serde_properties.map { |key, val| %("#{key}" = "#{val}") }
      %(WITH SERDEPROPERTIES (#{pairs.join(",\n")}))
    end

    # ALTER TABLE ... REPLACE COLUMNS statement for this schema.
    def replace_columns_statement
      alter_columns_statement("REPLACE")
    end

    # ALTER TABLE ... ADD COLUMNS statement for this schema.
    def add_columns_statement
      alter_columns_statement("ADD")
    end

    def to_s
      table_statement
    end

    private

    # "EXTERNAL " prefix when a LOCATION was given, '' otherwise.
    def external
      @location.nil? ? '' : 'EXTERNAL '
    end

    # Table name, column list, comment and partition clause.
    def table_statement
      comment_clause = @comment.nil? ? '' : " COMMENT '#{@comment}'"
      %[`#{@name}` #{column_statement}#{comment_clause}\n#{partition_statement}]
    end

    # LOCATION clause, or '' when none configured.
    def location
      @location.nil? ? '' : "LOCATION '#{@location}'"
    end

    # Shared body for ADD/REPLACE COLUMNS statements.
    def alter_columns_statement(add_or_replace)
      %[ALTER TABLE `#{name}` #{add_or_replace} COLUMNS #{column_statement}]
    end

    # Parenthesized, newline-separated column definitions.
    def column_statement
      "(\n#{@columns.join(",\n")}\n)"
    end

    # PARTITIONED BY clause, or "" when there are no partitions.
    def partition_statement
      return "" if @partitions.nil? || @partitions.empty?
      "PARTITIONED BY (\n#{@partitions.join(",\n")}\n)"
    end

    # A single column or partition definition.
    class Column
      attr_reader :name, :type, :comment

      def initialize(name, type, comment = nil)
        @name = name
        @type = type
        @comment = comment
      end

      # Rendered as `name` TYPE [COMMENT '...'].
      def to_s
        suffix = @comment.nil? ? '' : " COMMENT '#{@comment}'"
        "`#{@name}` #{@type.to_s.upcase}#{suffix}"
      end
    end
  end
end
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
require 'impala'
|
|
2
|
+
require 'sequel/adapters/shared/impala'
|
|
3
|
+
|
|
4
|
+
module Sequel
|
|
5
|
+
module Impala
|
|
6
|
+
# Sequel Database class for the native (Thrift-based) Impala adapter.
class Database < Sequel::Database
  # Private sentinel key: while a query runs inside #query_id_and_profile,
  # @query_ids[RECORD_QUERY_PROFILE] / @runtime_profiles[RECORD_QUERY_PROFILE]
  # hold the names under which the query id / runtime profile should be
  # recorded by #record_query_id / #record_profile.
  RECORD_QUERY_PROFILE = Object.new.freeze

  include DatabaseMethods

  # Exception classes used by Impala.
  ImpalaExceptions = [
    ::Impala::InvalidQueryError,
    ::Impala::ConnectionError,
    ::Impala::CursorError,
    ::Impala::ParsingError,
    ::Impala::Protocol::Beeswax::BeeswaxException,
    ::Thrift::TransportException,
    IOError
  ].freeze

  # Exceptions that indicate the connection itself is broken; treated as
  # disconnect errors (see #disconnect_error?) and ignored on close.
  DisconnectExceptions = [
    ::Thrift::TransportException,
    IOError
  ].freeze

  set_adapter_scheme :impala

  # Connect to the Impala server. Currently, only the :host and :port options
  # are respected, and they default to 'localhost' and 21000, respectively.
  def connect(server)
    opts = server_opts(server)
    force_database(::Impala.connect(opts[:host]||'localhost', (opts[:port]||21000).to_i, opts), opts[:database])
  end

  # Exception classes Sequel should translate into DatabaseError.
  def database_error_classes
    ImpalaExceptions
  end

  # Close the underlying Impala connection, ignoring transport errors
  # raised while closing an already-broken connection.
  def disconnect_connection(c)
    log_info("Closing connection: #{c}")
    c.close
  rescue *DisconnectExceptions
  end

  # Execute +sql+ on a connection from the pool, yielding the cursor if a
  # block is given. The cursor is always closed (and its profile/query id
  # recorded) in the ensure block, even on error.
  def execute(sql, opts=OPTS)
    synchronize(opts[:server]) do |c|
      # here's my super-hack to get DDL calls to record their profiles and query_ids
      opts = self.opts.select { |k, v| [:query_id_name, :profile_name].include?(k) }.merge(opts)
      begin
        cursor = record_query_id(opts) do
          log_connection_yield(sql, c) do
            c.execute(sql){}
          end
        end
        yield cursor if block_given?
        nil
      rescue *ImpalaExceptions => e
        raise_error(e)
      ensure
        # cursor may be nil here if record_query_id raised before assignment;
        # record_profile and the guarded calls below tolerate that.
        record_profile(cursor, opts)
        log_info("Closing cursor: #{cursor.inspect}")
        log_query_url(cursor.handle) if cursor && cursor.handle
        cursor.close if cursor && cursor.open?
      end
    end
  end

  # Log a debugging URL for the query, built from the
  # SEQUEL_IMPALA_QUERY_URL environment variable (a format string with a
  # %{query_id} reference). No-op when the variable is unset.
  def log_query_url(handle)
    log_info(sprintf(ENV['SEQUEL_IMPALA_QUERY_URL'], query_id: handle.id)) if ENV['SEQUEL_IMPALA_QUERY_URL']
  end

  # For the duration of the block, record query ids under +query_id_name+
  # and runtime profiles under +profile_name+. The previous sentinel
  # values are saved and restored so calls can nest.
  def query_id_and_profile(query_id_name=:default, profile_name=:default)
    key = RECORD_QUERY_PROFILE
    prev_profile_name = prev_query_id_name = nil
    begin
      Sequel.synchronize do
        prev_query_id_name = @query_ids[key]
        prev_profile_name = @runtime_profiles[key]
        @query_ids[key] = query_id_name
        @runtime_profiles[key] = profile_name
      end

      yield
    ensure
      Sequel.synchronize do
        @query_ids[key] = prev_query_id_name
        @runtime_profiles[key] = prev_profile_name
      end
    end
  end

  # Retrieve a runtime profile recorded under +profile_name+.
  def profile_for(profile_name=:default)
    Sequel.synchronize{@runtime_profiles[profile_name]}
  end

  # Retrieve a query id hash (:query_id, :start_time) recorded under
  # +query_id_name+.
  def query_id_for(query_id_name=:default)
    Sequel.synchronize{@query_ids[query_id_name]}
  end

  private

  # Default dataset class for this Database (Sequel hook).
  def dataset_class_default
    Dataset
  end

  # Store the cursor's runtime profile under the requested name, if any
  # (either from opts or from the RECORD_QUERY_PROFILE sentinel).
  def record_profile(cursor, opts)
    if cursor && (profile_name = opts[:profile_name] || Sequel.synchronize{@runtime_profiles[RECORD_QUERY_PROFILE]})
      profile = cursor.runtime_profile
      Sequel.synchronize{@runtime_profiles[profile_name] = profile}
    end
  end

  # Run the block (which returns a cursor) and, if query-id recording is
  # active, store {query_id:, start_time:} under the requested name.
  def record_query_id(opts = OPTS)
    query_id_name = opts[:query_id_name] || Sequel.synchronize{@query_ids[RECORD_QUERY_PROFILE]}
    start = Time.now if query_id_name

    cursor = yield

    if cursor && query_id_name
      h = { query_id: cursor.handle.id, start_time: start }
      Sequel.synchronize{ @query_ids[query_id_name] = h }
    end

    cursor
  end

  # Sequel hook: set up per-Database recording state.
  def adapter_initialize
    @runtime_profiles = {}
    @query_ids = {}
  end

  # Method Sequel calls on the raw connection object to run SQL.
  def connection_execute_method
    :query
  end

  # Impala raises IOError if it detects a problem on the connection, and
  # in most cases that results in an unusable connection, so treat it as a
  # disconnect error so Sequel will reconnect.
  def disconnect_error?(exception, opts)
    case exception
    when *DisconnectExceptions
      true
    else
      super
    end
  end

  # Use DESCRIBE to get the column names and types for the table.
  def schema_parse_table(table_name, opts)
    m = output_identifier_meth(opts[:dataset])

    table = if opts[:schema]
      Sequel.qualify(opts[:schema], table_name)
    else
      Sequel.identifier(table_name)
    end

    describe(table, opts).map do |row|
      row[:db_type] = row[:type]
      row[:type] = schema_column_type(row[:db_type])
      # Impala has no column defaults or primary keys.
      row[:default] = nil
      row[:primary_key] = false
      [m.call(row.delete(:name)), row]
    end
  end
end
|
|
168
|
+
|
|
169
|
+
# Sequel Dataset class for the native Impala adapter.
class Dataset < Sequel::Dataset
  include DatasetMethods

  # Quote character delimiting string literals.
  APOS = "'".freeze

  # Characters that must be backslash-escaped inside Impala string
  # literals, mapped to their escaped form.
  STRING_ESCAPES = {
    "\\" => "\\\\".freeze,
    "'" => "\\'".freeze,
    "\n" => "\\n".freeze,
    "\r" => "\\r".freeze,
    "\0" => "\\0".freeze,
    "\b" => "\\b".freeze,
    "\04" => "\\Z".freeze,
    # Impala is supposed to support this, but using it
    # breaks things to the point of returning bad data.
    # If you don't do this, the tabs in the input
    # get converted to spaces, but that's better than the
    # alternative.
    # "\t" => "\\t".freeze,
  }.freeze
  # Single regexp matching any character that needs escaping.
  STRING_ESCAPE_RE = /(#{Regexp.union(STRING_ESCAPES.keys)})/

  # Run +sql+, set this dataset's columns from the cursor, and yield each
  # row. Timestamp strings are typecast via the Database's
  # to_application_timestamp.
  def fetch_rows(sql)
    execute(sql, @opts) do |cur|
      self.columns = cur.columns.map! { |name| output_identifier(name) }
      cur.typecast_map['timestamp'] = db.method(:to_application_timestamp)
      cur.each { |row| yield row }
    end

    self
  end

  # Return a clone that records the query's runtime profile under
  # +profile_name+ (see Database#profile_for).
  def profile(profile_name=:default)
    clone(:profile_name => profile_name)
  end

  # Return a clone that records the query's id under +query_id_name+
  # (see Database#query_id_for).
  def query_id(query_id_name=:default)
    clone(:query_id_name => query_id_name)
  end

  private

  # Unlike the jdbc/hive2 driver, the impala driver requires you escape
  # some values in string literals to get correct results, but not the
  # tab character or things break.
  def literal_string_append(sql, s)
    escaped = s.to_s.gsub(STRING_ESCAPE_RE) { |chunk| STRING_ESCAPES[chunk] }
    sql << APOS
    sql << escaped
    sql << APOS
  end
end
|
|
219
|
+
end
|
|
220
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
require 'sequel/adapters/shared/impala'
|
|
2
|
+
|
|
3
|
+
# Load the JDBC HiveServer2 driver class and register the :Hive2 subadapter.
Sequel::JDBC.load_driver('org.apache.hive.jdbc.HiveDriver', :Hive2)

module Sequel
  module JDBC
    Sequel.synchronize do
      # Hook the :hive2 scheme into Sequel's JDBC adapter setup table.
      # The proc extends the Database, sets the dataset class, and returns
      # the JDBC driver class (JRuby constant).
      DATABASE_SETUP[:hive2] = proc do |db|
        db.extend(Sequel::JDBC::Hive2::DatabaseMethods)
        db.dataset_class = Sequel::JDBC::Hive2::Dataset
        org.apache.hive.jdbc.HiveDriver
      end
    end

    # JDBC HiveServer2 subadapter, reusing the shared Impala SQL support.
    module Hive2
      module DatabaseMethods
        include Sequel::Impala::DatabaseMethods

        # Recognize wrapped and unwrapped java.net.SocketExceptions as disconnect errors
        def disconnect_error?(exception, opts)
          super || exception.message =~ /\A(Java::JavaSql::SQLException: )?org\.apache\.thrift\.transport\.TTransportException: java\.net\.SocketException/
        end

        # Ignore JDBC-level SQLExceptions raised while closing an
        # already-broken connection.
        def disconnect_connection(c)
          super
        rescue java.sql.SQLException
          nil
        end
      end

      # Dataset for the hive2 subadapter; SQL generation comes from the
      # shared Impala dataset methods.
      class Dataset < JDBC::Dataset
        include Sequel::Impala::DatasetMethods
      end
    end
  end
end
|