dwh 0.1.0
- checksums.yaml +7 -0
- data/.rubocop.yml +36 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE +21 -0
- data/README.md +130 -0
- data/Rakefile +42 -0
- data/docs/DWH/Adapters/Adapter.html +3053 -0
- data/docs/DWH/Adapters/Athena.html +1704 -0
- data/docs/DWH/Adapters/Boolean.html +121 -0
- data/docs/DWH/Adapters/Druid.html +1626 -0
- data/docs/DWH/Adapters/DuckDb.html +2012 -0
- data/docs/DWH/Adapters/MySql.html +1704 -0
- data/docs/DWH/Adapters/OpenAuthorizable/ClassMethods.html +265 -0
- data/docs/DWH/Adapters/OpenAuthorizable.html +1102 -0
- data/docs/DWH/Adapters/Postgres.html +2000 -0
- data/docs/DWH/Adapters/Snowflake.html +1662 -0
- data/docs/DWH/Adapters/SqlServer.html +2084 -0
- data/docs/DWH/Adapters/Trino.html +1835 -0
- data/docs/DWH/Adapters.html +129 -0
- data/docs/DWH/AuthenticationError.html +142 -0
- data/docs/DWH/Behaviors.html +767 -0
- data/docs/DWH/Capabilities.html +748 -0
- data/docs/DWH/Column.html +1115 -0
- data/docs/DWH/ConfigError.html +143 -0
- data/docs/DWH/ConnectionError.html +143 -0
- data/docs/DWH/DWHError.html +138 -0
- data/docs/DWH/ExecutionError.html +143 -0
- data/docs/DWH/Factory.html +1133 -0
- data/docs/DWH/Functions/Arrays.html +505 -0
- data/docs/DWH/Functions/Dates.html +1644 -0
- data/docs/DWH/Functions/ExtractDatePart.html +804 -0
- data/docs/DWH/Functions/Nulls.html +377 -0
- data/docs/DWH/Functions.html +846 -0
- data/docs/DWH/Logger.html +258 -0
- data/docs/DWH/OAuthError.html +138 -0
- data/docs/DWH/Settings.html +658 -0
- data/docs/DWH/StreamingStats.html +804 -0
- data/docs/DWH/Table.html +1260 -0
- data/docs/DWH/TableStats.html +583 -0
- data/docs/DWH/TokenExpiredError.html +142 -0
- data/docs/DWH/UnsupportedCapability.html +135 -0
- data/docs/DWH.html +220 -0
- data/docs/_index.html +471 -0
- data/docs/class_list.html +54 -0
- data/docs/css/common.css +1 -0
- data/docs/css/full_list.css +58 -0
- data/docs/css/style.css +503 -0
- data/docs/file.README.html +210 -0
- data/docs/file.adapters.html +514 -0
- data/docs/file.creating-adapters.html +497 -0
- data/docs/file.getting-started.html +288 -0
- data/docs/file.usage.html +446 -0
- data/docs/file_list.html +79 -0
- data/docs/frames.html +22 -0
- data/docs/guides/adapters.md +445 -0
- data/docs/guides/creating-adapters.md +430 -0
- data/docs/guides/getting-started.md +225 -0
- data/docs/guides/usage.md +378 -0
- data/docs/index.html +210 -0
- data/docs/js/app.js +344 -0
- data/docs/js/full_list.js +242 -0
- data/docs/js/jquery.js +4 -0
- data/docs/method_list.html +2038 -0
- data/docs/top-level-namespace.html +110 -0
- data/lib/dwh/adapters/athena.rb +359 -0
- data/lib/dwh/adapters/druid.rb +267 -0
- data/lib/dwh/adapters/duck_db.rb +235 -0
- data/lib/dwh/adapters/my_sql.rb +235 -0
- data/lib/dwh/adapters/open_authorizable.rb +215 -0
- data/lib/dwh/adapters/postgres.rb +250 -0
- data/lib/dwh/adapters/snowflake.rb +489 -0
- data/lib/dwh/adapters/sql_server.rb +257 -0
- data/lib/dwh/adapters/trino.rb +213 -0
- data/lib/dwh/adapters.rb +363 -0
- data/lib/dwh/behaviors.rb +67 -0
- data/lib/dwh/capabilities.rb +39 -0
- data/lib/dwh/column.rb +79 -0
- data/lib/dwh/errors.rb +29 -0
- data/lib/dwh/factory.rb +125 -0
- data/lib/dwh/functions/arrays.rb +42 -0
- data/lib/dwh/functions/dates.rb +162 -0
- data/lib/dwh/functions/extract_date_part.rb +70 -0
- data/lib/dwh/functions/nulls.rb +31 -0
- data/lib/dwh/functions.rb +86 -0
- data/lib/dwh/logger.rb +50 -0
- data/lib/dwh/settings/athena.yml +77 -0
- data/lib/dwh/settings/base.yml +81 -0
- data/lib/dwh/settings/databricks.yml +51 -0
- data/lib/dwh/settings/druid.yml +59 -0
- data/lib/dwh/settings/duckdb.yml +44 -0
- data/lib/dwh/settings/mysql.yml +67 -0
- data/lib/dwh/settings/postgres.yml +30 -0
- data/lib/dwh/settings/redshift.yml +52 -0
- data/lib/dwh/settings/snowflake.yml +45 -0
- data/lib/dwh/settings/sqlserver.yml +80 -0
- data/lib/dwh/settings/trino.yml +77 -0
- data/lib/dwh/settings.rb +79 -0
- data/lib/dwh/streaming_stats.rb +69 -0
- data/lib/dwh/table.rb +105 -0
- data/lib/dwh/table_stats.rb +51 -0
- data/lib/dwh/version.rb +5 -0
- data/lib/dwh.rb +54 -0
- data/sig/dwh.rbs +4 -0
- metadata +231 -0
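
Only two of the source files are expanded in the diff below: data/lib/dwh/adapters/druid.rb (+267) and data/lib/dwh/adapters/duck_db.rb (+235). Assuming the gem is fetched from a standard RubyGems registry (this page only confirms a public release), installation is the usual Bundler entry:

  # Gemfile -- assumes rubygems.org hosting; adjust the source if the
  # release was published to a different registry
  gem 'dwh', '~> 0.1.0'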
data/lib/dwh/adapters/druid.rb
@@ -0,0 +1,267 @@
module DWH
  module Adapters
    # Druid adapter.
    #
    # Generally, adapters should be created using {DWH::Factory#create DWH.create}, where a
    # configuration is passed in as an options hash or argument list.
    #
    # @example Basic connection with only the required options
    #   DWH.create(:druid, { host: 'localhost', port: 8080, protocol: 'http' })
    #
    # @example Connect with SSL and basic authorization
    #   DWH.create(:druid, { host: 'localhost', port: 8080, protocol: 'https',
    #     basic_auth: 'Base64-encoded authorization key'
    #   })
    #
    # @example Sending a custom client name and user information
    #   DWH.create(:druid, { host: 'localhost', port: 8080,
    #     client_name: 'Strata CLI', extra_connection_params: {
    #       context: {
    #         user: 'Ajo',
    #         team: 'Engineering'
    #       }
    #   }})
    class Druid < Adapter
      DRUID_STATUS = '/status'.freeze
      DRUID_DATASOURCES = '/druid/coordinator/v1/datasources'.freeze
      DRUID_SQL = '/druid/v2/sql/'.freeze
      COLUMNS_FOR_TABLE = '"COLUMN_NAME","DATA_TYPE", "NUMERIC_PRECISION", "NUMERIC_SCALE", "CHARACTER_MAXIMUM_LENGTH"'.freeze

      config :protocol, String, required: true, default: 'http', message: 'must be http or https', allowed: %w[http https]
      config :host, String, required: true, message: 'server host IP address or domain name'
      config :port, Integer, required: true, default: 8081, message: 'port to connect to'
      config :query_timeout, Integer, required: false, default: 600, message: 'query execution timeout in seconds'
      config :open_timeout, Integer, required: false, default: nil, message: 'how long to wait to connect'
      config :client_name, String, default: 'DWH Ruby Gem', message: 'client_name will be passed in the context object'
      config :basic_auth, String, required: false, message: 'authorization key sent in the header'

      # (see Adapter#connection)
      def connection
        return @connection if @connection

        @connection = Faraday.new(
          url: "#{config[:protocol]}://#{config[:host]}:#{config[:port]}",
          headers: {
            'Content-Type' => 'application/json',
            **(config[:basic_auth] ? { 'Authorization' => "Basic #{config[:basic_auth]}" } : {})
          },
          request: {
            timeout: config[:query_timeout],
            open_timeout: config[:open_timeout],
            context: {
              client_name: config[:client_name]
            }
          }.merge(extra_connection_params)
        )

        @connection
      end

      # (see Adapter#test_connection)
      def test_connection(raise_exception: false)
        res = connection.get(DRUID_STATUS)
        unless res.success?
          raise ConnectionError, res.body if raise_exception

          return false
        end

        true
      rescue Faraday::ConnectionFailed => e
        raise ConnectionError, e.message if raise_exception

        false
      end

      # (see Adapter#tables)
      def tables
        resp = connection.get(DRUID_DATASOURCES) do |req|
          req.options.timeout = 30
        end
        JSON.parse resp.body
      end

      # The date column defaults to __time. If the datasource
      # does not have a date column, set it to nil.
      # @param table [String] table name
      # @param date_column [String] optional date column
      # @see Adapter#stats
      def stats(table, date_column: '__time')
        sql = <<-SQL
          SELECT
            count(*) ROW_COUNT
            #{date_column.nil? ? nil : ", min(#{date_column}) DATE_START"}
            #{date_column.nil? ? nil : ", max(#{date_column}) DATE_END"}
          FROM "#{table}"
        SQL

        result = execute(sql)

        TableStats.new(
          row_count: result[0][0],
          date_start: result[0][1],
          date_end: result[0][2]
        )
      end

      # Marks segments of a datasource/table in the given interval as unused.
      # @param table [String] datasource/table name
      # @param interval [String] date interval in the format of from_date/to_date
      #   as valid ISO timestamps
      def drop_unused_segments(table, interval)
        url = "/druid/coordinator/v1/datasources/#{table}/markUnused"

        logger.debug '=== Dropping Segments ==='

        response = connection.post(url) do |req|
          req.headers['Content-Type'] = 'application/json'
          req.body = { interval: interval }.to_json
        end

        logger.debug response.status
      end

      # (see Adapter#metadata)
      def metadata(table)
        sql = <<-SQL
          SELECT #{COLUMNS_FOR_TABLE} FROM INFORMATION_SCHEMA.COLUMNS
          WHERE TABLE_SCHEMA = 'druid' AND TABLE_NAME = '#{table}'
        SQL

        stats = stats(table)
        db_table = Table.new table, table_stats: stats
        cols = execute(sql, format: :object)
        st = table_druid_schema_types(table, stats.date_end)

        cols.each do |col|
          db_table << Column.new(
            name: col['COLUMN_NAME'],
            schema_type: st[:metrics].include?(col['COLUMN_NAME']) ? 'measure' : 'dimension',
            data_type: col['DATA_TYPE'],
            precision: col['NUMERIC_PRECISION'],
            scale: col['NUMERIC_SCALE'],
            max_char_length: col['CHARACTER_MAXIMUM_LENGTH']
          )
        end

        db_table
      end

      # (see Adapter#execute)
      def execute(sql, format: :array, retries: 0)
        format = format.to_sym
        result_format = format == :native ? 'array' : format.to_s
        resp = with_debug(sql) do
          with_retry(retries) do
            connection.post(DRUID_SQL) do |req|
              req.headers['Content-Type'] = 'application/json'
              req.body = {
                query: sql,
                resultFormat: result_format,
                context: { sqlTimeZone: 'Etc/UTC' }
              }.merge(extra_query_params).to_json
            end
          end
        end

        raise ExecutionError, "Could not execute #{sql}: \n #{resp.body}" if resp.status != 200

        if format == :native
          resp
        else
          format == :csv ? resp.body : JSON.parse(resp.body)
        end
      end

      # (see Adapter#execute_stream)
      def execute_stream(sql, io, stats: nil, retries: 0)
        resp = with_debug(sql) do
          with_retry(retries) do
            connection.post(DRUID_SQL) do |req|
              req.headers['Content-Type'] = 'application/json'
              req.body = {
                query: sql,
                resultFormat: 'csv',
                header: true
                # added timezone here due to druid bug
                # where date sub query joins failed without it.
                # context: { sqlTimeZone: 'Etc/UTC'}
              }.merge(extra_query_params).to_json

              parseable_row = ''
              req.options.on_data = proc do |chunk, _|
                handle_streaming_chunk(io, chunk, stats, parseable_row)
              end
            end
          end
        end

        io.rewind
        # Raise exception on failed runs
        raise ExecutionError, io.read unless resp.success?

        io
      end

      # (see Adapter#stream)
      def stream(sql, &block)
        on_data_calls = 0
        with_debug(sql) do
          connection.post(DRUID_SQL) do |req|
            req.headers['Content-Type'] = 'application/json'
            req.body = { query: sql, resultFormat: 'csv' }.to_json
            req.options.on_data = proc do |chunk, _chunk_size|
              block.call chunk.force_encoding('utf-8')
              on_data_calls += 1
            end
          end
        end

        on_data_calls
      end

      protected

      def table_druid_schema_types(table, last_interval_start_date)
        end_date = last_interval_start_date + 1
        start_date = last_interval_start_date
        url_friendly_interval = "#{start_date.strftime('%Y-%m-%d')}_#{end_date.strftime('%Y-%m-%d')}"
        url = "/druid/coordinator/v1/datasources/#{table}/intervals/#{url_friendly_interval}?full"

        resp = connection.get(url) do |req|
          req.options.timeout = 30
        end

        raise ExecutionError, "Could not fetch druid schema types: \n #{resp.body}" if resp.status != 200

        res = JSON.parse(resp.body)
        meta = res.flatten[1].flatten(4)[1]['metadata']
        {
          dimensions: meta['dimensions'].split(','),
          metrics: meta['metrics'].split(',')
        }
      end

      def handle_streaming_chunk(io, chunk, stats, parseable_row)
        io.write chunk.rstrip.force_encoding('utf-8')

        # append in place so partial CSV rows accumulate across chunks
        parseable_row << chunk
        process_streaming_rows(parseable_row, chunk, stats)
      end

      def process_streaming_rows(parseable_row, chunk, stats)
        return if stats.nil? || stats.limit_reached?

        rows = CSV.parse(parseable_row, skip_blanks: true)
        rows.each_with_index do |row, index|
          # skip header rows in stats collector
          stats << row unless index.zero? && stats.total_rows.zero?
        end
        parseable_row.clear
      rescue CSV::MalformedCSVError
        logger.debug("Unparseable:\n #{chunk}")
      end
    end
  end
end
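
A minimal usage sketch for the Druid adapter above. DWH.create, test_connection, tables, stats, and execute all come from the source; the 'wikipedia' datasource name is a hypothetical placeholder, and the sketch assumes TableStats exposes a row_count reader matching its constructor keyword.

  # Connect, probe, and query a Druid broker via the adapter's public API.
  require 'dwh'

  druid = DWH.create(:druid, { host: 'localhost', port: 8081, protocol: 'http' })

  if druid.test_connection
    puts druid.tables.inspect                    # parsed JSON list of datasources
    stats = druid.stats('wikipedia')             # row count plus min/max of __time
    puts "#{stats.row_count} rows"
    rows = druid.execute('SELECT COUNT(*) FROM "wikipedia"') # default :array format
    puts rows.first.first                        # Druid array results are row arrays
  end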
data/lib/dwh/adapters/duck_db.rb
@@ -0,0 +1,235 @@
module DWH
  module Adapters
    # DuckDb adapter.
    #
    # This requires the Ruby {https://github.com/suketa/ruby-duckdb DuckDb} gem. Installation
    # is a bit involved; please follow the guide on the gem's page to make sure
    # you have DuckDB installed as required before installing the gem.
    #
    # Generally, adapters should be created using {DWH::Factory#create DWH.create}, where a
    # configuration is passed in as an options hash or argument list.
    #
    # @example Basic connection with only the required options
    #   DWH.create(:duckdb, { file: 'path/to/my/duckdb' })
    #
    # @example Open in read-only mode ({https://duckdb.org/docs/stable/configuration/overview#configuration-reference config docs})
    #   DWH.create(:duckdb, { file: 'path/to/my/duckdb', duck_config: { access_mode: 'READ_ONLY' } })
    class DuckDb < Adapter
      config :file, String, required: true, message: 'path/to/duckdb/db'
      config :schema, String, required: false, default: 'main', message: 'schema defaults to main'
      config :duck_config, Hash, required: false, message: 'hash of valid DuckDb configuration options'

      # (see Adapter#connection)
      def connection
        return @connection if @connection

        if self.class.databases.key?(config[:file])
          @db = self.class.databases[config[:file]]
        else
          ducked_config = DuckDB::Config.new
          if config.key?(:duck_config)
            config[:duck_config].each do |key, val|
              ducked_config[key.to_s] = val
            end
          end
          @db = DuckDB::Database.open(config[:file], ducked_config)
          self.class.databases[config[:file]] = @db
        end

        @connection = @db.connect

        @connection
      rescue StandardError => e
        raise ConfigError, e.message
      end

      def self.databases
        @databases ||= {}
      end

      def self.open_databases
        databases.size
      end

      # DuckDB is an in-process database, so we don't want to
      # open multiple instances of the same db in memory. Rather,
      # we open one instance but many connections. Use this
      # method to close them all.
      def self.close_all
        databases.each_value(&:close)
        databases.clear
      end

      # This disconnects the current connection, but
      # the db is still in process and can be reconnected
      # to.
      #
      # (see Adapter#close)
      def close
        connection.disconnect
        @connection = nil
      end

      # (see Adapter#test_connection)
      def test_connection(raise_exception: false)
        connection
        true
      rescue StandardError => e
        raise ConnectionError, e.message if raise_exception

        false
      end

      # (see Adapter#tables)
      def tables(**qualifiers)
        catalog, schema = qualifiers.values_at(:catalog, :schema)
        sql = 'SELECT table_name FROM duckdb_tables'

        where = []
        where << "database_name = '#{catalog}'" if catalog

        where << if schema
                   "schema_name = '#{schema}'"
                 else
                   "schema_name = '#{config[:schema]}'"
                 end

        res = execute("#{sql} WHERE #{where.join(' AND ')}")
        res.flatten
      end

      # (see Adapter#stats)
      def stats(table, date_column: nil, **qualifiers)
        qualifiers[:schema] = config[:schema] unless qualifiers[:schema]
        db_table = Table.new table, **qualifiers

        sql = <<-SQL
          SELECT count(*) ROW_COUNT
          #{date_column.nil? ? nil : ", min(#{date_column}) DATE_START"}
          #{date_column.nil? ? nil : ", max(#{date_column}) DATE_END"}
          FROM #{db_table.fully_qualified_table_name}
        SQL

        result = execute(sql)
        TableStats.new(
          row_count: result.first[0],
          date_start: result.first[1],
          date_end: result.first[2]
        )
      end

      # (see Adapter#metadata)
      def metadata(table, **qualifiers)
        db_table = Table.new table, **qualifiers
        sql = 'SELECT column_name, data_type, character_maximum_length, numeric_precision, numeric_scale FROM duckdb_columns'

        where = ["table_name = '#{db_table.physical_name}'"]
        where << "database_name = '#{db_table.catalog}'" if db_table.catalog

        where << if db_table.schema
                   "schema_name = '#{db_table.schema}'"
                 else
                   "schema_name = '#{config[:schema]}'"
                 end

        cols = execute("#{sql} WHERE #{where.join(' AND ')}")
        cols.each do |col|
          db_table << Column.new(
            name: col[0],
            data_type: col[1],
            precision: col[3],
            scale: col[4],
            max_char_length: col[2]
          )
        end

        db_table
      end

      # True if the configuration was set up with a schema.
      def schema?
        config[:schema].present?
      end

      # (see Adapter#execute)
      def execute(sql, format: :array, retries: 0)
        begin
          result = with_debug(sql) { with_retry(retries) { connection.query(sql) } }
        rescue StandardError => e
          raise ExecutionError, e.message
        end

        format = format.downcase if format.is_a?(String)
        case format.to_sym
        when :array
          result.to_a
        when :object
          result_to_hash(result)
        when :csv
          result_to_csv(result)
        when :native
          result
        else
          raise UnsupportedCapability, "Unsupported format: #{format} for this #{name}"
        end
      end

      # (see Adapter#execute_stream)
      def execute_stream(sql, io, stats: nil, retries: 0)
        with_debug(sql) do
          with_retry(retries) do
            result = connection.query(sql)
            io.write(CSV.generate_line(result.columns.map(&:name)))
            result.each do |row|
              stats << row unless stats.nil?
              io.write(CSV.generate_line(row))
            end
          end
        end

        io.rewind
        io
      rescue StandardError => e
        raise ExecutionError, e.message
      end

      # (see Adapter#stream)
      def stream(sql, &block)
        with_debug(sql) do
          result = connection.query(sql)
          result.each do |row|
            block.call(row)
          end
        end
      end

      def valid_config?
        super
        require 'duckdb'
      rescue LoadError
        raise ConfigError, "Required 'duckdb' gem missing. Please add it to your Gemfile."
      end

      private

      def result_to_hash(result)
        columns = result.columns.map(&:name)

        result.each.map do |row|
          columns.zip(row).to_h
        end
      end

      def result_to_csv(result)
        CSV.generate do |csv|
          csv << result.columns.map(&:name)
          result.each do |row|
            csv << row
          end
        end
      end
    end
  end
end
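
The class-level databases registry above means two adapters pointed at the same file share one in-process DuckDB instance while holding separate connections. A small sketch of that behavior, assuming the duckdb gem is installed; the analytics.duckdb path and the events table are hypothetical placeholders.

  # Two adapters on the same file share one database handle.
  require 'dwh'

  a = DWH.create(:duckdb, { file: 'analytics.duckdb' })
  b = DWH.create(:duckdb, { file: 'analytics.duckdb' })

  a.execute('CREATE TABLE IF NOT EXISTS events (id INTEGER, name VARCHAR)')
  puts b.tables.inspect                       # both adapters see the same database

  puts DWH::Adapters::DuckDb.open_databases   # => 1; one instance, two connections
  DWH::Adapters::DuckDb.close_all             # closes the shared in-process handle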