dwh 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +36 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE +21 -0
- data/README.md +130 -0
- data/Rakefile +42 -0
- data/docs/DWH/Adapters/Adapter.html +3053 -0
- data/docs/DWH/Adapters/Athena.html +1704 -0
- data/docs/DWH/Adapters/Boolean.html +121 -0
- data/docs/DWH/Adapters/Druid.html +1626 -0
- data/docs/DWH/Adapters/DuckDb.html +2012 -0
- data/docs/DWH/Adapters/MySql.html +1704 -0
- data/docs/DWH/Adapters/OpenAuthorizable/ClassMethods.html +265 -0
- data/docs/DWH/Adapters/OpenAuthorizable.html +1102 -0
- data/docs/DWH/Adapters/Postgres.html +2000 -0
- data/docs/DWH/Adapters/Snowflake.html +1662 -0
- data/docs/DWH/Adapters/SqlServer.html +2084 -0
- data/docs/DWH/Adapters/Trino.html +1835 -0
- data/docs/DWH/Adapters.html +129 -0
- data/docs/DWH/AuthenticationError.html +142 -0
- data/docs/DWH/Behaviors.html +767 -0
- data/docs/DWH/Capabilities.html +748 -0
- data/docs/DWH/Column.html +1115 -0
- data/docs/DWH/ConfigError.html +143 -0
- data/docs/DWH/ConnectionError.html +143 -0
- data/docs/DWH/DWHError.html +138 -0
- data/docs/DWH/ExecutionError.html +143 -0
- data/docs/DWH/Factory.html +1133 -0
- data/docs/DWH/Functions/Arrays.html +505 -0
- data/docs/DWH/Functions/Dates.html +1644 -0
- data/docs/DWH/Functions/ExtractDatePart.html +804 -0
- data/docs/DWH/Functions/Nulls.html +377 -0
- data/docs/DWH/Functions.html +846 -0
- data/docs/DWH/Logger.html +258 -0
- data/docs/DWH/OAuthError.html +138 -0
- data/docs/DWH/Settings.html +658 -0
- data/docs/DWH/StreamingStats.html +804 -0
- data/docs/DWH/Table.html +1260 -0
- data/docs/DWH/TableStats.html +583 -0
- data/docs/DWH/TokenExpiredError.html +142 -0
- data/docs/DWH/UnsupportedCapability.html +135 -0
- data/docs/DWH.html +220 -0
- data/docs/_index.html +471 -0
- data/docs/class_list.html +54 -0
- data/docs/css/common.css +1 -0
- data/docs/css/full_list.css +58 -0
- data/docs/css/style.css +503 -0
- data/docs/file.README.html +210 -0
- data/docs/file.adapters.html +514 -0
- data/docs/file.creating-adapters.html +497 -0
- data/docs/file.getting-started.html +288 -0
- data/docs/file.usage.html +446 -0
- data/docs/file_list.html +79 -0
- data/docs/frames.html +22 -0
- data/docs/guides/adapters.md +445 -0
- data/docs/guides/creating-adapters.md +430 -0
- data/docs/guides/getting-started.md +225 -0
- data/docs/guides/usage.md +378 -0
- data/docs/index.html +210 -0
- data/docs/js/app.js +344 -0
- data/docs/js/full_list.js +242 -0
- data/docs/js/jquery.js +4 -0
- data/docs/method_list.html +2038 -0
- data/docs/top-level-namespace.html +110 -0
- data/lib/dwh/adapters/athena.rb +359 -0
- data/lib/dwh/adapters/druid.rb +267 -0
- data/lib/dwh/adapters/duck_db.rb +235 -0
- data/lib/dwh/adapters/my_sql.rb +235 -0
- data/lib/dwh/adapters/open_authorizable.rb +215 -0
- data/lib/dwh/adapters/postgres.rb +250 -0
- data/lib/dwh/adapters/snowflake.rb +489 -0
- data/lib/dwh/adapters/sql_server.rb +257 -0
- data/lib/dwh/adapters/trino.rb +213 -0
- data/lib/dwh/adapters.rb +363 -0
- data/lib/dwh/behaviors.rb +67 -0
- data/lib/dwh/capabilities.rb +39 -0
- data/lib/dwh/column.rb +79 -0
- data/lib/dwh/errors.rb +29 -0
- data/lib/dwh/factory.rb +125 -0
- data/lib/dwh/functions/arrays.rb +42 -0
- data/lib/dwh/functions/dates.rb +162 -0
- data/lib/dwh/functions/extract_date_part.rb +70 -0
- data/lib/dwh/functions/nulls.rb +31 -0
- data/lib/dwh/functions.rb +86 -0
- data/lib/dwh/logger.rb +50 -0
- data/lib/dwh/settings/athena.yml +77 -0
- data/lib/dwh/settings/base.yml +81 -0
- data/lib/dwh/settings/databricks.yml +51 -0
- data/lib/dwh/settings/druid.yml +59 -0
- data/lib/dwh/settings/duckdb.yml +44 -0
- data/lib/dwh/settings/mysql.yml +67 -0
- data/lib/dwh/settings/postgres.yml +30 -0
- data/lib/dwh/settings/redshift.yml +52 -0
- data/lib/dwh/settings/snowflake.yml +45 -0
- data/lib/dwh/settings/sqlserver.yml +80 -0
- data/lib/dwh/settings/trino.yml +77 -0
- data/lib/dwh/settings.rb +79 -0
- data/lib/dwh/streaming_stats.rb +69 -0
- data/lib/dwh/table.rb +105 -0
- data/lib/dwh/table_stats.rb +51 -0
- data/lib/dwh/version.rb +5 -0
- data/lib/dwh.rb +54 -0
- data/sig/dwh.rbs +4 -0
- metadata +231 -0
data/lib/dwh/adapters.rb
ADDED
@@ -0,0 +1,363 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require_relative 'settings'
|
3
|
+
require_relative 'capabilities'
|
4
|
+
require_relative 'functions'
|
5
|
+
require_relative 'behaviors'
|
6
|
+
require_relative 'logger'
|
7
|
+
|
8
|
+
module DWH
|
9
|
+
# Encapsulates functionality related to all Adapter implementations
|
10
|
+
module Adapters
|
11
|
+
# Marker class that adapters can pass as the declared `type` of a boolean
# configuration option (Ruby has no shared class for `true` and `false`).
# The adapter config validation refers to it by name; it is never instantiated here.
class Boolean; end
|
12
|
+
|
13
|
+
# @abstract Adapters base class. All adapters should inherit from this class
|
14
|
+
# and implement these core required methods:
|
15
|
+
#
|
16
|
+
# * {#stats} - get table statistics
|
17
|
+
# * {#tables} - list tables
|
18
|
+
# * {#metadata} - get metadata for a specific table
|
19
|
+
# * {#execute} - run query and return result
|
20
|
+
# * {#execute_stream} - run query and stream results to provided IO
|
21
|
+
# * {#stream} - run query and yield streaming results
|
22
|
+
#
|
23
|
+
# Adapter implementations can declare configuration options,
|
24
|
+
# defaults, and whether it is required. This is a class
|
25
|
+
# level method. They will be validated and a {ConfigError} will be raised
|
26
|
+
# if there is an issue. Methods not implemented will raise {NotImplementedError}
|
27
|
+
#
|
28
|
+
# Additionally, if certain setting need to be overridden you can add a settings
|
29
|
+
# file in a relative directory like so: *settings/my_adapter.yml*. Or, you can specify
|
30
|
+
# an exact settings file location at the class level:
|
31
|
+
#
|
32
|
+
# class MyAdapter < DWH::Adapters::Adapter
|
33
|
+
# settings_file_path "my_dir/my_settings.yml"
|
34
|
+
# end
|
35
|
+
#
|
36
|
+
# @example
|
37
|
+
# class MyAdapter < DWH::Adapters::Adapter
|
38
|
+
# config :username, String, required: true, message: "login id of the current user"
|
39
|
+
# config :port, Integer, required: true, default: 5432
|
40
|
+
# end
|
41
|
+
class Adapter
|
42
|
+
extend Settings
|
43
|
+
include Capabilities
|
44
|
+
include Functions
|
45
|
+
include Behaviors
|
46
|
+
include Logger
|
47
|
+
|
48
|
+
# Declares one configuration option that the adapter needs in order to
# connect to and query the target database. The definition is stored in
# {.configuration} and an instance reader with the same name is generated,
# which returns the value found in the instance level config hash.
#
# @param name [String, Symbol] name of the configuration
# @param type [Constant] ruby type of the configuration
# @param options [Hash] options for the config
# @option options [Boolean] :required Whether option is required (default false)
# @option options [*] :default The default value
# @option options [String] :message The error message or info displayed
# @option options [Array] :allowed Permitted values (default empty list)
# @return [Symbol] name of the generated reader method
def self.config(name, type, options = {})
  key = name.to_sym

  configuration[key] = {
    type: type,
    required: options[:required] || false,
    default: options[:default],
    message: options[:message] || "Invalid or missing parameter: #{name}",
    allowed: options[:allowed] || []
  }

  define_method(key) { config[key] }
end
|
71
|
+
|
72
|
+
# Class level registry of the configuration definitions declared via {.config}.
# The hash is created on first access and kept in an instance variable of the
# receiving class.
# @return [Hash] definitions keyed by symbolized option name
def self.configuration
  @configuration = {} unless @configuration
  @configuration
end
|
77
|
+
|
78
|
+
# Instance level configurations
|
79
|
+
# @return [Hash] the actual instance configuration
|
80
|
+
attr_reader :config
|
81
|
+
|
82
|
+
# Builds an adapter instance from a connection config hash.
#
# The config keys are symbolized first. An optional :settings entry allows
# per instance customization of the general adapter settings, so that you
# can have multiple connections to the same database type (e.g. Trino)
# that exhibit different behavior. The config is then validated.
#
# @param config [Hash] connection configuration; keys may be strings or symbols
# @raise [ConfigError] when the config fails validation
def initialize(config)
  @config = config.symbolize_keys

  # Read the overrides from the symbolized copy so that a string keyed
  # "settings" entry is honoured as well.
  overrides = (@config[:settings] || {}).symbolize_keys
  @settings = self.class.adapter_settings.merge(overrides)

  valid_config?
end
|
93
|
+
|
94
|
+
# This is the actual runtime settings used by the adapter
|
95
|
+
# once initialized. During initialization settings could be
|
96
|
+
# overridden. Settings are different from configuration in that
|
97
|
+
# settings control behaviour and syntax while configuration
|
98
|
+
# determines how we connect.
|
99
|
+
# @return [Hash] symbolized hash of settings
|
100
|
+
attr_reader :settings
|
101
|
+
|
102
|
+
# Allows an already instantiated adapter to change its current settings.
# This might be useful in situations where behavior needs to be modified
# on a runtime basis.
#
# Alterations are not cumulative: a previous alteration is rolled back
# before the new changes are applied. The pre-alteration hash is never
# mutated, so {#reset_settings} can always restore it.
#
# @param changes [Hash] settings to override; string keys are symbolized
# @return [Hash] the complete settings with changes merged
def alter_settings(changes = {})
  reset_settings unless @original_settings.nil?

  @original_settings = @settings
  # Non-destructive merge: merge! would also modify @original_settings
  # because both variables would point at the same hash.
  @settings = @settings.merge(changes.transform_keys(&:to_sym))
end
|
111
|
+
|
112
|
+
# Puts the settings back to the state captured by the last call to
# alter_settings. Does nothing when no snapshot has been captured.
# @return [Hash, nil] the restored settings, or nil when there was nothing to restore
def reset_settings
  return unless @original_settings

  @settings = @original_settings
end
|
118
|
+
|
119
|
+
# Creates a connection to the target database and returns the
# connection object or self. Abstract: concrete adapters must override it.
# @raise [NotImplementedError] always, in this base implementation
def connection
  message = "#{self.class} has not implemented method '#{__method__}'"
  raise NotImplementedError, message
end
|
124
|
+
|
125
|
+
# Tests the connection to the target database. Abstract: concrete adapters
# must override it. Implementations are expected to return true when the
# connection works and, on failure, either raise or return false depending
# on raise_exception (see {#connect!} and {#connect?}).
#
# @param raise_exception [Boolean] whether a failure should raise
# @return [Boolean]
# @raise [ConnectionError] when a connection cannot be made
# @raise [NotImplementedError] always, in this base implementation
def test_connection(raise_exception: false)
  message = "#{self.class} has not implemented method '#{__method__}'"
  raise NotImplementedError, message
end
|
133
|
+
|
134
|
+
# Tests the connection, asking {#test_connection} to raise when the
# connection fails.
# @return [Boolean]
# @raise [ConnectionError]
def connect!
  should_raise = true
  test_connection(raise_exception: should_raise)
end
|
141
|
+
|
142
|
+
# Tests whether the database can be connected to, asking
# {#test_connection} not to raise on failure.
# @return [Boolean]
def connect?
  should_raise = false
  test_connection(raise_exception: should_raise)
end
|
147
|
+
|
148
|
+
# Closes the cached connection, if one was created, and forgets it.
# @return [nil]
def close
  @connection.close unless @connection.nil?
  @connection = nil
end
|
153
|
+
|
154
|
+
# Execute sql on the target database. Abstract: concrete adapters must
# override it.
#
# @param sql [String] actual sql
# @param format [Symbol, String] return format type
#   - array returns array of array
#   - object returns array of Hashes
#   - csv returns as csv
#   - native returns the native result from any clients used
#     - For example: Postgres using pg client will return PG::Result
#     - Http clients will return the HTTP response object
# @param retries [Integer] number of retries in case of failure. Default is 0
# @return [Array<Array>, Hash, CSV, Native]
# @raise [ConnectionError, ExecutionError]
# @raise [NotImplementedError] always, in this base implementation
def execute(sql, format: :array, retries: 0)
  message = "#{self.class} has not implemented method '#{__method__}'"
  raise NotImplementedError, message
end
|
170
|
+
|
171
|
+
# Execute sql and stream responses back. Data is written out in CSV format
# to the provided IO object. Abstract: concrete adapters must override it.
#
# @param sql [String] actual sql
# @param io [IO] IO object to write records to
# @param stats [StreamingStats] optional; collects stats and preview data
# @param retries [Integer] number of retries in case of failure
# @return [IO]
# @raise [ConnectionError, ExecutionError]
# @raise [NotImplementedError] always, in this base implementation
def execute_stream(sql, io, stats: nil, retries: 0)
  message = "#{self.class} has not implemented method '#{__method__}'"
  raise NotImplementedError, message
end
|
183
|
+
|
184
|
+
# Executes the given sql and yields the streamed results
# to the given block. Abstract: concrete adapters must override it.
#
# @param sql [String] actual sql
# @yield [chunk] Yields a streamed chunk as it streams in. The chunk type
#   might vary depending on the target db and settings
# @raise [ConnectionError, ExecutionError]
# @raise [NotImplementedError] always, in this base implementation
def stream(sql, &block)
  message = "#{self.class} has not implemented method '#{__method__}'"
  raise NotImplementedError, message
end
|
194
|
+
|
195
|
+
# Returns basic stats of a given table. Will typically include row_count,
# date_start, and date_end. Abstract: concrete adapters must override it.
#
# @param table [String] table name
# @param date_column [String] optional date column to use to find range
# @option qualifiers [String] :catalog optional catalog or equivalent name space.
#   will be ignored if the adapter doesn't support
# @option qualifiers [String] :schema optional schema to scope to.
#   will be ignored if the adapter doesn't support
# @return [DWH::Table] as originally documented
#   (NOTE(review): the gem also ships DWH::TableStats - confirm which type adapters return)
# @raise [ConnectionError, ExecutionError]
# @raise [NotImplementedError] always, in this base implementation
#
# @example
#   stats("public.big_table", date_column: "fact_date")
#   stats("big_table")
#   stats("big_table", schema: "public")
def stats(table, date_column: nil, **qualifiers)
  message = "#{self.class} has not implemented method '#{__method__}'"
  raise NotImplementedError, message
end
|
214
|
+
|
215
|
+
# Get all tables available in the target db. Abstract: concrete adapters
# must override it. The optional qualifiers narrow the listing to a given
# catalog and/or schema.
#
# @option qualifiers [String] :catalog optional catalog or equivalent name space.
#   will be ignored if the adapter doesn't support
# @option qualifiers [String] :schema optional schema to scope to.
#   will be ignored if the adapter doesn't support
# @return [Array<String>]
# @raise [NotImplementedError] always, in this base implementation
def tables(**qualifiers)
  message = "#{self.class} has not implemented method '#{__method__}'"
  raise NotImplementedError, message
end
|
227
|
+
|
228
|
+
# Check if table exists in remote db by looking it up in the list
# returned by {#tables} for the same qualifiers.
#
# @param table [String] table name to look for
# @option qualifiers [String] :catalog optional catalog or equivalent name space.
#   will be ignored if the adapter doesn't support
# @option qualifiers [String] :schema optional schema to scope to.
#   will be ignored if the adapter doesn't support
# @return [Boolean]
def table?(table, **qualifiers)
  known_tables = tables(**qualifiers)
  known_tables.include?(table)
end
|
238
|
+
|
239
|
+
# Get the schema structure of a given table_name.
# Pass in optional catalog and schema info. Abstract: concrete adapters
# must override it.
#
# Example:
#   metadata("public.big_table")
#   metadata("big_table")
#   metadata("big_table", schema: "public")
#
# @param table [String] - table name
# @option qualifiers [String] :catalog optional catalog or equivalent name space.
#   will be ignored if the adapter doesn't support
# @option qualifiers [String] :schema optional schema to scope to.
#   will be ignored if the adapter doesn't support
# @return [DWH::Table]
# @raise [NotImplementedError] always, in this base implementation
def metadata(table, **qualifiers)
  message = "#{self.class} has not implemented method '#{__method__}'"
  raise NotImplementedError, message
end
|
256
|
+
|
257
|
+
# Calls the block, re-running it when it raises a StandardError.
#
# max_attempts is the total number of times the block may be invoked, not
# the number of extra retries: with 0 the block is simply called once with
# no rescue or logging, and with 1 it is also called once but a failure is
# logged before being re-raised. Every failed attempt that will be retried
# is logged as a warning; the final failure is logged as an error and the
# original exception is re-raised.
#
# @param max_attempts [Integer] maximum number of invocations of the block
# @return the value returned by the block
def with_retry(max_attempts = 2, &block)
  return block.call if max_attempts.zero?

  tries = 0

  begin
    tries += 1
    block.call
  rescue StandardError => e
    unless tries < max_attempts
      logger.error "Failed after #{tries} attempts with error: #{e.message}"
      raise
    end

    logger.warn "Attempt #{tries} failed with error: #{e.message}. Retrying..."
    retry
  end
end
|
280
|
+
|
281
|
+
# Wraps an SQL execution with debug logging: the sql is logged before the
# block runs and the elapsed time (rounded to one decimal) afterwards.
# The closing message is only logged when the block returns normally.
#
# @param sql [String] actual sql being executed
# @return execution results (see #execute)
def with_debug(sql, &block)
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  logger.debug("=== SQL === \n#{sql}")

  block.call.tap do
    secs = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    logger.debug("=== FINISHED SQL (#{secs.round(1)} secs) ===")
  end
end
|
299
|
+
|
300
|
+
# Adapter name taken from the class name with any enclosing
# module namespace stripped (e.g. "DWH::Adapters::Trino" gives "Trino").
# @return [String]
def adapter_name
  qualified_name = self.class.name
  qualified_name.rpartition('::').last
end
|
305
|
+
|
306
|
+
# Extra connection params passed in through the config object under
# the :extra_connection_params key.
#
# @return [Hash] default empty hash
def extra_connection_params
  extras = config[:extra_connection_params]
  extras || {}
end
|
313
|
+
|
314
|
+
# Extra query params passed in through the config object under the
# :extra_query_params key. Adapters that support it pass them on to
# the executor.
#
# @return [Hash] default empty hash
def extra_query_params
  extras = config[:extra_query_params]
  extras || {}
end
|
321
|
+
|
322
|
+
# For adapters that use access tokens via jwt or oauth, this tells
# whether the token is expired. A token without a recorded expiry time
# is treated as expired.
# @return [Boolean]
def token_expired?
  return true if @token_expires_at.nil?

  Time.now >= @token_expires_at
end
|
328
|
+
|
329
|
+
protected
|
330
|
+
|
331
|
+
# Checks that the required configurations are present and correctly typed
# when the adapter is initialized, and fills in declared defaults.
#
# For every declared option, in order:
# 1. the declared type must be a Class,
# 2. the default is applied when the option was not supplied,
# 3. a supplied (or defaulted) value must be one of the :allowed values,
#    when any are declared,
# 4. a required value must be an instance of the declared type; for the
#    Boolean marker type the value must be true or false.
#
# @return [true] when the configuration is valid
# @raise [ConfigError] when a required option is missing or any check fails
def valid_config?
  definitions = self.class.configuration

  # Required options that were neither supplied nor have a default
  missing = definitions.select do |name, opts|
    opts[:required] && !config.key?(name) && opts[:default].nil?
  end

  if missing.any?
    details = missing.map { |name, opts| "Missing #{name} param - #{opts[:message]}" }
    raise ConfigError, "#{adapter_name} Adapter: #{details.join(', ')}"
  end

  definitions.each do |name, opts|
    type = opts[:type]

    unless type.is_a?(Class)
      raise ConfigError,
            "Adapter is not defined properly. Uknown configuration type #{type} for #{name}. Should be a class like String, Integer etc."
    end

    # Defaults go in first so that they take part in the checks below.
    # nil? rather than truthiness so that a default of false is applied too.
    config[name] = opts[:default] if !config.key?(name) && !opts[:default].nil?

    # Optional option that was not supplied and has no default: nothing to check
    next unless config.key?(name)

    value = config[name]
    allowed = opts[:allowed]
    raise ConfigError, "Invalid value. Only allowed: #{allowed}." if allowed.any? && !allowed.include?(value)

    next unless opts[:required]

    # Boolean is only a marker class, true/false are never instances of it
    well_typed = type == Boolean ? [true, false].include?(value) : value.is_a?(type)
    raise ConfigError, "#{name} should be a #{type}. Got #{value.class.name}" unless well_typed
  end

  true
end
|
361
|
+
end
|
362
|
+
end
|
363
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module DWH
|
2
|
+
# The Behaviors module will help us write SQL queries that are
# optimized for the target database. These are set up primarily for
# the purposes of Strata. However, any sql writer can use this to
# write better sql.
#
# For example temp_table_type will tell us what the preferred temporary
# table strategy should be. intermediate_measure_filter? will let us know
# if an aggregation should be filtered in a CTE, final pass, or both.
#
# Every method reads from the `settings` hash of the including object.
module Behaviors
  # In druid when you do specific time range you need
  # to apply the last hour of the day to the date value
  # to get all inclusive data for that date.
  def extend_ending_date_to_last_hour_of_day?
    settings[:extend_ending_date_to_last_hour_of_day]
  end

  # Defines how intermediate queries should be handled.
  # Could be one of cte, subquery, temp (future view, permanent)
  def temp_table_type
    settings[:temp_table_type]
  end

  # Value of the :temp_table_prefix setting.
  def temp_table_prefix
    settings[:temp_table_prefix]
  end

  # When an array dimension is projected and it is filtered
  # in the where clause, some db's like Druid need a
  # having clause to ensure the projected set matches
  # the filtered set.
  def apply_advanced_filtering_on_array_projections?
    settings[:apply_advanced_filtering_on_array_projections]
  end

  # When measures from multiple fact universes are combined
  # they need to be merged in a Merge stage SQL statement.
  # The components are typically combined by Full Outer Join
  # but could be modified as needed.
  def final_pass_measure_join_type
    settings[:final_pass_measure_join_type]
  end

  # When a filter on a Date time field can be applied to multiple
  # tables in the join tree should we apply to all of them or
  # just the first one.
  def greedy_apply_date_filters
    settings[:greedy_apply_date_filters]
  end

  # Whether to apply a measure filter to intermediate stages and
  # final pass when appropriate. This is the case of multi universe
  # query with measure filter. Default behavior is to apply the filter
  # in the intermediate stage and final pass.
  def cross_universe_measure_filtering_strategy
    settings[:cross_universe_measure_filtering_strategy]
  end

  # True when the filtering strategy setting is 'both' or 'intermediate'.
  def intermediate_measure_filter?
    case settings[:cross_universe_measure_filtering_strategy]
    when 'both', 'intermediate' then true
    else false
    end
  end

  # True when the filtering strategy setting is 'both' or 'final'.
  def final_measure_filter?
    case settings[:cross_universe_measure_filtering_strategy]
    when 'both', 'final' then true
    else false
    end
  end
end
|
67
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module DWH
|
2
|
+
# This module will handle database features that one might be
# interested in. Not every database necessarily supports
# all ANSI SQL features. This is minimal at this point and focused
# on analytical Query use cases for Strata [https://www.strata.site]
#
# Every predicate returns the value of the same-named `supports_*` key in
# the `settings` hash of the including object.
module Capabilities
  # Value of the :supports_table_join setting.
  def supports_table_join?
    settings[:supports_table_join]
  end

  # Value of the :supports_full_join setting.
  def supports_full_join?
    settings[:supports_full_join]
  end

  # Value of the :supports_cross_join setting.
  def supports_cross_join?
    settings[:supports_cross_join]
  end

  # Value of the :supports_sub_queries setting.
  def supports_sub_queries?
    settings[:supports_sub_queries]
  end

  # Value of the :supports_common_table_expressions setting.
  def supports_common_table_expressions?
    settings[:supports_common_table_expressions]
  end

  # Value of the :supports_temp_tables setting.
  def supports_temp_tables?
    settings[:supports_temp_tables]
  end

  # Value of the :supports_window_functions setting.
  def supports_window_functions?
    settings[:supports_window_functions]
  end

  # Value of the :supports_array_functions setting.
  def supports_array_functions?
    settings[:supports_array_functions]
  end
end
|
39
|
+
end
|
data/lib/dwh/column.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
module DWH
|
2
|
+
# Captures column metadata for a target table.
class Column
  # Rewrite rules applied, in order, by {#namify} to the titleized name.
  # The "desc" rule only matches the whole word so that names which merely
  # contain it (e.g. "Description") are left alone.
  DEFAULT_RULES = { /[_+]+/ => ' ', /\s+id$/i => ' ID', /\bdesc\b/i => 'Description' }.freeze

  # Type names that start with one of these words are decimals, which
  # also covers "double precision" and parameterised forms like "decimal(10,2)".
  DECIMAL_TYPE = /\A(?:decimal|dec|numeric|double|float|real|money)\b/
  private_constant :DECIMAL_TYPE

  attr_reader :schema_type, :data_type, :name, :precision, :scale, :max_char_length

  # @param name [String] column name, stored downcased
  # @param data_type [String, nil] native type name, stored downcased
  # @param precision [Integer, String] numeric precision; strings are converted with to_i
  # @param scale [Integer, String] numeric scale; strings are converted with to_i
  # @param schema_type [String, nil] e.g. 'dimension' or 'measure', stored downcased
  # @param max_char_length [Integer, nil] stored as given
  def initialize(name:, data_type:, precision: 0, scale: 0, schema_type: nil, max_char_length: nil)
    @name = name.downcase
    @precision = precision.is_a?(String) ? precision.to_i : precision
    @scale = scale.is_a?(String) ? scale.to_i : scale
    @data_type = data_type&.downcase
    @schema_type = schema_type&.downcase
    @max_char_length = max_char_length
  end

  # @return [Boolean] whether the schema type is 'dimension'
  def dim?
    schema_type == 'dimension'
  end

  # @return [Boolean] whether the schema type is 'measure'
  def measure?
    schema_type == 'measure'
  end

  # Builds a human friendly label from the column name: the name is
  # titleized and every rule is then applied in order with gsub.
  #
  # @param rules [Hash{Regexp, String => String}] pattern to replacement map
  # @return [String]
  def namify(rules = DEFAULT_RULES)
    rules.reduce(name.titleize(keep_id_suffix: true)) do |label, (pattern, replacement)|
      label.gsub(Regexp.new(pattern), replacement)
    end
  end

  # Maps the native data type onto one of: binary, string, date,
  # date_time, integer, bigint, decimal or boolean. Unknown (or missing)
  # types are treated as string. The first matching branch wins.
  #
  # @return [String]
  def normalized_data_type
    case data_type
    when /binary/, 'image' then 'binary'
    when /varchar/, 'string', /text/, /char/ then 'string'
    when 'date' then 'date'
    when /date_time/, /datetime/, 'time', /timestamp/ then 'date_time'
    when 'int', 'integer', 'smallint', 'tinyint' then 'integer'
    when 'bigint', 'bit_int', 'big_integer' then 'bigint'
    when DECIMAL_TYPE then 'decimal'
    when 'boolean', 'bit' then 'boolean'
    when 'number' then normalized_number_type
    else 'string'
    end
  end

  # @return [Hash] the column attributes keyed by symbol
  def to_h
    {
      name: name,
      data_type: data_type,
      precision: precision,
      scale: scale,
      schema_type: schema_type,
      max_char_length: max_char_length
    }
  end

  # @return [String] short description with name and data type
  def to_s
    "<Column:#{name}:#{data_type}>"
  end

  private

  # Resolves the generic 'number' type from precision and scale.
  # to_i makes a missing (nil) precision or scale count as 0 instead of raising.
  def normalized_number_type
    digits = precision.to_i
    decimals = scale.to_i
    return 'bigint' if digits >= 38 && decimals.zero?

    decimals.positive? ? 'decimal' : 'integer'
  end
end
|
79
|
+
end
|
data/lib/dwh/errors.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
module DWH
  # Top level Error class for lib.
  class DWHError < StandardError
  end

  # ConfigError catches issues related to how an
  # adapter was configured and instantiated.
  class ConfigError < DWHError
  end

  # ExecutionErrors are thrown when there is a failure
  # to execute calls against the remote db server.
  class ExecutionError < DWHError
  end

  # Connection errors are thrown when we fail to
  # obtain a connection for the target database.
  class ConnectionError < DWHError
  end

  # UnsupportedCapability is thrown when calling a function
  # that the target database does not support.
  # NOTE(review): inherits from StandardError rather than DWHError, so
  # `rescue DWHError` does not catch it - confirm this is intentional.
  class UnsupportedCapability < StandardError
  end

  # Handle errors related to OAuth.
  # NOTE(review): inherits from StandardError rather than DWHError, so
  # `rescue DWHError` does not catch it or its subclasses - confirm this is intentional.
  class OAuthError < StandardError
  end

  # Handle token expiration
  class TokenExpiredError < OAuthError
  end

  # Handle auth errors
  class AuthenticationError < OAuthError
  end
end
|
data/lib/dwh/factory.rb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'connection_pool'
|
2
|
+
|
3
|
+
module DWH
|
4
|
+
# Manages adapters. This should be the means by which adapters
|
5
|
+
# are created, loaded, and pooled.
|
6
|
+
module Factory
|
7
|
+
include Logger
|
8
|
+
|
9
|
+
# Register your new adapter. The adapter class is asked to load its
# settings before it is stored in the registry.
# @param adapter_name [String, Symbol] your adapter name. Could be different from
#   the class name.
# @param adapter_class [Class] actual class of the adapter.
# @return [Class] the registered adapter class
# @raise [ConfigError] when adapter_class is not a Class
def register(adapter_name, adapter_class)
  unless adapter_class.is_a?(Class)
    raise ConfigError, 'adapter_class should be a class'
  end

  adapter_class.load_settings
  adapters.store(adapter_name.to_sym, adapter_class)
end
|
19
|
+
|
20
|
+
# Remove the given adapter from the registry.
# @param adapter_name [String, Symbol]
# @return [Class, nil] the removed adapter class, or nil when it was not registered
def unregister(adapter_name)
  registry = adapters
  registry.delete(adapter_name.to_sym)
end
|
24
|
+
|
25
|
+
# Get the adapter class registered under the given name.
# @param adapter_name [String, Symbol]
# @return [Class] the registered adapter class
# @raise [RuntimeError] when no adapter is registered under that name
def get_adapter(adapter_name)
  adapters.fetch(adapter_name.to_sym) do
    # The hint shows the two argument form that register actually takes
    raise "Adapter '#{adapter_name}' not found. Did you forget to register it: DWH.register(:my_adapter, MyAdapterClass)"
  end
end
|
32
|
+
|
33
|
+
# Check if the given adapter is registered
# @param adapter_name [String, Symbol]
# @return [Boolean]
def adapter?(adapter_name)
  key = adapter_name.to_sym
  adapters.key?(key)
end
|
38
|
+
|
39
|
+
# Get the registry of registered adapters, created on first access.
# @return [Hash] adapter classes keyed by symbolized adapter name
def adapters
  @adapters = {} unless @adapters
  @adapters
end
|
43
|
+
|
44
|
+
# Current active pools, created on first access.
# @return [Hash] connection pools keyed by the name they were created with
def pools
  @pools = {} unless @pools
  @pools
end
|
48
|
+
|
49
|
+
# The canonical way of creating an adapter instance
# in DWH: looks up the registered adapter class and instantiates it
# with the given config.
# @param adapter_name [String, Symbol]
# @param config [Hash] options hash for the target database
# @return [Object] a new instance of the registered adapter class
#
# @example connect to MySQL
#   DWH.create(:mysql, { host: '127.0.0.1', database: 'mydb', username: 'me', password: 'mypwd', client_name: 'Strata CLI'})
# @example connect Trino
#   DWH.create(:trino, {host: 'localhost', catalog: 'native', username: 'Ajo'})
# @example connect to Druid
#   DWH.create(:druid, {host: 'localhost',port: 8080, protocol: 'http'})
def create(adapter_name, config)
  adapter_class = get_adapter(adapter_name)
  adapter_class.new(config)
end
|
63
|
+
|
64
|
+
# Create a pool of connections for a given name and adapter.
# Returns existing pool if one was already created under the same name,
# in which case the other arguments are ignored.
#
# @param name [String] custom name for your pool
# @param adapter_name [String, Symbol]
# @param config [Hash] connection options
# @param timeout [Integer] pool checkout time out
# @param size [Integer] size of the pool
# @return [ConnectionPool]
def pool(name, adapter_name, config, timeout: 5, size: 10)
  pools.fetch(name) do
    pools[name] = ConnectionPool.new(size: size, timeout: timeout) { create(adapter_name, config) }
  end
end
|
81
|
+
|
82
|
+
# Shutdown a specific pool or all pools. Every connection of a pool that is
# shut down is closed and the pool is removed from the registry.
#
# A pool name is looked up as given and also in its String and Symbol
# forms, so a pool created with a Symbol name can be shut down with the
# equivalent String and vice versa. Unknown names are ignored.
#
# @param pool [String, Symbol, ConnectionPool, nil] pool or name of pool
#   or nil to shut everything down
# @return [nil]
# @raise [ArgumentError] when pool is of any other type
def shutdown(pool = nil)
  # Match on the value itself: `case pool.class` compares a Class object
  # against String/Symbol/ConnectionPool and never matches.
  case pool
  when nil
    pools.each_value { |registered| registered.shutdown(&:close) }
    @pools = {}
  when String, Symbol
    key = [pool, pool.to_s, pool.to_sym].find { |candidate| pools.key?(candidate) }
    unless key.nil?
      pools[key].shutdown(&:close)
      pools.delete(key)
    end
  when ConnectionPool
    pool.shutdown(&:close)
    pools.delete(pools.key(pool))
  else
    raise ArgumentError, "pool should be a name, a ConnectionPool or nil. Got #{pool.class}"
  end

  nil
end
|
103
|
+
|
104
|
+
# Start reaper that will periodically clean up
# unused or idle connections. Runs in a background thread: on every cycle
# it logs the size, idle and available counts of each pool and then reaps
# (closes) the connections that have been idle for at least `frequency` seconds.
#
# @param frequency [Integer] seconds between cycles, also used as the idle
#   threshold; defaults to 300 seconds
# @return [Thread] the reaper thread
def start_reaper(frequency = 300)
  logger.info 'Starting DB Adapter reaper process'

  Thread.new do
    loop do
      # Iterate over a snapshot: other threads may register pools meanwhile
      pools.to_a.each do |name, pool|
        # Stats are read straight from the pool. Checking a connection out
        # just to log would create/refresh one and keep it from going idle.
        logger.info "DB POOL FOR #{name} STATS:"
        logger.info "\tSize: #{pool.size}"
        logger.info "\tIdle: #{pool.idle}"
        logger.info "\tAvailable: #{pool.available}"

        pool.reap(frequency, &:close)
      rescue StandardError => e
        # One misbehaving pool must not stop the reaper for the others
        logger.error "DB POOL FOR #{name} reaping failed: #{e.message}"
      end

      sleep frequency
    end
  end
end
|
124
|
+
end
|
125
|
+
end
|