dwh 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +36 -0
  3. data/CHANGELOG.md +5 -0
  4. data/LICENSE +21 -0
  5. data/README.md +130 -0
  6. data/Rakefile +42 -0
  7. data/docs/DWH/Adapters/Adapter.html +3053 -0
  8. data/docs/DWH/Adapters/Athena.html +1704 -0
  9. data/docs/DWH/Adapters/Boolean.html +121 -0
  10. data/docs/DWH/Adapters/Druid.html +1626 -0
  11. data/docs/DWH/Adapters/DuckDb.html +2012 -0
  12. data/docs/DWH/Adapters/MySql.html +1704 -0
  13. data/docs/DWH/Adapters/OpenAuthorizable/ClassMethods.html +265 -0
  14. data/docs/DWH/Adapters/OpenAuthorizable.html +1102 -0
  15. data/docs/DWH/Adapters/Postgres.html +2000 -0
  16. data/docs/DWH/Adapters/Snowflake.html +1662 -0
  17. data/docs/DWH/Adapters/SqlServer.html +2084 -0
  18. data/docs/DWH/Adapters/Trino.html +1835 -0
  19. data/docs/DWH/Adapters.html +129 -0
  20. data/docs/DWH/AuthenticationError.html +142 -0
  21. data/docs/DWH/Behaviors.html +767 -0
  22. data/docs/DWH/Capabilities.html +748 -0
  23. data/docs/DWH/Column.html +1115 -0
  24. data/docs/DWH/ConfigError.html +143 -0
  25. data/docs/DWH/ConnectionError.html +143 -0
  26. data/docs/DWH/DWHError.html +138 -0
  27. data/docs/DWH/ExecutionError.html +143 -0
  28. data/docs/DWH/Factory.html +1133 -0
  29. data/docs/DWH/Functions/Arrays.html +505 -0
  30. data/docs/DWH/Functions/Dates.html +1644 -0
  31. data/docs/DWH/Functions/ExtractDatePart.html +804 -0
  32. data/docs/DWH/Functions/Nulls.html +377 -0
  33. data/docs/DWH/Functions.html +846 -0
  34. data/docs/DWH/Logger.html +258 -0
  35. data/docs/DWH/OAuthError.html +138 -0
  36. data/docs/DWH/Settings.html +658 -0
  37. data/docs/DWH/StreamingStats.html +804 -0
  38. data/docs/DWH/Table.html +1260 -0
  39. data/docs/DWH/TableStats.html +583 -0
  40. data/docs/DWH/TokenExpiredError.html +142 -0
  41. data/docs/DWH/UnsupportedCapability.html +135 -0
  42. data/docs/DWH.html +220 -0
  43. data/docs/_index.html +471 -0
  44. data/docs/class_list.html +54 -0
  45. data/docs/css/common.css +1 -0
  46. data/docs/css/full_list.css +58 -0
  47. data/docs/css/style.css +503 -0
  48. data/docs/file.README.html +210 -0
  49. data/docs/file.adapters.html +514 -0
  50. data/docs/file.creating-adapters.html +497 -0
  51. data/docs/file.getting-started.html +288 -0
  52. data/docs/file.usage.html +446 -0
  53. data/docs/file_list.html +79 -0
  54. data/docs/frames.html +22 -0
  55. data/docs/guides/adapters.md +445 -0
  56. data/docs/guides/creating-adapters.md +430 -0
  57. data/docs/guides/getting-started.md +225 -0
  58. data/docs/guides/usage.md +378 -0
  59. data/docs/index.html +210 -0
  60. data/docs/js/app.js +344 -0
  61. data/docs/js/full_list.js +242 -0
  62. data/docs/js/jquery.js +4 -0
  63. data/docs/method_list.html +2038 -0
  64. data/docs/top-level-namespace.html +110 -0
  65. data/lib/dwh/adapters/athena.rb +359 -0
  66. data/lib/dwh/adapters/druid.rb +267 -0
  67. data/lib/dwh/adapters/duck_db.rb +235 -0
  68. data/lib/dwh/adapters/my_sql.rb +235 -0
  69. data/lib/dwh/adapters/open_authorizable.rb +215 -0
  70. data/lib/dwh/adapters/postgres.rb +250 -0
  71. data/lib/dwh/adapters/snowflake.rb +489 -0
  72. data/lib/dwh/adapters/sql_server.rb +257 -0
  73. data/lib/dwh/adapters/trino.rb +213 -0
  74. data/lib/dwh/adapters.rb +363 -0
  75. data/lib/dwh/behaviors.rb +67 -0
  76. data/lib/dwh/capabilities.rb +39 -0
  77. data/lib/dwh/column.rb +79 -0
  78. data/lib/dwh/errors.rb +29 -0
  79. data/lib/dwh/factory.rb +125 -0
  80. data/lib/dwh/functions/arrays.rb +42 -0
  81. data/lib/dwh/functions/dates.rb +162 -0
  82. data/lib/dwh/functions/extract_date_part.rb +70 -0
  83. data/lib/dwh/functions/nulls.rb +31 -0
  84. data/lib/dwh/functions.rb +86 -0
  85. data/lib/dwh/logger.rb +50 -0
  86. data/lib/dwh/settings/athena.yml +77 -0
  87. data/lib/dwh/settings/base.yml +81 -0
  88. data/lib/dwh/settings/databricks.yml +51 -0
  89. data/lib/dwh/settings/druid.yml +59 -0
  90. data/lib/dwh/settings/duckdb.yml +44 -0
  91. data/lib/dwh/settings/mysql.yml +67 -0
  92. data/lib/dwh/settings/postgres.yml +30 -0
  93. data/lib/dwh/settings/redshift.yml +52 -0
  94. data/lib/dwh/settings/snowflake.yml +45 -0
  95. data/lib/dwh/settings/sqlserver.yml +80 -0
  96. data/lib/dwh/settings/trino.yml +77 -0
  97. data/lib/dwh/settings.rb +79 -0
  98. data/lib/dwh/streaming_stats.rb +69 -0
  99. data/lib/dwh/table.rb +105 -0
  100. data/lib/dwh/table_stats.rb +51 -0
  101. data/lib/dwh/version.rb +5 -0
  102. data/lib/dwh.rb +54 -0
  103. data/sig/dwh.rbs +4 -0
  104. metadata +231 -0
@@ -0,0 +1,363 @@
1
+ require 'csv'
2
+ require_relative 'settings'
3
+ require_relative 'capabilities'
4
+ require_relative 'functions'
5
+ require_relative 'behaviors'
6
+ require_relative 'logger'
7
+
8
+ module DWH
9
+ # Encapsulates functionality related to all Adapter implementations
10
+ module Adapters
11
+ class Boolean; end
12
+
13
+ # @abstract Adapters base class. All adapters should inherit from this class
14
+ # and implement these core required methods:
15
+ #
16
+ # * {#stats} - get table statistics
17
+ # * {#tables} - list tables
18
+ # * {#metadata} - get metadata for a specific table
19
+ # * {#execute} - run query and return result
20
+ # * {#execute_stream} - run query and stream results to provided IO
21
+ # * {#stream} - run query and yield streaming results
22
+ #
23
+ # Adapter implementations can declare configuration options,
24
+ # defaults, and whether it is required. This is a class
25
+ # level method. They will be validated and a {ConfigError} will be raised
26
+ # if there is an issue. Methods not implemented will raise {NotImplementedError}
27
+ #
28
+ # Additionally, if certain settings need to be overridden you can add a settings
29
+ # file in a relative directory like so: *settings/my_adapter.yml*. Or, you can specify
30
+ # an exact settings file location at the class level:
31
+ #
32
+ # class MyAdapter < DWH::Adapters::Adapter
33
+ # settings_file_path "my_dir/my_settings.yml"
34
+ # end
35
+ #
36
+ # @example
37
+ # class MyAdapter < DWH::Adapters::Adapter
38
+ # config :username, String, required: true, message: "login id of the current user"
39
+ # config :port, Integer, required: true, default: 5432
40
+ # end
41
+ class Adapter
42
+ extend Settings
43
+ include Capabilities
44
+ include Functions
45
+ include Behaviors
46
+ include Logger
47
+
48
+ # Define the configurations required for the adapter to
49
+ # connect and query target database.
50
+ #
51
+ # @param name [String, Symbol] name of the configuration
52
+ # @param type [Constant] ruby type of the configuration
53
+ # @param options [Hash] options for the config
54
+ # @option options [Boolean] :required Whether option is required
55
+ # @option options [*] :default The default value
56
+ # @option options [String] :message The error message or info displayed
57
+ # @return [Hash]
58
+ def self.config(name, type, options = {})
59
+ configuration[name.to_sym] = {
60
+ type: type,
61
+ required: options[:required] || false,
62
+ default: options[:default],
63
+ message: options[:message] || "Invalid or missing parameter: #{name}",
64
+ allowed: options[:allowed] || []
65
+ }
66
+
67
+ define_method(name.to_sym) do
68
+ config[name.to_sym]
69
+ end
70
+ end
71
+
72
+ # Get the adapter class level configuration settings
73
+ # @return [Hash]
74
+ def self.configuration
75
+ @configuration ||= {}
76
+ end
77
+
78
+ # Instance level configurations
79
+ # @return [Hash] the actual instance configuration
80
+ attr_reader :config
81
+
82
# Builds the adapter from a connection config hash and validates it.
#
# @param config [Hash] connection options, with String or Symbol keys. An
#   optional :settings entry overrides the class-level adapter settings for
#   this instance only.
# @raise [ConfigError] when the config fails validation (see #valid_config?)
def initialize(config)
  @config = config.symbolize_keys
  # Per instance customization of general settings, so you can have
  # multiple connections to e.g. Trino that exhibit different behavior.
  # The overrides are read from the symbolized copy: the raw hash may be
  # string-keyed, in which case config[:settings] would silently miss them.
  @settings = self.class.adapter_settings.merge(
    (@config[:settings] || {}).symbolize_keys
  )

  valid_config?
end
93
+
94
+ # This is the actual runtime settings used by the adapter
95
+ # once initialized. During initialization settings could be
96
+ # overridden. Settings are different from configuration in that
97
+ # settings control behaviour and syntax while configuration
98
+ # determines how we connect.
99
+ # @return [Hash] symbolized hash of settings
100
+ attr_reader :settings
101
+
102
# Allows an already instantiated adapter to change its current settings.
# This is useful when behavior needs to be modified at runtime. Every call
# starts again from the settings the adapter had before the first
# alteration, so changes from earlier calls do not accumulate.
#
# @param changes [Hash] settings to override
# @return [Hash] the complete settings with changes merged
def alter_settings(changes = {})
  # Snapshot a copy once. Aliasing the live hash and merging in place would
  # rewrite the snapshot too, leaving reset_settings nothing to restore.
  @original_settings ||= @settings.dup
  @settings = @original_settings.merge(changes)
end
111
+
112
+ # This returns settings back to its original state prior to
113
+ # running alter_settings.
114
+ # @return [Hash] with original settings
115
+ def reset_settings
116
+ @settings = @original_settings if @original_settings
117
+ end
118
+
119
+ # Creates a connection to the target database and returns the
120
+ # connection object or self
121
+ def connection
122
+ raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
123
+ end
124
+
125
+ # Tests the connection to the target database and returns true
126
+ # if successful, or raise Exception or false
127
+ # connection object or self
128
+ # @return [Boolean]
129
+ # @raise [ConnectionError] when a connection cannot be made
130
+ def test_connection(raise_exception: false)
131
+ raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
132
+ end
133
+
134
+ # Test connection and raise exception if connection
135
+ # fails.
136
+ # @return [Boolean]
137
+ # @raise [ConnectionError]
138
+ def connect!
139
+ test_connection(raise_exception: true)
140
+ end
141
+
142
+ # Tests whether the database can be connected to
143
+ # @return [Boolean]
144
+ def connect?
145
+ test_connection(raise_exception: false)
146
+ end
147
+
148
+ # Close the connection if it was created.
149
+ def close
150
+ @connection&.close
151
+ @connection = nil
152
+ end
153
+
154
+ # Execute sql on the target database.
155
+ #
156
+ # @param sql [String] actual sql
157
+ # @param format [Symbol, String] return format type
158
+ # - array returns array of array
159
+ # - object returns array of Hashes
160
+ # - csv returns as csv
161
+ # - native returns the native result from any clients used
162
+ # - For example: Postgres using pg client will return PG::Result
163
+ # - Http clients will returns the HTTP response object
164
+ # @param retries [Integer] number of retries in case of failure. Default is 0
165
+ # @return [Array<Array>,Hash, CSV, Native]
166
+ # @raise [ConnectionError, ExecutionError]
167
+ def execute(sql, format: :array, retries: 0)
168
+ raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
169
+ end
170
+
171
+ # Execute sql and stream responses back. Data is written out in CSV format
172
+ # to the provided IO object.
173
+ #
174
+ # @param sql [String] actual sql
175
+ # @param io [IO] IO object to write records to
176
+ # @param stats [StreamingStats] collect stats and preview data this is optional
177
+ # @param retries [Integer] number of retries in case of failure
178
+ # @return [IO]
179
+ # @raise [ConnectionError, ExecutionError]
180
+ def execute_stream(sql, io, stats: nil, retries: 0)
181
+ raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
182
+ end
183
+
184
+ # Executes the given sql and yields the streamed results
185
+ # to the given block.
186
+ #
187
+ # @param sql [String] actual sql
188
+ # @yield [chunk] Yields a streamed chunk as it streams in. The chunk type
189
+ # might vary depending on the target db and settings
190
+ # @raise [ConnectionError, ExecutionError]
191
+ def stream(sql, &block)
192
+ raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
193
+ end
194
+
195
+ # Returns basic stats of a given table. Will typically include row_count,
196
+ # date_start, and date_end.
197
+ #
198
+ # @param table [String] table name
199
+ # @param date_column [String] optional date column to use to find range
200
+ # @option qualifiers [String] :catalog optional catalog or equivalent name space.
201
+ # will be ignored if the adapter doesn't support
202
+ # @option qualifiers [String] :schema optional schema to scope to.
203
+ # will be ignored if the adapter doesn't support
204
+ # @return [DWH::Table]
205
+ # @raise [ConnectionError, ExecutionError]
206
+ #
207
+ # @example
208
+ # stats("public.big_table", date_column: "fact_date")
209
+ # stats("big_table")
210
+ # stats("big_table",schema: "public")
211
+ def stats(table, date_column: nil, **qualifiers)
212
+ raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
213
+ end
214
+
215
+ # Get all tables available in the
216
+ # target db. It will use the default catalog and schema
217
+ # config only specified here.
218
+ #
219
+ # @option qualifiers [String] :catalog optional catalog or equivalent name space.
220
+ # will be ignored if the adapter doesn't support
221
+ # @option qualifiers [String] :schema optional schema to scope to.
222
+ # will be ignored if the adapter doesn't support
223
+ # @return [Array<String>]
224
+ def tables(**qualifiers)
225
+ raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
226
+ end
227
+
228
+ # Check if table exists in remote db.
229
+ #
230
+ # @option qualifiers [String] :catalog optional catalog or equivalent name space.
231
+ # will be ignored if the adapter doesn't support
232
+ # @option qualifiers [String] :schema optional schema to scope to.
233
+ # will be ignored if the adapter doesn't support
234
+ # @return [Boolean]
235
+ def table?(table, **qualifiers)
236
+ tables(**qualifiers).include?(table)
237
+ end
238
+
239
+ # Get the schema structure of a given table_name.
240
+ # Pass in optional catalog and schema info.
241
+ #
242
+ # Example:
243
+ # metadata("public.big_table")
244
+ # metadata("big_table")
245
+ # metadata("big_table",schema: "public")
246
+ #
247
+ # @param table [String] - table name
248
+ # @option qualifiers [String] :catalog optional catalog or equivalent name space.
249
+ # will be ignored if the adapter doesn't support
250
+ # @option qualifiers [String] :schema optional schema to scope to.
251
+ # will be ignored if the adapter doesn't support
252
+ # @return [DWH::Table]
253
+ def metadata(table, **qualifiers)
254
+ raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
255
+ end
256
+
257
+ # Will call the block with retries given by the
258
+ # max attempts param. If max attempts is 0, it
259
+ # will just return the block.call
260
+ #
261
+ # @param max_attempts [Integer] max number of retries
262
+ def with_retry(max_attempts = 2, &block)
263
+ return block.call if max_attempts.zero?
264
+
265
+ attempts = 0
266
+
267
+ begin
268
+ attempts += 1
269
+ block.call
270
+ rescue StandardError => e
271
+ if attempts < max_attempts
272
+ logger.warn "Attempt #{attempts} failed with error: #{e.message}. Retrying..."
273
+ retry
274
+ else
275
+ logger.error "Failed after #{attempts} attempts with error: #{e.message}"
276
+ raise
277
+ end
278
+ end
279
+ end
280
+
281
+ # Wraps an SQL execution with debug logging.
282
+ # It will include execution time.
283
+ #
284
+ # @param sql [String] actual sql being executed
285
+ # @return execution results (see #execute)
286
+ def with_debug(sql, &block)
287
+ starting = Process.clock_gettime(Process::CLOCK_MONOTONIC)
288
+
289
+ logger.debug("=== SQL === \n#{sql}")
290
+
291
+ result = block.call
292
+
293
+ ending = Process.clock_gettime(Process::CLOCK_MONOTONIC)
294
+ elapsed = ending - starting
295
+ logger.debug("=== FINISHED SQL (#{elapsed.round(1)} secs) ===")
296
+
297
+ result
298
+ end
299
+
300
+ # Adapter name from the class name
301
+ # @return [String]
302
+ def adapter_name
303
+ self.class.name.demodulize
304
+ end
305
+
306
+ # If any extra connection params were passed in the config
307
+ # object, this will return it.
308
+ #
309
+ # @return [Hash] default empty hash
310
+ def extra_connection_params
311
+ config[:extra_connection_params] || {}
312
+ end
313
+
314
+ # If the adapter supports it, will pass on extra query params
315
+ # from the config to the executor.
316
+ #
317
+ # @return [Hash] default empty hash
318
+ def extra_query_params
319
+ config[:extra_query_params] || {}
320
+ end
321
+
322
+ # For adapters that uses access tokens via
323
+ # jwt or oauth this will return whether a token is
324
+ # expired
325
+ def token_expired?
326
+ @token_expires_at.nil? || Time.now >= @token_expires_at
327
+ end
328
+
329
+ protected
330
+
331
# Validates the instance config against the class-level definitions
# declared via .config, filling in declared defaults along the way.
#
# @return [true] when every check passes
# @raise [ConfigError] on a missing required param, a type declaration that
#   is not a Class, a value outside :allowed, or a required value of the
#   wrong type
def valid_config?
  definitions = self.class.configuration

  # Check for missing required parameters
  missing_params = definitions.select do |name, options|
    options[:required] && !config.key?(name) && options[:default].nil?
  end

  if missing_params.any?
    error_messages = missing_params.map { |name, options| "Missing #{name} param - #{options[:message]}" }
    raise ConfigError, "#{adapter_name} Adapter: #{error_messages.join(', ')}"
  end

  definitions.each do |name, opts|
    unless opts[:type].is_a?(Class)
      raise ConfigError,
            "Adapter is not defined properly. Uknown configuration type #{opts[:type]} for #{name}. Should be a class like String, Integer etc."
    end

    # Apply the default before validating, so an omitted option is checked
    # against its default rather than nil. Compare with nil so that a
    # default of false is honored.
    config[name] = opts[:default] if !config.key?(name) && !opts[:default].nil?

    value = config[name]

    # An optional param that was never supplied has nothing to validate.
    if !value.nil? && opts[:allowed].any? && !opts[:allowed].include?(value)
      raise ConfigError, "Invalid value. Only allowed: #{opts[:allowed]}."
    end

    # Boolean is only a marker class: true and false are not instances of
    # it, so an is_a? check against it can never succeed.
    next if opts[:type] == Boolean
    next unless opts[:required]
    next if value.is_a?(opts[:type])

    raise ConfigError, "#{name} should be a #{opts[:type]}. Got #{value.class.name}"
  end

  true
end
361
+ end
362
+ end
363
+ end
@@ -0,0 +1,67 @@
1
+ module DWH
2
+ # The Behaviors module will help us write SQL queries that are
3
+ # optimized for the target database. These are setup primarily for
4
+ # the purposes of Strata. However, any sql writer can use this to
5
+ # write better sql.
6
+ #
7
+ # For example, temp_table_type will tell us what the preferred temporary
8
+ # table strategy should be. intermediate_measure_filter? will let us know
9
+ # if an aggregation should be filtered in a CTE, final pass, or both.
10
+ module Behaviors
11
+ # In druid when you do specific time range you need
12
+ # to apply the last hour of the day to the date value
13
+ # to get all inclusive data for that date.
14
+ def extend_ending_date_to_last_hour_of_day?
15
+ settings[:extend_ending_date_to_last_hour_of_day]
16
+ end
17
+
18
+ # Defines how intermediate queries should be handled.
19
+ # Could be one off cte, subquery, temp (future view, permanent)
20
+ def temp_table_type
21
+ settings[:temp_table_type]
22
+ end
23
+
24
+ def temp_table_prefix
25
+ settings[:temp_table_prefix]
26
+ end
27
+
28
+ # When an array dimension is projected and it is filtered
29
+ # in the where clause, some db's like Druid needs a
30
+ # having clause to ensure the projected set matches
31
+ # the filtered set.
32
+ def apply_advanced_filtering_on_array_projections?
33
+ settings[:apply_advanced_filtering_on_array_projections]
34
+ end
35
+
36
+ # When measures from multiple fact universes are combined
37
+ # they need to be merged in a Merge stage SQL statement.
38
+ # The components is typically combined by Full Outer Join
39
+ # but could be modified as needed.
40
+ def final_pass_measure_join_type
41
+ settings[:final_pass_measure_join_type]
42
+ end
43
+
44
+ # When a filter on a Date time field can be applied to multiple
45
+ # tables in the join tree should we apply to all of them or
46
+ # just the first one.
47
+ def greedy_apply_date_filters
48
+ settings[:greedy_apply_date_filters]
49
+ end
50
+
51
+ # Whether to apply a measure filter to intermediate stages and
52
+ # final pass when appropriate. This is the case of multi universe
53
+ # query with measure filter. Default behavior is to apply the filter
54
+ # in the intermediate stage and final pass.
55
+ def cross_universe_measure_filtering_strategy
56
+ settings[:cross_universe_measure_filtering_strategy]
57
+ end
58
+
59
+ def intermediate_measure_filter?
60
+ %w[both intermediate].include?(settings[:cross_universe_measure_filtering_strategy])
61
+ end
62
+
63
+ def final_measure_filter?
64
+ %w[both final].include?(settings[:cross_universe_measure_filtering_strategy])
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,39 @@
1
+ module DWH
2
+ # This module will handle database features that one might be
3
+ # interested in. Not every database necessarily supports
4
+ # all ANSI SQL features. This is minimal at this point and focused
5
+ # on analytical Query use cases for Strata [https://www.strata.site]
6
+ module Capabilities
7
+ def supports_table_join?
8
+ settings[:supports_table_join]
9
+ end
10
+
11
+ def supports_full_join?
12
+ settings[:supports_full_join]
13
+ end
14
+
15
+ def supports_cross_join?
16
+ settings[:supports_cross_join]
17
+ end
18
+
19
+ def supports_sub_queries?
20
+ settings[:supports_sub_queries]
21
+ end
22
+
23
+ def supports_common_table_expressions?
24
+ settings[:supports_common_table_expressions]
25
+ end
26
+
27
+ def supports_temp_tables?
28
+ settings[:supports_temp_tables]
29
+ end
30
+
31
+ def supports_window_functions?
32
+ settings[:supports_window_functions]
33
+ end
34
+
35
+ def supports_array_functions?
36
+ settings[:supports_array_functions]
37
+ end
38
+ end
39
+ end
data/lib/dwh/column.rb ADDED
@@ -0,0 +1,79 @@
1
module DWH
  # Captures column metadata for a target table.
  class Column
    # Clean-up substitutions that {#namify} applies after titleizing, in
    # insertion order. "desc" is expanded only when it stands alone as a
    # word: an unanchored match would also rewrite words that merely contain
    # it ("Description" would become "Descriptionription").
    DEFAULT_RULES = { /[_+]+/ => ' ', /\s+id$/i => ' ID', /\bdesc\b/i => 'Description' }.freeze

    attr_reader :schema_type, :data_type, :name, :precision, :scale, :max_char_length

    # @param name [String] column name; stored lower-cased
    # @param data_type [String, nil] source data type; stored lower-cased
    # @param precision [Integer, String] numeric precision; a String is converted with to_i
    # @param scale [Integer, String] numeric scale; a String is converted with to_i
    # @param schema_type [String, nil] stored lower-cased; see {#dim?} and {#measure?}
    # @param max_char_length [Object, nil] stored as given
    def initialize(name:, data_type:, precision: 0, scale: 0, schema_type: nil, max_char_length: nil)
      @name = name.downcase
      @precision = precision.is_a?(String) ? precision.to_i : precision
      @scale = scale.is_a?(String) ? scale.to_i : scale
      @data_type = data_type&.downcase
      @schema_type = schema_type&.downcase
      @max_char_length = max_char_length
    end

    # @return [Boolean] true when schema_type is 'dimension'
    def dim?
      schema_type == 'dimension'
    end

    # @return [Boolean] true when schema_type is 'measure'
    def measure?
      schema_type == 'measure'
    end

    # Builds a display name: titleizes the column name, then applies each
    # rule as a gsub in order.
    #
    # NOTE(review): String#titleize is not core Ruby; presumably ActiveSupport
    # is loaded elsewhere in the gem - confirm.
    #
    # @param rules [Hash{Regexp, String => String}] pattern => replacement
    # @return [String]
    def namify(rules = DEFAULT_RULES)
      titled = name.titleize(keep_id_suffix: true)
      rules.reduce(titled) do |label, (pattern, replacement)|
        label.gsub(Regexp.new(pattern), replacement)
      end
    end

    # Maps the source data type onto one of: binary, string, date, date_time,
    # integer, bigint, decimal, boolean. Anything unrecognized (including a
    # nil data_type) is reported as 'string'.
    #
    # @return [String]
    def normalized_data_type
      case data_type
      when /binary/, 'image'
        'binary'
      when /varchar/, 'string', /text/, /char/
        'string'
      when 'date'
        'date'
      when /date_time/, /datetime/, 'time', /timestamp/
        'date_time'
      when 'int', 'integer', 'smallint', 'tinyint'
        'integer'
      when 'bigint', 'bit_int', 'big_integer'
        'bigint'
      when 'decimal', 'double', 'float', 'real', 'dec', 'numeric', 'money'
        'decimal'
      when 'boolean', 'bit'
        'boolean'
      when 'number'
        normalized_number_type
      else
        'string'
      end
    end

    # @return [Hash] the column attributes keyed by symbol
    def to_h
      {
        name: name,
        data_type: data_type,
        precision: precision,
        scale: scale,
        schema_type: schema_type,
        max_char_length: max_char_length
      }
    end

    # @return [String] short description of the form "<Column:name:data_type>"
    def to_s
      "<Column:#{name}:#{data_type}>"
    end

    private

    # Resolves the generic 'number' type from precision and scale.
    def normalized_number_type
      # precision/scale can be nil when explicitly passed as such; to_i
      # treats that as 0 instead of raising on the comparison.
      digits = precision.to_i
      fraction = scale.to_i

      if digits >= 38 && fraction.zero?
        'bigint'
      elsif fraction.positive?
        'decimal'
      else
        'integer'
      end
    end
  end
end
data/lib/dwh/errors.rb ADDED
@@ -0,0 +1,29 @@
1
module DWH
  # Top level Error class for lib. Every error class defined in this module
  # descends from it, so `rescue DWH::DWHError` catches any of them.
  class DWHError < StandardError; end

  # ConfigError catches issues related to how an
  # adapter was configured and instantiated.
  class ConfigError < DWHError; end

  # ExecutionErrors are raised when there is a failure
  # to execute calls against the remote db server.
  class ExecutionError < DWHError; end

  # ConnectionErrors are raised when we fail to
  # obtain a connection for the target database.
  class ConnectionError < DWHError; end

  # UnsupportedCapability is raised when calling a function
  # that the target database does not support.
  class UnsupportedCapability < DWHError; end

  # Handle errors related to OAuth
  class OAuthError < DWHError; end

  # Handle token expiration
  class TokenExpiredError < OAuthError; end

  # Handle auth errors
  class AuthenticationError < OAuthError; end
end
@@ -0,0 +1,125 @@
1
+ require 'connection_pool'
2
+
3
+ module DWH
4
+ # Manages adapters. This should be the means by which adapters
5
+ # are created, loaded, and pooled.
6
+ module Factory
7
+ include Logger
8
+
9
+ # Register your new adapter.
10
+ # @param adapter_name [String, Symbol] your adapter name. Could be different from
11
+ # the class name.
12
+ # @param adapter_class [Class] actual class of the adapter.
13
+ def register(adapter_name, adapter_class)
14
+ raise ConfigError, 'adapter_class should be a class' unless adapter_class.is_a?(Class)
15
+
16
+ adapter_class.load_settings
17
+ adapters[adapter_name.to_sym] = adapter_class
18
+ end
19
+
20
+ # Remove the given adapter from the registry.
21
+ def unregister(adapter_name)
22
+ adapters.delete adapter_name.to_sym
23
+ end
24
+
25
+ # Get the adapter.
26
+ # @param adapter_name [String, Symbol]
27
+ def get_adapter(adapter_name)
28
+ raise "Adapter '#{adapter_name}' not found. Did you forget to register it: DWH.register(MyAdapterClass)" unless adapter?(adapter_name)
29
+
30
+ adapters[adapter_name.to_sym]
31
+ end
32
+
33
+ # Check if the given adapter is registered
34
+ # @param adapter_name [String, Symbol]
35
+ def adapter?(adapter_name)
36
+ adapters.key?(adapter_name.to_sym)
37
+ end
38
+
39
+ # Get the list of registered adapters
40
+ def adapters
41
+ @adapters ||= {}
42
+ end
43
+
44
+ # Current active pools
45
+ def pools
46
+ @pools ||= {}
47
+ end
48
+
49
+ # The canonical way of creating an adapter instance
50
+ # in DWH.
51
+ # @param adapter_name [String, Symbol]
52
+ # @param config [Hash] options hash for the target database
53
+ #
54
+ # @example connect to MySQL
55
+ # DWH.create(:mysql, { host: '127.0.0.1', database: 'mydb', username: 'me', password: 'mypwd', client_name: 'Strata CLI'})
56
+ # @example connect Trino
57
+ # DWH.create(:trino, {host: 'localhost', catalog: 'native', username: 'Ajo'})
58
+ # @example connect to Druid
59
+ # DWH.create(:druid, {host: 'localhost',port: 8080, protocol: 'http'})
60
+ def create(adapter_name, config)
61
+ get_adapter(adapter_name).new(config)
62
+ end
63
+
64
+ # Create a pool of connections for a given name and adapter.
65
+ # Returns existing pool if it was already created.
66
+ #
67
+ # @param name [String] custom name for your pool
68
+ # @param adapter_name [String, Symbol]
69
+ # @param config [Hash] connection options
70
+ # @param timeout [Integer] pool checkout time out
71
+ # @param size [Integer] size of the pool
72
+ def pool(name, adapter_name, config, timeout: 5, size: 10)
73
+ if pools.key?(name)
74
+ pools[name]
75
+ else
76
+ pools[name] = ConnectionPool.new(size: size, timeout: timeout) do
77
+ create(adapter_name, config)
78
+ end
79
+ end
80
+ end
81
+
82
# Shutdown a specific pool or all pools. Each pooled adapter is closed and
# the pool is removed from the registry. An unknown pool name is a no-op.
#
# @param pool [String, Symbol, ConnectionPool, nil] pool or name of pool
#   or nil to shut everything down
# @raise [ArgumentError] when pool is of any other type
def shutdown(pool = nil)
  # Match on the value itself. `case pool.class` compares with Class#===,
  # which tests "is an instance of", and a class object is never an instance
  # of String/Symbol/ConnectionPool - so no named branch could ever match.
  case pool
  when nil
    pools.each_value { |registered| registered.shutdown(&:close) }
    pools.clear
  when String, Symbol
    # Pools are stored under whatever name was handed to #pool, so try the
    # name as given first and then its String/Symbol counterpart.
    key = [pool, pool.to_s, pool.to_sym].find { |candidate| pools.key?(candidate) }
    pools.delete(key)&.shutdown(&:close)
  when ConnectionPool
    pool.shutdown(&:close)
    pools.delete(pools.key(pool))
  else
    raise ArgumentError, "Cannot shutdown #{pool.inspect}: expected a pool name, a ConnectionPool or nil"
  end
end
103
+
104
# Start reaper that will periodically clean up
# unused or idle connections.
#
# @param frequency [Integer] seconds to sleep between sweeps; the same
#   value is passed to the pool's reap call. Defaults to 300 seconds
# @return [Thread] the background reaper thread
def start_reaper(frequency = 300)
  logger.info 'Starting DB Adapter reaper process'
  Thread.new do
    loop do
      # Iterate over a snapshot so that pools registered or shut down from
      # another thread mid-sweep cannot disturb the iteration.
      pools.to_a.each do |name, pool|
        # Read the stats straight off the pool. Checking a connection out
        # just to log would skew the availability figure and can block or
        # raise when the pool is fully in use.
        logger.info "DB POOL FOR #{name} STATS:"
        logger.info "\tSize: #{pool.size}"
        logger.info "\tIdle: #{pool.available}"
        logger.info "\tAvailable: #{pool.available}"
        pool.reap(frequency) { |conn| conn.close }
      rescue StandardError => e
        # A failure in one pool must not end the reaper thread for good.
        logger.error "Reaper failed for pool #{name}: #{e.message}"
      end
      sleep frequency
    end
  end
end
124
+ end
125
+ end