dwh 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -0
- data/README.md +15 -5
- data/Rakefile +1 -1
- data/docs/DWH/Adapters/Adapter.html +33 -27
- data/docs/DWH/Adapters/Athena.html +25 -21
- data/docs/DWH/Adapters/Boolean.html +1 -1
- data/docs/DWH/Adapters/Druid.html +18 -12
- data/docs/DWH/Adapters/DuckDb.html +29 -27
- data/docs/DWH/Adapters/MySql.html +25 -19
- data/docs/DWH/Adapters/OpenAuthorizable/ClassMethods.html +3 -6
- data/docs/DWH/Adapters/OpenAuthorizable.html +5 -10
- data/docs/DWH/Adapters/Postgres.html +27 -23
- data/docs/DWH/Adapters/Snowflake.html +39 -24
- data/docs/DWH/Adapters/SqlServer.html +27 -25
- data/docs/DWH/Adapters/Trino.html +30 -30
- data/docs/DWH/Adapters.html +1 -1
- data/docs/DWH/AuthenticationError.html +1 -1
- data/docs/DWH/Behaviors.html +6 -11
- data/docs/DWH/Capabilities.html +10 -26
- data/docs/DWH/Column.html +7 -15
- data/docs/DWH/ConfigError.html +1 -1
- data/docs/DWH/ConnectionError.html +1 -1
- data/docs/DWH/DWHError.html +1 -1
- data/docs/DWH/ExecutionError.html +1 -1
- data/docs/DWH/Factory.html +1 -1
- data/docs/DWH/Functions/Arrays.html +8 -8
- data/docs/DWH/Functions/Dates.html +5 -7
- data/docs/DWH/Functions/ExtractDatePart.html +13 -25
- data/docs/DWH/Functions/Nulls.html +3 -3
- data/docs/DWH/Functions.html +6 -9
- data/docs/DWH/Logger.html +3 -5
- data/docs/DWH/OAuthError.html +1 -1
- data/docs/DWH/Settings.html +6 -9
- data/docs/DWH/StreamingStats.html +2 -3
- data/docs/DWH/Table.html +14 -26
- data/docs/DWH/TableStats.html +1 -1
- data/docs/DWH/TokenExpiredError.html +1 -1
- data/docs/DWH/UnsupportedCapability.html +1 -1
- data/docs/DWH.html +1 -1
- data/docs/_index.html +1 -1
- data/docs/file.README.html +43 -48
- data/docs/file.adapters.html +318 -343
- data/docs/file.creating-adapters.html +347 -357
- data/docs/file.getting-started.html +143 -151
- data/docs/file.usage.html +257 -278
- data/docs/guides/adapters.md +158 -0
- data/docs/guides/getting-started.md +6 -1
- data/docs/guides/usage.md +33 -1
- data/docs/index.html +43 -48
- data/docs/top-level-namespace.html +1 -1
- data/lib/dwh/adapters/duck_db.rb +1 -1
- data/lib/dwh/adapters/postgres.rb +4 -4
- data/lib/dwh/adapters/redshift.rb +48 -0
- data/lib/dwh/adapters/sql_server.rb +1 -1
- data/lib/dwh/adapters/sqlite.rb +364 -0
- data/lib/dwh/adapters.rb +5 -5
- data/lib/dwh/column.rb +12 -1
- data/lib/dwh/functions/dates.rb +15 -0
- data/lib/dwh/settings/databricks.yml +13 -13
- data/lib/dwh/settings/druid.yml +3 -3
- data/lib/dwh/settings/duckdb.yml +2 -2
- data/lib/dwh/settings/mysql.yml +2 -2
- data/lib/dwh/settings/postgres.yml +11 -11
- data/lib/dwh/settings/redshift.yml +15 -24
- data/lib/dwh/settings/snowflake.yml +15 -15
- data/lib/dwh/settings/sqlite.yml +42 -0
- data/lib/dwh/settings.rb +6 -2
- data/lib/dwh/table.rb +18 -10
- data/lib/dwh/version.rb +1 -1
- data/lib/dwh.rb +4 -4
- metadata +5 -16
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
module DWH
|
|
2
|
+
module Adapters
|
|
3
|
+
# SQLite adapter optimized for analytical workloads.
|
|
4
|
+
#
|
|
5
|
+
# This requires the ruby {https://github.com/sparklemotion/sqlite3-ruby sqlite3} gem.
|
|
6
|
+
#
|
|
7
|
+
# Generally, adapters should be created using {DWH::Factory#create DWH.create}, where the configuration
|
|
8
|
+
# is passed in as an options hash or argument list.
|
|
9
|
+
#
|
|
10
|
+
# @example Basic connection with required only options
|
|
11
|
+
# DWH.create(:sqlite, {file: 'path/to/my/database.db' })
|
|
12
|
+
#
|
|
13
|
+
# @example Open in read only mode
|
|
14
|
+
# DWH.create(:sqlite, {file: 'path/to/my/database.db', readonly: true})
|
|
15
|
+
#
|
|
16
|
+
# @example Configure with custom performance pragmas
|
|
17
|
+
# DWH.create(:sqlite, {file: 'path/to/my/database.db',
|
|
18
|
+
# pragmas: { cache_size: -128000, mmap_size: 268435456 }})
|
|
19
|
+
#
|
|
20
|
+
# @note This adapter enables WAL mode by default for better concurrent read performance.
|
|
21
|
+
# Set `enable_wal: false` to disable this behavior.
|
|
22
|
+
class Sqlite < Adapter
|
|
23
|
+
config :file, String, required: true, message: 'path/to/sqlite/db'
|
|
24
|
+
config :readonly, Boolean, required: false, default: false, message: 'open database in read-only mode'
|
|
25
|
+
config :enable_wal, Boolean, required: false, default: true, message: 'enable WAL mode for better concurrency'
|
|
26
|
+
config :pragmas, Hash, required: false, message: 'hash of PRAGMA statements for performance tuning'
|
|
27
|
+
config :timeout, Integer, required: false, default: 5000, message: 'busy timeout in milliseconds'
|
|
28
|
+
|
|
29
|
+
# Default pragmas optimized for analytical workloads
|
|
30
|
+
DEFAULT_PRAGMAS = {
|
|
31
|
+
cache_size: -64_000, # 64MB cache (negative means KB)
|
|
32
|
+
temp_store: 'MEMORY', # Store temp tables in memory
|
|
33
|
+
mmap_size: 134_217_728, # 128MB memory-mapped I/O
|
|
34
|
+
page_size: 4096, # Standard page size
|
|
35
|
+
synchronous: 'NORMAL' # Faster than FULL, safe with WAL
|
|
36
|
+
}.freeze
|
|
37
|
+
|
|
38
|
+
# (see Adapter#connection)
|
|
39
|
+
# Lazily opens the SQLite database and memoizes the handle.
#
# After opening, the handle is configured in this order: busy timeout,
# array (non-hash) result rows, WAL journal mode (skipped when WAL is
# disabled or the database is read-only), DEFAULT_PRAGMAS, and finally any
# user supplied pragmas so they override the defaults.
#
# @return [SQLite3::Database] the configured, memoized connection
# @raise [ConfigError] when the database cannot be opened or configured
def connection
  return @connection if @connection

  @connection = SQLite3::Database.new(config[:file], build_open_options)

  # Set busy timeout to handle concurrent access
  @connection.busy_timeout(config[:timeout])

  # Don't return results as hash by default for performance
  @connection.results_as_hash = false

  # Enable WAL mode for concurrent reads (unless disabled or readonly)
  @connection.execute('PRAGMA journal_mode = WAL') if config[:enable_wal] && !config[:readonly]

  # Apply default pragmas
  apply_pragmas(DEFAULT_PRAGMAS)

  # Apply user-specified pragmas (will override defaults)
  apply_pragmas(config[:pragmas]) if config.key?(:pragmas)

  @connection
rescue StandardError => e
  # Never keep a half-configured handle memoized: otherwise the next call
  # would return it as if setup had succeeded.
  failed_handle = @connection
  @connection = nil
  begin
    failed_handle&.close
  rescue StandardError
    nil # best effort; the original error below is the one worth reporting
  end
  raise ConfigError, e.message
end
|
|
64
|
+
|
|
65
|
+
# (see Adapter#close)
|
|
66
|
+
# Releases the memoized database handle, if any. Safe to call when no
# connection was ever opened or when it is already closed.
def close
  handle = @connection
  return if handle.nil?

  handle.close unless handle.closed?
  @connection = nil
end
|
|
72
|
+
|
|
73
|
+
# (see Adapter#test_connection)
|
|
74
|
+
# Opens the connection (if needed) and runs a trivial `SELECT 1`.
#
# @param raise_exception [Boolean] re-raise failures as ConnectionError
#   instead of returning false
# @return [Boolean] true when the probe query succeeds, false otherwise
# @raise [ConnectionError] only when raise_exception is true
def test_connection(raise_exception: false)
  begin
    connection
    connection.execute('SELECT 1')
  rescue StandardError => e
    raise ConnectionError, e.message if raise_exception

    return false
  end

  true
end
|
|
83
|
+
|
|
84
|
+
# (see Adapter#tables)
|
|
85
|
+
# Lists user tables in the database, sorted by name.
#
# SQLite's internal tables all start with the reserved prefix `sqlite_`.
# `_` is a single-character wildcard in LIKE, so it is escaped here;
# otherwise user tables such as `sqlitedata` would be filtered out too.
#
# @param qualifiers [Hash] accepted for interface compatibility; not used
#   by this method
# @return [Array<String>] table names
def tables(**qualifiers)
  sql = "SELECT name FROM sqlite_master WHERE type='table' " \
        "AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\' ORDER BY name"

  execute(sql).flatten
end
|
|
91
|
+
|
|
92
|
+
# (see Adapter#stats)
|
|
93
|
+
# Computes the row count of a table and, when a date column is given, the
# min/max of that column.
#
# @param table [String] table name passed to Table.new
# @param date_column [String, nil] column used for DATE_START / DATE_END
# @param qualifiers [Hash] forwarded to Table.new
# @return [TableStats]
def stats(table, date_column: nil, **qualifiers)
  db_table = Table.new(table, **qualifiers)
  date_start_sql = date_column.nil? ? '' : ", min(#{date_column}) AS DATE_START"
  date_end_sql = date_column.nil? ? '' : ", max(#{date_column}) AS DATE_END"

  sql = <<-SQL
          SELECT count(*) AS ROW_COUNT
          #{date_start_sql}
          #{date_end_sql}
          FROM #{db_table.physical_name}
  SQL

  row = execute(sql).first
  if date_column
    TableStats.new(row_count: row[0], date_start: row[1], date_end: row[2])
  else
    TableStats.new(row_count: row[0], date_start: nil, date_end: nil)
  end
end
|
|
110
|
+
|
|
111
|
+
# (see Adapter#metadata)
|
|
112
|
+
# Builds a Table populated with one Column per entry reported by
# `PRAGMA table_info`.
#
# @param table [String] table name passed to Table.new
# @param qualifiers [Hash] forwarded to Table.new
# @return [Table] the table with its columns appended
def metadata(table, **qualifiers)
  db_table = Table.new(table, **qualifiers)

  # PRAGMA table_info returns: cid, name, type, notnull, dflt_value, pk
  execute("PRAGMA table_info(#{db_table.physical_name})").each do |info|
    column_name = info[1]
    column_type = info[2]
    db_table << Column.new(
      name: column_name,
      data_type: column_type,
      precision: nil,
      scale: nil,
      max_char_length: nil
    )
  end

  db_table
end
|
|
132
|
+
|
|
133
|
+
# (see Adapter#execute)
|
|
134
|
+
# Runs a SQL statement and returns the rows in the requested format.
#
# @param sql [String] statement to run
# @param format [Symbol, String] :array / :native (raw rows), :object
#   (array of hashes) or :csv (CSV text); strings are matched
#   case-insensitively
# @param retries [Integer] forwarded to with_retry
# @raise [ExecutionError] when running the statement fails
# @raise [UnsupportedCapability] for any other format
def execute(sql, format: :array, retries: 0)
  rows =
    begin
      with_debug(sql) { with_retry(retries) { connection.execute(sql) } }
    rescue StandardError => e
      raise ExecutionError, e.message
    end

  format = format.downcase if format.is_a?(String)
  case format.to_sym
  when :array, :native then rows
  when :object then result_to_hash(sql, rows)
  when :csv then result_to_csv(sql, rows)
  else
    raise UnsupportedCapability, "Unsupported format: #{format} for this #{name}"
  end
end
|
|
155
|
+
|
|
156
|
+
# (see Adapter#execute_stream)
|
|
157
|
+
# Runs a statement and writes the result to +io+ as CSV (header line
# first, then one line per row), then rewinds +io+.
#
# @param sql [String] statement to run
# @param io [IO, StringIO] writable, rewindable target
# @param stats [#<<, nil] optional collector that receives every row
# @param retries [Integer] forwarded to with_retry
# @return [IO] the same +io+, rewound
# @raise [ExecutionError] when preparing, reading or writing fails
def execute_stream(sql, io, stats: nil, retries: 0)
  with_debug(sql) do
    with_retry(retries) do
      stmt = connection.prepare(sql)
      begin
        io.write(CSV.generate_line(stmt.columns))

        stmt.execute.each do |row|
          stats << row unless stats.nil?
          io.write(CSV.generate_line(row))
        end
      ensure
        # Always release the prepared statement, even when writing or
        # iterating raises part-way through.
        stmt.close
      end
    end
  end

  io.rewind
  io
rescue StandardError => e
  raise ExecutionError, e.message
end
|
|
179
|
+
|
|
180
|
+
# (see Adapter#stream)
|
|
181
|
+
# Runs a statement and hands each result row to the given block.
#
# @param sql [String] statement to run
# @yieldparam row [Array] one result row
def stream(sql, &block)
  with_debug(sql) do
    stmt = connection.prepare(sql)
    begin
      stmt.execute.each do |row|
        block.call(row)
      end
    ensure
      # Release the prepared statement even if the consumer block raises.
      stmt.close
    end
  end
end
|
|
190
|
+
|
|
191
|
+
# Custom date truncation implementation. SQLite doesn't offer
|
|
192
|
+
# a native DATE_TRUNC function. We use 'start of' modifiers
|
|
193
|
+
# for year, month, and day, and custom logic for quarter and week.
|
|
194
|
+
# @see Dates#truncate_date
|
|
195
|
+
# Builds a SQL expression that truncates +exp+ to the given unit.
#
# @param unit [String, Symbol] year, quarter, month, week, day/date, hour,
#   minute or second (case-insensitive, surrounding whitespace ignored)
# @param exp [String] SQL expression producing a date/time value
# @return [String] SQL fragment
# @raise [UnsupportedCapability] for any other unit
def truncate_date(unit, exp)
  # to_s so that symbol units (:week) work the same as strings
  unit = unit.to_s.strip.downcase

  case unit
  when 'year'
    "date(#{exp}, 'start of year')"
  when 'quarter'
    # Calculate quarter start using CASE statement
    # Q1: Jan-Mar (months 1-3)   -> start of year
    # Q2: Apr-Jun (months 4-6)   -> start of year + 3 months
    # Q3: Jul-Sep (months 7-9)   -> start of year + 6 months
    # Q4: Oct-Dec (months 10-12) -> start of year + 9 months
    '(CASE ' \
      "WHEN CAST(strftime('%m', #{exp}) AS INTEGER) BETWEEN 1 AND 3 THEN date(#{exp}, 'start of year') " \
      "WHEN CAST(strftime('%m', #{exp}) AS INTEGER) BETWEEN 4 AND 6 THEN date(#{exp}, 'start of year', '+3 months') " \
      "WHEN CAST(strftime('%m', #{exp}) AS INTEGER) BETWEEN 7 AND 9 THEN date(#{exp}, 'start of year', '+6 months') " \
      "ELSE date(#{exp}, 'start of year', '+9 months') " \
      'END)'
  when 'month'
    "date(#{exp}, 'start of month')"
  when 'week'
    # Use week start day from settings. Block form of gsub inserts exp
    # verbatim; the string form would treat sequences like \0 or \1 inside
    # exp as backreferences.
    gsk("#{settings[:week_start_day].downcase}_week_start_day")
      .gsub(/@exp/i) { exp }
  when 'day', 'date'
    "date(#{exp})"
  when 'hour'
    # SQLite datetime returns timestamp, truncate to hour
    "datetime(strftime('%Y-%m-%d %H:00:00', #{exp}))"
  when 'minute'
    "datetime(strftime('%Y-%m-%d %H:%M:00', #{exp}))"
  when 'second'
    "datetime(strftime('%Y-%m-%d %H:%M:%S', #{exp}))"
  else
    raise UnsupportedCapability, "Currently not supporting truncation at #{unit} level"
  end
end
|
|
232
|
+
|
|
233
|
+
# SQLite's strftime doesn't support %A (day name) or %B (month name)
|
|
234
|
+
# We need to implement these using CASE statements based on day/month numbers
|
|
235
|
+
# Builds a CASE expression mapping strftime('%w') (0 = Sunday .. 6 =
# Saturday) to an upper-case day name.
#
# @param exp [String] SQL expression producing a date/time value
# @param abbreviate [Boolean] use three-letter names (SUN, MON, ...)
# @return [String] SQL fragment
def extract_day_name(exp, abbreviate: false)
  labels = %w[SUNDAY MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY]
  labels = labels.map { |label| label[0, 3] } if abbreviate

  branches = labels.each_with_index.map { |label, number| "WHEN #{number} THEN '#{label}' " }
  "(CASE CAST(strftime('%w', #{exp}) AS INTEGER) #{branches.join}END)"
end
|
|
262
|
+
|
|
263
|
+
# Builds a CASE expression mapping strftime('%m') (1..12) to an upper-case
# month name.
#
# @param exp [String] SQL expression producing a date/time value
# @param abbreviate [Boolean] use three-letter names (JAN, FEB, ...)
# @return [String] SQL fragment
def extract_month_name(exp, abbreviate: false)
  labels = %w[JANUARY FEBRUARY MARCH APRIL MAY JUNE JULY AUGUST SEPTEMBER OCTOBER NOVEMBER DECEMBER]
  labels = labels.map { |label| label[0, 3] } if abbreviate

  branches = labels.each.with_index(1).map { |label, number| "WHEN #{number} THEN '#{label}' " }
  "(CASE CAST(strftime('%m', #{exp}) AS INTEGER) #{branches.join}END)"
end
|
|
300
|
+
|
|
301
|
+
# SQLite's CAST(... AS DATE) doesn't work properly - it just extracts the year
|
|
302
|
+
# We need to override cast to use the date() function for DATE types
|
|
303
|
+
# Casts +exp+ to +type+. DATE (any letter case) is rendered with SQLite's
# date() function; every other type is delegated to the parent
# implementation.
#
# @param exp [String] SQL expression
# @param type [String, Symbol] target type
# @return [String] SQL fragment
def cast(exp, type)
  return super unless type.to_s.downcase == 'date'

  "date(#{exp})"
end
|
|
310
|
+
|
|
311
|
+
# Runs the parent's configuration validation, then makes sure the
# 'sqlite3' gem can be loaded.
#
# @return [true] when validation passes and the gem is available
# @raise [ConfigError] when the 'sqlite3' gem cannot be loaded
def valid_config?
  super
  require 'sqlite3'
  # `require` returns false when the gem was already loaded, so its result
  # must not be used as this predicate's answer.
  true
rescue LoadError
  raise ConfigError, "Required 'sqlite3' gem missing. Please add it to your Gemfile."
end
|
|
317
|
+
|
|
318
|
+
private
|
|
319
|
+
|
|
320
|
+
# Options hash handed to SQLite3::Database.new; only carries
# `readonly: true` when the adapter is configured read-only.
#
# @return [Hash]
def build_open_options
  config[:readonly] ? { readonly: true } : {}
end
|
|
325
|
+
|
|
326
|
+
# Issues one `PRAGMA name = value` statement per entry on the current
# connection.
#
# Value formatting: strings that are not entirely upper-case are emitted
# as quoted SQL string literals; everything else (numbers, upper-case
# keywords such as MEMORY or NORMAL) is emitted as-is.
#
# @param pragmas [Hash, nil] pragma name => value; nil is a no-op
def apply_pragmas(pragmas)
  return unless pragmas

  pragmas.each do |pragma, value|
    formatted_value =
      if value.is_a?(String) && value.upcase != value
        # Double embedded single quotes so the literal stays well-formed.
        "'#{value.gsub("'", "''")}'"
      else
        value
      end
    @connection.execute("PRAGMA #{pragma} = #{formatted_value}")
  end
end
|
|
335
|
+
|
|
336
|
+
# Converts raw rows into an array of hashes keyed by column name.
# Column names are obtained by preparing +sql+ again (without executing
# it); an empty result short-circuits to [] without preparing.
#
# @param sql [String] the statement that produced +result+
# @param result [Array<Array>] raw rows
# @return [Array<Hash>]
def result_to_hash(sql, result)
  return [] if result.empty?

  statement = connection.prepare(sql)
  column_names = statement.columns
  statement.close

  result.map { |values| Hash[column_names.zip(values)] }
end
|
|
348
|
+
|
|
349
|
+
# Renders raw rows as CSV text with a header line. Column names are
# obtained by preparing +sql+ again (without executing it).
#
# @param sql [String] the statement that produced +result+
# @param result [Array<Array>] raw rows
# @return [String] CSV document
def result_to_csv(sql, result)
  statement = connection.prepare(sql)
  header = statement.columns
  statement.close

  CSV.generate do |csv|
    csv << header
    result.each { |values| csv << values }
  end
end
|
|
362
|
+
end
|
|
363
|
+
end
|
|
364
|
+
end
|
data/lib/dwh/adapters.rb
CHANGED
|
@@ -80,12 +80,12 @@ module DWH
|
|
|
80
80
|
attr_reader :config
|
|
81
81
|
|
|
82
82
|
def initialize(config)
|
|
83
|
-
@config = config.
|
|
83
|
+
@config = config.transform_keys(&:to_sym)
|
|
84
84
|
# Per instance customization of general settings
|
|
85
85
|
# So you can have multiple connections to Trino
|
|
86
86
|
# but exhibit diff behavior
|
|
87
87
|
@settings = self.class.adapter_settings.merge(
|
|
88
|
-
(config[:settings] || {}).
|
|
88
|
+
(config[:settings] || {}).transform_keys(&:to_sym)
|
|
89
89
|
)
|
|
90
90
|
|
|
91
91
|
valid_config?
|
|
@@ -300,7 +300,7 @@ module DWH
|
|
|
300
300
|
# Adapter name from the class name
|
|
301
301
|
# @return [String]
|
|
302
302
|
def adapter_name
|
|
303
|
-
self.class.name.
|
|
303
|
+
self.class.name.split('::').last.downcase
|
|
304
304
|
end
|
|
305
305
|
|
|
306
306
|
# If any extra connection params were passed in the config
|
|
@@ -335,7 +335,7 @@ module DWH
|
|
|
335
335
|
|
|
336
336
|
# Check for missing required parameters
|
|
337
337
|
missing_params = definitions.select do |name, options|
|
|
338
|
-
options[:required] && !config
|
|
338
|
+
options[:required] && !config[name] && options[:default].nil?
|
|
339
339
|
end
|
|
340
340
|
|
|
341
341
|
if missing_params.any?
|
|
@@ -351,7 +351,7 @@ module DWH
|
|
|
351
351
|
|
|
352
352
|
raise ConfigError, "Invalid value. Only allowed: #{opts[:allowed]}." if opts[:allowed].any? && !opts[:allowed].include?(config[name])
|
|
353
353
|
|
|
354
|
-
config[name] = opts[:default] if opts[:default] && !config
|
|
354
|
+
config[name] = opts[:default] if opts[:default] && !config[name]
|
|
355
355
|
|
|
356
356
|
if opts[:required] && !config[name].is_a?(opts[:type]) && !opts[:type].is_a?(Boolean)
|
|
357
357
|
raise ConfigError, "#{name} should be a #{opts[:type]}. Got #{opts[name.to_sym].class.name}"
|
data/lib/dwh/column.rb
CHANGED
|
@@ -22,7 +22,7 @@ module DWH
|
|
|
22
22
|
|
|
23
23
|
DEFAULT_RULES = { /[_+]+/ => ' ', /\s+id$/i => ' ID', /desc/i => 'Description' }.freeze
|
|
24
24
|
def namify(rules = DEFAULT_RULES)
|
|
25
|
-
named = name
|
|
25
|
+
named = titleize(name)
|
|
26
26
|
rules.each do |k, v|
|
|
27
27
|
named = named.gsub(Regexp.new(k), v)
|
|
28
28
|
end
|
|
@@ -75,5 +75,16 @@ module DWH
|
|
|
75
75
|
def to_s
|
|
76
76
|
"<Column:#{name}:#{data_type}>"
|
|
77
77
|
end
|
|
78
|
+
|
|
79
|
+
# Turns an identifier into a space-separated title: underscores and
# dashes become spaces, whitespace runs collapse, and each word is
# capitalized (first letter upper-case, the rest lower-case).
#
# @param name [String]
# @return [String]
def titleize(name)
  spaced = name.gsub(/[_-]/, ' ')
  words = spaced.gsub(/\s+/, ' ').strip.split(' ')
  words.map { |word| word.capitalize }.join(' ')
end
|
|
78
89
|
end
|
|
79
90
|
end
|
data/lib/dwh/functions/dates.rb
CHANGED
|
@@ -124,12 +124,27 @@ module DWH
|
|
|
124
124
|
gsk(:date_literal).gsub(/@val/i, val)
|
|
125
125
|
end
|
|
126
126
|
|
|
127
|
+
# @see #date_literal
|
|
128
|
+
# Shorthand that forwards to #date_literal unchanged.
def date_lit(value)
  date_literal value
end
|
|
131
|
+
|
|
127
132
|
# @param val [String, Date, DateTime, Time]
|
|
128
133
|
# Renders a date-time literal from the :date_time_literal template.
# Values whose class is listed in DATE_CLASSES are first formatted with
# date_time_format; anything else is substituted as given.
def date_time_literal(val)
  rendered =
    if DATE_CLASSES.include?(val.class)
      val.strftime(date_time_format)
    else
      val
    end

  template = gsk(:date_time_literal)
  template.gsub(/@val/i, rendered)
end
|
|
132
137
|
|
|
138
|
+
# @see #date_time_literal
|
|
139
|
+
# Shorthand that forwards to #date_time_literal unchanged.
def timestamp_lit(value)
  date_time_literal value
end
|
|
142
|
+
|
|
143
|
+
# @see #date_time_literal
|
|
144
|
+
# Alternative name that forwards to #date_time_literal unchanged.
def timestamp_literal(value)
  date_time_literal value
end
|
|
147
|
+
|
|
133
148
|
# The current default week start day. This is how
|
|
134
149
|
# the db is currently setup. Should be either monday or sunday
|
|
135
150
|
def default_week_start_day
|
|
@@ -13,20 +13,20 @@ abbreviated_day_name_format: "EEE"
|
|
|
13
13
|
month_name_format: "MMMM"
|
|
14
14
|
abbreviated_month_name_format: "MMM"
|
|
15
15
|
|
|
16
|
-
date_add: "
|
|
17
|
-
date_diff: "
|
|
18
|
-
date_format_sql: "
|
|
19
|
-
extract_day_of_year: '
|
|
20
|
-
extract_day_of_week: '
|
|
21
|
-
extract_week_of_year: '
|
|
22
|
-
extract_year_month: '
|
|
16
|
+
date_add: "DATE_ADD(@unit, @val, @exp)"
|
|
17
|
+
date_diff: "DATE_DIFF(@unit, @start_exp, @end_exp)"
|
|
18
|
+
date_format_sql: "DATE_FORMAT(@exp, '@format')"
|
|
19
|
+
extract_day_of_year: 'DAYOFYEAR(@exp)'
|
|
20
|
+
extract_day_of_week: 'DAYOFWEEK(@exp)'
|
|
21
|
+
extract_week_of_year: 'WEEKOFYEAR(@exp)'
|
|
22
|
+
extract_year_month: 'CAST(CONCAT(YEAR(@exp), LPAD(MONTH(@exp), 2, "0")) as INT)'
|
|
23
23
|
|
|
24
24
|
cast: "CAST(@exp AS @type)"
|
|
25
25
|
|
|
26
26
|
# string functions
|
|
27
|
-
trim: "
|
|
28
|
-
lower_case: "
|
|
29
|
-
upper_case: "
|
|
27
|
+
trim: "TRIM(@exp)"
|
|
28
|
+
lower_case: "LOWER(@exp)"
|
|
29
|
+
upper_case: "UPPER(@exp)"
|
|
30
30
|
|
|
31
31
|
# null handling
|
|
32
32
|
if_null: "COALESCE(@exp, @when_null)"
|
|
@@ -45,7 +45,7 @@ supports_window_functions: true
|
|
|
45
45
|
extend_ending_date_to_last_hour_of_day: false # druid needs this for inclusive filtering
|
|
46
46
|
|
|
47
47
|
# array operations
|
|
48
|
-
array_in_list: "
|
|
49
|
-
array_exclude_list: "
|
|
50
|
-
array_unnest_join: "LATERAL VIEW
|
|
48
|
+
array_in_list: "EXISTS(@exp, x -> x IN (@list))"
|
|
49
|
+
array_exclude_list: "NOT EXISTS(@exp, x -> x IN (@list))"
|
|
50
|
+
array_unnest_join: "LATERAL VIEW EXPLODE(@exp) AS @alias"
|
|
51
51
|
|
data/lib/dwh/settings/druid.yml
CHANGED
|
@@ -25,9 +25,9 @@ sunday_week_start_day: "TIME_FLOOR(@exp, 'P7D', TIMESTAMP '1970-01-04 00:00:00')
|
|
|
25
25
|
monday_week_start_day: "TIME_FLOOR(@exp, 'P7D', TIMESTAMP '1970-01-05 00:00:00')"
|
|
26
26
|
|
|
27
27
|
# string functions
|
|
28
|
-
trim: "
|
|
29
|
-
lower_case: "
|
|
30
|
-
upper_case: "
|
|
28
|
+
trim: "TRIM(@exp)"
|
|
29
|
+
lower_case: "LOWER(@exp)"
|
|
30
|
+
upper_case: "UPPER(@exp)"
|
|
31
31
|
|
|
32
32
|
# Relevant db capabilities
|
|
33
33
|
supports_table_join: true
|
data/lib/dwh/settings/duckdb.yml
CHANGED
|
@@ -38,7 +38,7 @@ upper_case: "UPPER(@exp)"
|
|
|
38
38
|
create_temp_table_template: "CREATE TEMP TABLE @table AS \n@sql"
|
|
39
39
|
|
|
40
40
|
# array operations
|
|
41
|
-
array_in_list: "
|
|
42
|
-
array_exclude_list: "
|
|
41
|
+
array_in_list: "ARRAY_LENGTH(ARRAY_INTERSECT(@exp, @list)) > 0"
|
|
42
|
+
array_exclude_list: "ARRAY_LENGTH(ARRAY_INTERSECT(@exp, @list)) = 0"
|
|
43
43
|
array_unnest_join: ", LATERAL (SELECT UNNEST(@exp)) AS @alias"
|
|
44
44
|
|
data/lib/dwh/settings/mysql.yml
CHANGED
|
@@ -24,8 +24,8 @@ extract_minute: 'MINUTE(@exp)'
|
|
|
24
24
|
extract_year_month: 'CAST(CONCAT(YEAR(@exp), LPAD(MONTH(@exp), 2, "0")) AS UNSIGNED)'
|
|
25
25
|
default_week_start_day: "sunday"
|
|
26
26
|
week_start_day: "monday"
|
|
27
|
-
sunday_week_start_day: "DATE(DATE_SUB(@exp, INTERVAL
|
|
28
|
-
monday_week_start_day: "DATE(DATE_SUB(@exp, INTERVAL
|
|
27
|
+
sunday_week_start_day: "DATE(DATE_SUB(@exp, INTERVAL DAYOFWEEK(@exp)-1 DAY ))"
|
|
28
|
+
monday_week_start_day: "DATE(DATE_SUB(@exp, INTERVAL DAYOFWEEK(@exp)-2 DAY ))"
|
|
29
29
|
cast: "CAST(@exp AS @type)"
|
|
30
30
|
|
|
31
31
|
# string functions
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
2
|
date_add: "(@exp + '@val @unit'::interval)"
|
|
3
|
-
date_diff: "
|
|
3
|
+
date_diff: "AGE(@start_exp, @end_exp)"
|
|
4
4
|
date_format_sql: "TO_CHAR(@exp, '@format')"
|
|
5
5
|
date_literal: "'@val'::DATE"
|
|
6
6
|
date_time_literal: "'@val'::TIMESTAMP"
|
|
@@ -12,16 +12,16 @@ abbreviated_month_name_format: "Mon"
|
|
|
12
12
|
sunday_week_start_day: "( DATE_TRUNC('WEEK', @exp + INTERVAL '1 DAY') - INTERVAL '1 DAY' )"
|
|
13
13
|
monday_week_start_day: "( DATE_TRUNC('WEEK', @exp - INTERVAL '1 DAY') + INTERVAL '1 DAY' )"
|
|
14
14
|
|
|
15
|
-
extract_year: '
|
|
16
|
-
extract_month: '
|
|
17
|
-
extract_quarter: '
|
|
18
|
-
extract_day_of_year: '
|
|
19
|
-
extract_day_of_month: '
|
|
20
|
-
extract_day_of_week: '
|
|
21
|
-
extract_week_of_year: '
|
|
22
|
-
extract_hour: '
|
|
23
|
-
extract_minute: '
|
|
24
|
-
extract_year_month: "
|
|
15
|
+
extract_year: 'EXTRACT(year from @exp)'
|
|
16
|
+
extract_month: 'EXTRACT(month from @exp)'
|
|
17
|
+
extract_quarter: 'EXTRACT(quarter from @exp)'
|
|
18
|
+
extract_day_of_year: 'EXTRACT(DOY from @exp)'
|
|
19
|
+
extract_day_of_month: 'EXTRACT(DAY from @exp)'
|
|
20
|
+
extract_day_of_week: 'EXTRACT(DOW from @exp)'
|
|
21
|
+
extract_week_of_year: 'EXTRACT(WEEK from @exp)'
|
|
22
|
+
extract_hour: 'EXTRACT(HOUR from @exp)'
|
|
23
|
+
extract_minute: 'EXTRACT(MINUTE from @exp)'
|
|
24
|
+
extract_year_month: "CAST((EXTRACT(YEAR FROM @exp)::varchar || TO_CHAR(@exp, 'MM')) as INTEGER)"
|
|
25
25
|
|
|
26
26
|
cast: "@exp::@type"
|
|
27
27
|
|
|
@@ -1,28 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
# quotes and string lit
|
|
3
|
-
quote: "\"@exp\""
|
|
4
|
-
string_literal: "'@exp'"
|
|
5
|
-
|
|
6
1
|
# Date Literal Formats
|
|
7
|
-
date_format: "%Y-%m-%d"
|
|
8
|
-
date_time_format: "%Y-%m-%d %H:%M:%S"
|
|
9
|
-
date_time_tz_format: "%Y-%m-%d %H:%M:%S %Z"
|
|
10
|
-
date_type: "string" # alternative is int, integer, dateint
|
|
11
2
|
day_name_format: "Day"
|
|
12
3
|
abbreviated_day_name_format: "Dy"
|
|
13
4
|
month_name_format: "Month"
|
|
14
5
|
abbreviated_month_name_format: "Mon"
|
|
15
6
|
|
|
16
7
|
# Date functions patterns
|
|
17
|
-
current_date: "
|
|
18
|
-
current_time: "
|
|
19
|
-
current_timestamp: "
|
|
20
|
-
truncate_date: "
|
|
21
|
-
date_add: "
|
|
22
|
-
date_diff: "
|
|
8
|
+
current_date: "CURRENT_DATE"
|
|
9
|
+
current_time: "CURRENT_TIME"
|
|
10
|
+
current_timestamp: "CURRENT_TIMESTAMP"
|
|
11
|
+
truncate_date: "DATE_TRUNC('@unit', @exp)"
|
|
12
|
+
date_add: "DATEADD(@unit, @val, @exp)"
|
|
13
|
+
date_diff: "DATEDIFF(@unit, @start_exp, @end_exp)"
|
|
23
14
|
date_format_sql: "TO_CHAR(@exp, '@format')"
|
|
24
|
-
date_literal: "'@val'"
|
|
25
|
-
date_time_literal: "
|
|
15
|
+
date_literal: "'@val'::DATE"
|
|
16
|
+
date_time_literal: "'@val'::TIMESTAMP"
|
|
26
17
|
extract_year: 'EXTRACT(YEAR FROM @exp)'
|
|
27
18
|
extract_month: 'EXTRACT(MONTH FROM @exp)'
|
|
28
19
|
extract_quarter: 'EXTRACT(QUARTER FROM @exp)'
|
|
@@ -33,15 +24,15 @@ extract_week_of_year: 'EXTRACT(WEEK FROM @exp)'
|
|
|
33
24
|
extract_hour: 'EXTRACT(HOUR FROM @exp)'
|
|
34
25
|
extract_minute: 'EXTRACT(MINUTE FROM @exp)'
|
|
35
26
|
extract_year_month: "TO_CHAR(@exp, 'YYYYMM')::INTEGER"
|
|
36
|
-
default_week_start_day: "
|
|
37
|
-
week_start_day: "
|
|
38
|
-
sunday_week_start_day: "DATEADD(day, -1, DATE_TRUNC(WEEK, DATEADD(DAY,
|
|
39
|
-
monday_week_start_day: "DATEADD(day,
|
|
27
|
+
default_week_start_day: "monday" # Redshift's DATE_TRUNC('week', ...) starts weeks on Monday
|
|
28
|
+
week_start_day: "monday"
|
|
29
|
+
sunday_week_start_day: "DATEADD(day, -1, DATE_TRUNC('WEEK', DATEADD(DAY, 1, @exp)))"
|
|
30
|
+
monday_week_start_day: "DATEADD(day, 1, DATE_TRUNC('WEEK', DATEADD(day, -1, @exp)))"
|
|
40
31
|
|
|
41
32
|
# string functions
|
|
42
|
-
trim: "
|
|
43
|
-
lower_case: "
|
|
44
|
-
upper_case: "
|
|
33
|
+
trim: "TRIM(@exp)"
|
|
34
|
+
lower_case: "LOWER(@exp)"
|
|
35
|
+
upper_case: "UPPER(@exp)"
|
|
45
36
|
|
|
46
37
|
# null handling
|
|
47
38
|
if_null: "COALESCE(@exp, @when_null)"
|