dwh 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -0
- data/README.md +10 -1
- data/docs/guides/adapters.md +158 -0
- data/docs/guides/getting-started.md +6 -1
- data/docs/guides/usage.md +33 -1
- data/lib/dwh/adapters/athena.rb +8 -1
- data/lib/dwh/adapters/databricks.rb +328 -0
- data/lib/dwh/adapters/duck_db.rb +8 -2
- data/lib/dwh/adapters/my_sql.rb +7 -1
- data/lib/dwh/adapters/postgres.rb +11 -5
- data/lib/dwh/adapters/redshift.rb +48 -0
- data/lib/dwh/adapters/sql_server.rb +8 -2
- data/lib/dwh/adapters/sqlite.rb +364 -0
- data/lib/dwh/adapters/trino.rb +7 -1
- data/lib/dwh/adapters.rb +3 -3
- data/lib/dwh/column.rb +12 -1
- data/lib/dwh/functions/dates.rb +15 -0
- data/lib/dwh/settings/databricks.yml +14 -15
- data/lib/dwh/settings/druid.yml +3 -3
- data/lib/dwh/settings/duckdb.yml +2 -2
- data/lib/dwh/settings/mysql.yml +2 -2
- data/lib/dwh/settings/postgres.yml +11 -11
- data/lib/dwh/settings/redshift.yml +15 -24
- data/lib/dwh/settings/snowflake.yml +15 -15
- data/lib/dwh/settings/sqlite.yml +42 -0
- data/lib/dwh/settings.rb +6 -2
- data/lib/dwh/table.rb +18 -10
- data/lib/dwh/version.rb +1 -1
- data/lib/dwh.rb +6 -4
- metadata +6 -16
data/lib/dwh/adapters/my_sql.rb
CHANGED
|
@@ -219,7 +219,13 @@ module DWH
|
|
|
219
219
|
super
|
|
220
220
|
require 'mysql2'
|
|
221
221
|
rescue LoadError
|
|
222
|
-
raise ConfigError,
|
|
222
|
+
raise ConfigError, <<~MSG
|
|
223
|
+
MySQL adapter requires the 'mysql2' gem.
|
|
224
|
+
|
|
225
|
+
Install with: gem install mysql2
|
|
226
|
+
|
|
227
|
+
System libraries: https://dev.mysql.com/downloads/
|
|
228
|
+
MSG
|
|
223
229
|
end
|
|
224
230
|
|
|
225
231
|
def result_to_csv(result)
|
|
@@ -27,7 +27,7 @@ module DWH
|
|
|
27
27
|
config :schema, String, default: 'public', message: 'schema name. defaults to "public"'
|
|
28
28
|
config :username, String, required: true, message: 'connection username'
|
|
29
29
|
config :password, String, required: false, default: nil, message: 'connection password'
|
|
30
|
-
config :query_timeout,
|
|
30
|
+
config :query_timeout, Integer, required: false, default: 3600, message: 'query execution timeout in seconds'
|
|
31
31
|
config :ssl, Boolean, required: false, default: false, message: 'use ssl'
|
|
32
32
|
config :client_name, String, required: false, default: 'DWH Ruby Gem', message: 'The name of the connecting app'
|
|
33
33
|
|
|
@@ -45,7 +45,7 @@ module DWH
|
|
|
45
45
|
password: config[:password],
|
|
46
46
|
application_name: config[:client_name]
|
|
47
47
|
}.merge(extra_connection_params)
|
|
48
|
-
properties[:options] = "#{properties[:options]} -c statement_timeout=#{config[:query_timeout]}
|
|
48
|
+
properties[:options] = "#{properties[:options]} -c statement_timeout=#{config[:query_timeout] * 1000}"
|
|
49
49
|
|
|
50
50
|
@connection = PG.connect(properties)
|
|
51
51
|
|
|
@@ -114,7 +114,7 @@ module DWH
|
|
|
114
114
|
db_table = Table.new table, schema: qualifiers[:schema]
|
|
115
115
|
|
|
116
116
|
schema_where = ''
|
|
117
|
-
if db_table.schema
|
|
117
|
+
if db_table.schema?
|
|
118
118
|
schema_where = "AND table_schema = '#{db_table.schema}'"
|
|
119
119
|
elsif schema?
|
|
120
120
|
schema_where = "AND table_schema in (#{qualified_schema_name})"
|
|
@@ -143,7 +143,7 @@ module DWH
|
|
|
143
143
|
|
|
144
144
|
# True if the configuration was set up with a schema.
|
|
145
145
|
def schema?
|
|
146
|
-
config[:schema].
|
|
146
|
+
!config[:schema].nil? && !config[:schema]&.strip&.empty?
|
|
147
147
|
end
|
|
148
148
|
|
|
149
149
|
# (see Adapter#execute)
|
|
@@ -221,7 +221,13 @@ module DWH
|
|
|
221
221
|
super
|
|
222
222
|
require 'pg'
|
|
223
223
|
rescue LoadError
|
|
224
|
-
raise ConfigError,
|
|
224
|
+
raise ConfigError, <<~MSG
|
|
225
|
+
PostgreSQL adapter requires the 'pg' gem.
|
|
226
|
+
|
|
227
|
+
Install with: gem install pg
|
|
228
|
+
|
|
229
|
+
System libraries: https://www.postgresql.org/download/
|
|
230
|
+
MSG
|
|
225
231
|
end
|
|
226
232
|
|
|
227
233
|
private
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
module DWH
|
|
2
|
+
module Adapters
|
|
3
|
+
# Redshift adapter. Please ensure the pg gem is available before using this adapter.
|
|
4
|
+
# Generally, adapters should be created using {DWH::Factory#create DWH.create}, where a configuration
|
|
5
|
+
# is passed in as options hash or argument list.
|
|
6
|
+
#
|
|
7
|
+
# @example Basic connection with required only options
|
|
8
|
+
# DWH.create(:redshift, {host: 'localhost', database: 'redshift',
|
|
9
|
+
# username: 'redshift'})
|
|
10
|
+
#
|
|
11
|
+
# @example Connection with cert based SSL connection
|
|
12
|
+
# DWH.create(:redshift, {host: 'localhost', database: 'redshift',
|
|
13
|
+
# username: 'redshift', ssl: true,
|
|
14
|
+
# extra_connection_params: { sslmode: 'require' }})
|
|
15
|
+
#
|
|
16
|
+
# valid sslmodes: disable, prefer, require, verify-ca, verify-full
|
|
17
|
+
# For modes requiring Certs make sure you add the appropriate params
|
|
18
|
+
# to extra_connection_params. (ie sslrootcert, sslcert etc.)
|
|
19
|
+
#
|
|
20
|
+
# @example Connection sending custom application name
|
|
21
|
+
# DWH.create(:redshift, {host: 'localhost', database: 'redshift',
|
|
22
|
+
# username: 'redshift', client_name: "Strata CLI" })
|
|
23
|
+
class Redshift < Postgres
|
|
24
|
+
config :host, String, required: true, message: 'server host ip address or domain name'
|
|
25
|
+
config :port, Integer, required: false, default: 5439, message: 'port to connect to'
|
|
26
|
+
config :database, String, required: true, message: 'name of database to connect to'
|
|
27
|
+
config :schema, String, default: 'public', message: 'schema name. defaults to "public"'
|
|
28
|
+
config :username, String, required: true, message: 'connection username'
|
|
29
|
+
config :password, String, required: false, default: nil, message: 'connection password'
|
|
30
|
+
config :query_timeout, Integer, required: false, default: 3600, message: 'query execution timeout in seconds'
|
|
31
|
+
config :client_name, String, required: false, default: 'DWH Ruby Gem', message: 'The name of the connecting app'
|
|
32
|
+
config :ssl, Boolean, required: false, default: false, message: 'use ssl'
|
|
33
|
+
|
|
34
|
+
# Need to override default add method
|
|
35
|
+
# since redshift doesn't support quarter as an
|
|
36
|
+
# interval.
|
|
37
|
+
# @param unit [String] Should be one of day, month, quarter etc
|
|
38
|
+
# @param val [String, Integer] The number of units to add
|
|
39
|
+
# @param exp [String] The sql expression to modify
|
|
40
|
+
def date_add(unit, val, exp)
|
|
41
|
+
gsk(:date_add)
|
|
42
|
+
.gsub(/@unit/i, unit)
|
|
43
|
+
.gsub(/@val/i, val.to_s)
|
|
44
|
+
.gsub(/@exp/i, exp)
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -142,7 +142,7 @@ module DWH
|
|
|
142
142
|
change_current_database(db_table.catalog)
|
|
143
143
|
|
|
144
144
|
schema_where = ''
|
|
145
|
-
schema_where = "AND table_schema = '#{db_table.schema}'" if db_table.schema
|
|
145
|
+
schema_where = "AND table_schema = '#{db_table.schema}'" if db_table.schema?
|
|
146
146
|
|
|
147
147
|
sql = <<-SQL
|
|
148
148
|
SELECT column_name, data_type, character_maximum_length, numeric_precision,numeric_scale
|
|
@@ -234,7 +234,13 @@ module DWH
|
|
|
234
234
|
super
|
|
235
235
|
require 'tiny_tds'
|
|
236
236
|
rescue LoadError
|
|
237
|
-
raise ConfigError,
|
|
237
|
+
raise ConfigError, <<~MSG
|
|
238
|
+
SQL Server adapter requires the 'tiny_tds' gem.
|
|
239
|
+
|
|
240
|
+
Install with: gem install tiny_tds
|
|
241
|
+
|
|
242
|
+
System libraries (FreeTDS): https://www.freetds.org/
|
|
243
|
+
MSG
|
|
238
244
|
end
|
|
239
245
|
|
|
240
246
|
private
|
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
module DWH
|
|
2
|
+
module Adapters
|
|
3
|
+
# SQLite adapter optimized for analytical workloads.
|
|
4
|
+
#
|
|
5
|
+
# This requires the ruby {https://github.com/sparklemotion/sqlite3-ruby sqlite3} gem.
|
|
6
|
+
#
|
|
7
|
+
# Generally, adapters should be created using {DWH::Factory#create DWH.create}, where a configuration
|
|
8
|
+
# is passed in as options hash or argument list.
|
|
9
|
+
#
|
|
10
|
+
# @example Basic connection with required only options
|
|
11
|
+
# DWH.create(:sqlite, {file: 'path/to/my/database.db' })
|
|
12
|
+
#
|
|
13
|
+
# @example Open in read only mode
|
|
14
|
+
# DWH.create(:sqlite, {file: 'path/to/my/database.db', readonly: true})
|
|
15
|
+
#
|
|
16
|
+
# @example Configure with custom performance pragmas
|
|
17
|
+
# DWH.create(:sqlite, {file: 'path/to/my/database.db',
|
|
18
|
+
# pragmas: { cache_size: -128000, mmap_size: 268435456 }})
|
|
19
|
+
#
|
|
20
|
+
# @note This adapter enables WAL mode by default for better concurrent read performance.
|
|
21
|
+
# Set `enable_wal: false` to disable this behavior.
|
|
22
|
+
class Sqlite < Adapter
|
|
23
|
+
config :file, String, required: true, message: 'path/to/sqlite/db'
|
|
24
|
+
config :readonly, Boolean, required: false, default: false, message: 'open database in read-only mode'
|
|
25
|
+
config :enable_wal, Boolean, required: false, default: true, message: 'enable WAL mode for better concurrency'
|
|
26
|
+
config :pragmas, Hash, required: false, message: 'hash of PRAGMA statements for performance tuning'
|
|
27
|
+
config :timeout, Integer, required: false, default: 5000, message: 'busy timeout in milliseconds'
|
|
28
|
+
|
|
29
|
+
# Default pragmas optimized for analytical workloads
|
|
30
|
+
DEFAULT_PRAGMAS = {
|
|
31
|
+
cache_size: -64_000, # 64MB cache (negative means KB)
|
|
32
|
+
temp_store: 'MEMORY', # Store temp tables in memory
|
|
33
|
+
mmap_size: 134_217_728, # 128MB memory-mapped I/O
|
|
34
|
+
page_size: 4096, # Standard page size
|
|
35
|
+
synchronous: 'NORMAL' # Faster than FULL, safe with WAL
|
|
36
|
+
}.freeze
|
|
37
|
+
|
|
38
|
+
# (see Adapter#connection)
|
|
39
|
+
def connection
|
|
40
|
+
return @connection if @connection
|
|
41
|
+
|
|
42
|
+
options = build_open_options
|
|
43
|
+
@connection = SQLite3::Database.new(config[:file], options)
|
|
44
|
+
|
|
45
|
+
# Set busy timeout to handle concurrent access
|
|
46
|
+
@connection.busy_timeout(config[:timeout])
|
|
47
|
+
|
|
48
|
+
# Don't return results as hash by default for performance
|
|
49
|
+
@connection.results_as_hash = false
|
|
50
|
+
|
|
51
|
+
# Enable WAL mode for concurrent reads (unless disabled or readonly)
|
|
52
|
+
@connection.execute('PRAGMA journal_mode = WAL') if config[:enable_wal] && !config[:readonly]
|
|
53
|
+
|
|
54
|
+
# Apply default pragmas
|
|
55
|
+
apply_pragmas(DEFAULT_PRAGMAS)
|
|
56
|
+
|
|
57
|
+
# Apply user-specified pragmas (will override defaults)
|
|
58
|
+
apply_pragmas(config[:pragmas]) if config.key?(:pragmas)
|
|
59
|
+
|
|
60
|
+
@connection
|
|
61
|
+
rescue StandardError => e
|
|
62
|
+
raise ConfigError, e.message
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# (see Adapter#close)
|
|
66
|
+
def close
|
|
67
|
+
return if @connection.nil?
|
|
68
|
+
|
|
69
|
+
@connection.close unless @connection.closed?
|
|
70
|
+
@connection = nil
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# (see Adapter#test_connection)
|
|
74
|
+
def test_connection(raise_exception: false)
|
|
75
|
+
connection
|
|
76
|
+
connection.execute('SELECT 1')
|
|
77
|
+
true
|
|
78
|
+
rescue StandardError => e
|
|
79
|
+
raise ConnectionError, e.message if raise_exception
|
|
80
|
+
|
|
81
|
+
false
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# (see Adapter#tables)
|
|
85
|
+
def tables(**qualifiers)
|
|
86
|
+
sql = "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name"
|
|
87
|
+
|
|
88
|
+
res = execute(sql)
|
|
89
|
+
res.flatten
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# (see Adapter#stats)
|
|
93
|
+
def stats(table, date_column: nil, **qualifiers)
|
|
94
|
+
db_table = Table.new table, **qualifiers
|
|
95
|
+
|
|
96
|
+
sql = <<-SQL
|
|
97
|
+
SELECT count(*) AS ROW_COUNT
|
|
98
|
+
#{date_column.nil? ? '' : ", min(#{date_column}) AS DATE_START"}
|
|
99
|
+
#{date_column.nil? ? '' : ", max(#{date_column}) AS DATE_END"}
|
|
100
|
+
FROM #{db_table.physical_name}
|
|
101
|
+
SQL
|
|
102
|
+
|
|
103
|
+
result = execute(sql)
|
|
104
|
+
TableStats.new(
|
|
105
|
+
row_count: result.first[0],
|
|
106
|
+
date_start: date_column ? result.first[1] : nil,
|
|
107
|
+
date_end: date_column ? result.first[2] : nil
|
|
108
|
+
)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# (see Adapter#metadata)
|
|
112
|
+
def metadata(table, **qualifiers)
|
|
113
|
+
db_table = Table.new table, **qualifiers
|
|
114
|
+
|
|
115
|
+
# SQLite uses PRAGMA table_info for metadata
|
|
116
|
+
sql = "PRAGMA table_info(#{db_table.physical_name})"
|
|
117
|
+
|
|
118
|
+
cols = execute(sql)
|
|
119
|
+
cols.each do |col|
|
|
120
|
+
# PRAGMA table_info returns: cid, name, type, notnull, dflt_value, pk
|
|
121
|
+
db_table << Column.new(
|
|
122
|
+
name: col[1],
|
|
123
|
+
data_type: col[2],
|
|
124
|
+
precision: nil,
|
|
125
|
+
scale: nil,
|
|
126
|
+
max_char_length: nil
|
|
127
|
+
)
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
db_table
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# (see Adapter#execute)
|
|
134
|
+
def execute(sql, format: :array, retries: 0)
|
|
135
|
+
begin
|
|
136
|
+
result = with_debug(sql) { with_retry(retries) { connection.execute(sql) } }
|
|
137
|
+
rescue StandardError => e
|
|
138
|
+
raise ExecutionError, e.message
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
format = format.downcase if format.is_a?(String)
|
|
142
|
+
case format.to_sym
|
|
143
|
+
when :array
|
|
144
|
+
result
|
|
145
|
+
when :object
|
|
146
|
+
result_to_hash(sql, result)
|
|
147
|
+
when :csv
|
|
148
|
+
result_to_csv(sql, result)
|
|
149
|
+
when :native
|
|
150
|
+
result
|
|
151
|
+
else
|
|
152
|
+
raise UnsupportedCapability, "Unsupported format: #{format} for this #{name}"
|
|
153
|
+
end
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
# (see Adapter#execute_stream)
|
|
157
|
+
def execute_stream(sql, io, stats: nil, retries: 0)
|
|
158
|
+
with_debug(sql) do
|
|
159
|
+
with_retry(retries) do
|
|
160
|
+
stmt = connection.prepare(sql)
|
|
161
|
+
columns = stmt.columns
|
|
162
|
+
|
|
163
|
+
io.write(CSV.generate_line(columns))
|
|
164
|
+
|
|
165
|
+
stmt.execute.each do |row|
|
|
166
|
+
stats << row unless stats.nil?
|
|
167
|
+
io.write(CSV.generate_line(row))
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
stmt.close
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
io.rewind
|
|
175
|
+
io
|
|
176
|
+
rescue StandardError => e
|
|
177
|
+
raise ExecutionError, e.message
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
# (see Adapter#stream)
|
|
181
|
+
def stream(sql, &block)
|
|
182
|
+
with_debug(sql) do
|
|
183
|
+
stmt = connection.prepare(sql)
|
|
184
|
+
stmt.execute.each do |row|
|
|
185
|
+
block.call(row)
|
|
186
|
+
end
|
|
187
|
+
stmt.close
|
|
188
|
+
end
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
# Custom date truncation implementation. SQLite doesn't offer
|
|
192
|
+
# a native DATE_TRUNC function. We use 'start of' modifiers
|
|
193
|
+
# for year, month, and day, and custom logic for quarter and week.
|
|
194
|
+
# @see Dates#truncate_date
|
|
195
|
+
def truncate_date(unit, exp)
|
|
196
|
+
unit = unit.strip.downcase
|
|
197
|
+
|
|
198
|
+
case unit
|
|
199
|
+
when 'year'
|
|
200
|
+
"date(#{exp}, 'start of year')"
|
|
201
|
+
when 'quarter'
|
|
202
|
+
# Calculate quarter start using CASE statement
|
|
203
|
+
# Q1: Jan-Mar (months 1-3) -> start of year
|
|
204
|
+
# Q2: Apr-Jun (months 4-6) -> start of year + 3 months
|
|
205
|
+
# Q3: Jul-Sep (months 7-9) -> start of year + 6 months
|
|
206
|
+
# Q4: Oct-Dec (months 10-12) -> start of year + 9 months
|
|
207
|
+
'(CASE ' \
|
|
208
|
+
"WHEN CAST(strftime('%m', #{exp}) AS INTEGER) BETWEEN 1 AND 3 THEN date(#{exp}, 'start of year') " \
|
|
209
|
+
"WHEN CAST(strftime('%m', #{exp}) AS INTEGER) BETWEEN 4 AND 6 THEN date(#{exp}, 'start of year', '+3 months') " \
|
|
210
|
+
"WHEN CAST(strftime('%m', #{exp}) AS INTEGER) BETWEEN 7 AND 9 THEN date(#{exp}, 'start of year', '+6 months') " \
|
|
211
|
+
"ELSE date(#{exp}, 'start of year', '+9 months') " \
|
|
212
|
+
'END)'
|
|
213
|
+
when 'month'
|
|
214
|
+
"date(#{exp}, 'start of month')"
|
|
215
|
+
when 'week'
|
|
216
|
+
# Use week start day from settings
|
|
217
|
+
gsk("#{settings[:week_start_day].downcase}_week_start_day")
|
|
218
|
+
.gsub(/@exp/i, exp)
|
|
219
|
+
when 'day', 'date'
|
|
220
|
+
"date(#{exp})"
|
|
221
|
+
when 'hour'
|
|
222
|
+
# SQLite datetime returns timestamp, truncate to hour
|
|
223
|
+
"datetime(strftime('%Y-%m-%d %H:00:00', #{exp}))"
|
|
224
|
+
when 'minute'
|
|
225
|
+
"datetime(strftime('%Y-%m-%d %H:%M:00', #{exp}))"
|
|
226
|
+
when 'second'
|
|
227
|
+
"datetime(strftime('%Y-%m-%d %H:%M:%S', #{exp}))"
|
|
228
|
+
else
|
|
229
|
+
raise UnsupportedCapability, "Currently not supporting truncation at #{unit} level"
|
|
230
|
+
end
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
# SQLite's strftime doesn't support %A (day name) or %B (month name)
|
|
234
|
+
# We need to implement these using CASE statements based on day/month numbers
|
|
235
|
+
def extract_day_name(exp, abbreviate: false)
|
|
236
|
+
day_num = "CAST(strftime('%w', #{exp}) AS INTEGER)"
|
|
237
|
+
|
|
238
|
+
if abbreviate
|
|
239
|
+
# Abbreviated day names: SUN, MON, TUE, etc.
|
|
240
|
+
"(CASE #{day_num} " \
|
|
241
|
+
"WHEN 0 THEN 'SUN' " \
|
|
242
|
+
"WHEN 1 THEN 'MON' " \
|
|
243
|
+
"WHEN 2 THEN 'TUE' " \
|
|
244
|
+
"WHEN 3 THEN 'WED' " \
|
|
245
|
+
"WHEN 4 THEN 'THU' " \
|
|
246
|
+
"WHEN 5 THEN 'FRI' " \
|
|
247
|
+
"WHEN 6 THEN 'SAT' " \
|
|
248
|
+
'END)'
|
|
249
|
+
else
|
|
250
|
+
# Full day names: SUNDAY, MONDAY, TUESDAY, etc.
|
|
251
|
+
"(CASE #{day_num} " \
|
|
252
|
+
"WHEN 0 THEN 'SUNDAY' " \
|
|
253
|
+
"WHEN 1 THEN 'MONDAY' " \
|
|
254
|
+
"WHEN 2 THEN 'TUESDAY' " \
|
|
255
|
+
"WHEN 3 THEN 'WEDNESDAY' " \
|
|
256
|
+
"WHEN 4 THEN 'THURSDAY' " \
|
|
257
|
+
"WHEN 5 THEN 'FRIDAY' " \
|
|
258
|
+
"WHEN 6 THEN 'SATURDAY' " \
|
|
259
|
+
'END)'
|
|
260
|
+
end
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
def extract_month_name(exp, abbreviate: false)
|
|
264
|
+
month_num = "CAST(strftime('%m', #{exp}) AS INTEGER)"
|
|
265
|
+
|
|
266
|
+
if abbreviate
|
|
267
|
+
# Abbreviated month names: JAN, FEB, MAR, etc.
|
|
268
|
+
"(CASE #{month_num} " \
|
|
269
|
+
"WHEN 1 THEN 'JAN' " \
|
|
270
|
+
"WHEN 2 THEN 'FEB' " \
|
|
271
|
+
"WHEN 3 THEN 'MAR' " \
|
|
272
|
+
"WHEN 4 THEN 'APR' " \
|
|
273
|
+
"WHEN 5 THEN 'MAY' " \
|
|
274
|
+
"WHEN 6 THEN 'JUN' " \
|
|
275
|
+
"WHEN 7 THEN 'JUL' " \
|
|
276
|
+
"WHEN 8 THEN 'AUG' " \
|
|
277
|
+
"WHEN 9 THEN 'SEP' " \
|
|
278
|
+
"WHEN 10 THEN 'OCT' " \
|
|
279
|
+
"WHEN 11 THEN 'NOV' " \
|
|
280
|
+
"WHEN 12 THEN 'DEC' " \
|
|
281
|
+
'END)'
|
|
282
|
+
else
|
|
283
|
+
# Full month names: JANUARY, FEBRUARY, MARCH, etc.
|
|
284
|
+
"(CASE #{month_num} " \
|
|
285
|
+
"WHEN 1 THEN 'JANUARY' " \
|
|
286
|
+
"WHEN 2 THEN 'FEBRUARY' " \
|
|
287
|
+
"WHEN 3 THEN 'MARCH' " \
|
|
288
|
+
"WHEN 4 THEN 'APRIL' " \
|
|
289
|
+
"WHEN 5 THEN 'MAY' " \
|
|
290
|
+
"WHEN 6 THEN 'JUNE' " \
|
|
291
|
+
"WHEN 7 THEN 'JULY' " \
|
|
292
|
+
"WHEN 8 THEN 'AUGUST' " \
|
|
293
|
+
"WHEN 9 THEN 'SEPTEMBER' " \
|
|
294
|
+
"WHEN 10 THEN 'OCTOBER' " \
|
|
295
|
+
"WHEN 11 THEN 'NOVEMBER' " \
|
|
296
|
+
"WHEN 12 THEN 'DECEMBER' " \
|
|
297
|
+
'END)'
|
|
298
|
+
end
|
|
299
|
+
end
|
|
300
|
+
|
|
301
|
+
# SQLite's CAST(... AS DATE) doesn't work properly - it just extracts the year
|
|
302
|
+
# We need to override cast to use the date() function for DATE types
|
|
303
|
+
def cast(exp, type)
|
|
304
|
+
if type.to_s.downcase == 'date'
|
|
305
|
+
"date(#{exp})"
|
|
306
|
+
else
|
|
307
|
+
super
|
|
308
|
+
end
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
def valid_config?
|
|
312
|
+
super
|
|
313
|
+
require 'sqlite3'
|
|
314
|
+
rescue LoadError
|
|
315
|
+
raise ConfigError, "Required 'sqlite3' gem missing. Please add it to your Gemfile."
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
private
|
|
319
|
+
|
|
320
|
+
def build_open_options
|
|
321
|
+
options = {}
|
|
322
|
+
options[:readonly] = true if config[:readonly]
|
|
323
|
+
options
|
|
324
|
+
end
|
|
325
|
+
|
|
326
|
+
def apply_pragmas(pragmas)
|
|
327
|
+
return unless pragmas
|
|
328
|
+
|
|
329
|
+
pragmas.each do |pragma, value|
|
|
330
|
+
# Format value appropriately (quote strings, leave numbers/keywords as-is)
|
|
331
|
+
formatted_value = value.is_a?(String) && value.upcase != value ? "'#{value}'" : value
|
|
332
|
+
@connection.execute("PRAGMA #{pragma} = #{formatted_value}")
|
|
333
|
+
end
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
def result_to_hash(sql, result)
|
|
337
|
+
return [] if result.empty?
|
|
338
|
+
|
|
339
|
+
# Get column names by preparing statement
|
|
340
|
+
stmt = connection.prepare(sql)
|
|
341
|
+
columns = stmt.columns
|
|
342
|
+
stmt.close
|
|
343
|
+
|
|
344
|
+
result.map do |row|
|
|
345
|
+
columns.zip(row).to_h
|
|
346
|
+
end
|
|
347
|
+
end
|
|
348
|
+
|
|
349
|
+
def result_to_csv(sql, result)
|
|
350
|
+
# Get column names by preparing statement
|
|
351
|
+
stmt = connection.prepare(sql)
|
|
352
|
+
columns = stmt.columns
|
|
353
|
+
stmt.close
|
|
354
|
+
|
|
355
|
+
CSV.generate do |csv|
|
|
356
|
+
csv << columns
|
|
357
|
+
result.each do |row|
|
|
358
|
+
csv << row
|
|
359
|
+
end
|
|
360
|
+
end
|
|
361
|
+
end
|
|
362
|
+
end
|
|
363
|
+
end
|
|
364
|
+
end
|
data/lib/dwh/adapters/trino.rb
CHANGED
|
@@ -194,7 +194,13 @@ module DWH
|
|
|
194
194
|
super
|
|
195
195
|
require 'trino-client'
|
|
196
196
|
rescue LoadError
|
|
197
|
-
raise ConfigError,
|
|
197
|
+
raise ConfigError, <<~MSG
|
|
198
|
+
Trino adapter requires the 'trino-client' gem.
|
|
199
|
+
|
|
200
|
+
Install with: gem install trino-client
|
|
201
|
+
|
|
202
|
+
No system libraries required (pure Ruby).
|
|
203
|
+
MSG
|
|
198
204
|
end
|
|
199
205
|
|
|
200
206
|
private
|
data/lib/dwh/adapters.rb
CHANGED
|
@@ -80,12 +80,12 @@ module DWH
|
|
|
80
80
|
attr_reader :config
|
|
81
81
|
|
|
82
82
|
def initialize(config)
|
|
83
|
-
@config = config.
|
|
83
|
+
@config = config.transform_keys(&:to_sym)
|
|
84
84
|
# Per instance customization of general settings
|
|
85
85
|
# So you can have multiple connections to Trino
|
|
86
86
|
# but exhibit different behavior
|
|
87
87
|
@settings = self.class.adapter_settings.merge(
|
|
88
|
-
(config[:settings] || {}).
|
|
88
|
+
(config[:settings] || {}).transform_keys(&:to_sym)
|
|
89
89
|
)
|
|
90
90
|
|
|
91
91
|
valid_config?
|
|
@@ -300,7 +300,7 @@ module DWH
|
|
|
300
300
|
# Adapter name from the class name
|
|
301
301
|
# @return [String]
|
|
302
302
|
def adapter_name
|
|
303
|
-
self.class.name.
|
|
303
|
+
self.class.name.split('::').last.downcase
|
|
304
304
|
end
|
|
305
305
|
|
|
306
306
|
# If any extra connection params were passed in the config
|
data/lib/dwh/column.rb
CHANGED
|
@@ -22,7 +22,7 @@ module DWH
|
|
|
22
22
|
|
|
23
23
|
DEFAULT_RULES = { /[_+]+/ => ' ', /\s+id$/i => ' ID', /desc/i => 'Description' }.freeze
|
|
24
24
|
def namify(rules = DEFAULT_RULES)
|
|
25
|
-
named = name
|
|
25
|
+
named = titleize(name)
|
|
26
26
|
rules.each do |k, v|
|
|
27
27
|
named = named.gsub(Regexp.new(k), v)
|
|
28
28
|
end
|
|
@@ -75,5 +75,16 @@ module DWH
|
|
|
75
75
|
def to_s
|
|
76
76
|
"<Column:#{name}:#{data_type}>"
|
|
77
77
|
end
|
|
78
|
+
|
|
79
|
+
def titleize(name)
|
|
80
|
+
# Handle underscores, dashes, and multiple spaces
|
|
81
|
+
# Also preserves existing spacing patterns better
|
|
82
|
+
name.gsub(/[_-]/, ' ') # Convert underscores and dashes to spaces
|
|
83
|
+
.gsub(/\s+/, ' ') # Normalize multiple spaces to single spaces
|
|
84
|
+
.strip # Remove leading/trailing whitespace
|
|
85
|
+
.split(' ') # Split into words
|
|
86
|
+
.map(&:capitalize) # Capitalize each word
|
|
87
|
+
.join(' ') # Join with single spaces
|
|
88
|
+
end
|
|
78
89
|
end
|
|
79
90
|
end
|
data/lib/dwh/functions/dates.rb
CHANGED
|
@@ -124,12 +124,27 @@ module DWH
|
|
|
124
124
|
gsk(:date_literal).gsub(/@val/i, val)
|
|
125
125
|
end
|
|
126
126
|
|
|
127
|
+
# @see #date_literal
|
|
128
|
+
def date_lit(val)
|
|
129
|
+
date_literal(val)
|
|
130
|
+
end
|
|
131
|
+
|
|
127
132
|
# @param val [String, Date, DateTime, Time]
|
|
128
133
|
def date_time_literal(val)
|
|
129
134
|
val = DATE_CLASSES.include?(val.class) ? val.strftime(date_time_format) : val
|
|
130
135
|
gsk(:date_time_literal).gsub(/@val/i, val)
|
|
131
136
|
end
|
|
132
137
|
|
|
138
|
+
# @see #date_time_literal
|
|
139
|
+
def timestamp_lit(val)
|
|
140
|
+
date_time_literal(val)
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# @see #date_time_literal
|
|
144
|
+
def timestamp_literal(val)
|
|
145
|
+
date_time_literal(val)
|
|
146
|
+
end
|
|
147
|
+
|
|
133
148
|
# The current default week start day. This is how
|
|
134
149
|
# the db is currently set up. Should be either monday or sunday
|
|
135
150
|
def default_week_start_day
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
|
|
2
1
|
# quotes and string lit
|
|
3
|
-
quote: "
|
|
2
|
+
quote: "`@exp`"
|
|
4
3
|
string_literal: "'@exp'"
|
|
5
4
|
|
|
6
5
|
# Date Literal Formats
|
|
@@ -13,20 +12,20 @@ abbreviated_day_name_format: "EEE"
|
|
|
13
12
|
month_name_format: "MMMM"
|
|
14
13
|
abbreviated_month_name_format: "MMM"
|
|
15
14
|
|
|
16
|
-
date_add: "
|
|
17
|
-
date_diff: "
|
|
18
|
-
date_format_sql: "
|
|
19
|
-
extract_day_of_year: '
|
|
20
|
-
extract_day_of_week: '
|
|
21
|
-
extract_week_of_year: '
|
|
22
|
-
extract_year_month: '
|
|
15
|
+
date_add: "DATE_ADD(@unit, @val, @exp)"
|
|
16
|
+
date_diff: "DATE_DIFF(@unit, @start_exp, @end_exp)"
|
|
17
|
+
date_format_sql: "DATE_FORMAT(@exp, '@format')"
|
|
18
|
+
extract_day_of_year: 'DAYOFYEAR(@exp)'
|
|
19
|
+
extract_day_of_week: 'DAYOFWEEK(@exp)'
|
|
20
|
+
extract_week_of_year: 'WEEKOFYEAR(@exp)'
|
|
21
|
+
extract_year_month: 'CAST(CONCAT(YEAR(@exp), LPAD(MONTH(@exp), 2, "0")) as INT)'
|
|
23
22
|
|
|
24
23
|
cast: "CAST(@exp AS @type)"
|
|
25
24
|
|
|
26
25
|
# string functions
|
|
27
|
-
trim: "
|
|
28
|
-
lower_case: "
|
|
29
|
-
upper_case: "
|
|
26
|
+
trim: "TRIM(@exp)"
|
|
27
|
+
lower_case: "LOWER(@exp)"
|
|
28
|
+
upper_case: "UPPER(@exp)"
|
|
30
29
|
|
|
31
30
|
# null handling
|
|
32
31
|
if_null: "COALESCE(@exp, @when_null)"
|
|
@@ -45,7 +44,7 @@ supports_window_functions: true
|
|
|
45
44
|
extend_ending_date_to_last_hour_of_day: false # druid needs this for inclusive filtering
|
|
46
45
|
|
|
47
46
|
# array operations
|
|
48
|
-
array_in_list: "
|
|
49
|
-
array_exclude_list: "
|
|
50
|
-
array_unnest_join: "LATERAL VIEW
|
|
47
|
+
array_in_list: "EXISTS(@exp, x -> x IN (@list))"
|
|
48
|
+
array_exclude_list: "NOT EXISTS(@exp, x -> x IN (@list))"
|
|
49
|
+
array_unnest_join: "LATERAL VIEW EXPLODE(@exp) AS @alias"
|
|
51
50
|
|
data/lib/dwh/settings/druid.yml
CHANGED
|
@@ -25,9 +25,9 @@ sunday_week_start_day: "TIME_FLOOR(@exp, 'P7D', TIMESTAMP '1970-01-04 00:00:00')
|
|
|
25
25
|
monday_week_start_day: "TIME_FLOOR(@exp, 'P7D', TIMESTAMP '1970-01-05 00:00:00')"
|
|
26
26
|
|
|
27
27
|
# string functions
|
|
28
|
-
trim: "
|
|
29
|
-
lower_case: "
|
|
30
|
-
upper_case: "
|
|
28
|
+
trim: "TRIM(@exp)"
|
|
29
|
+
lower_case: "LOWER(@exp)"
|
|
30
|
+
upper_case: "UPPER(@exp)"
|
|
31
31
|
|
|
32
32
|
# Relevant db capabilities
|
|
33
33
|
supports_table_join: true
|
data/lib/dwh/settings/duckdb.yml
CHANGED
|
@@ -38,7 +38,7 @@ upper_case: "UPPER(@exp)"
|
|
|
38
38
|
create_temp_table_template: "CREATE TEMP TABLE @table AS \n@sql"
|
|
39
39
|
|
|
40
40
|
# array operations
|
|
41
|
-
array_in_list: "
|
|
42
|
-
array_exclude_list: "
|
|
41
|
+
array_in_list: "ARRAY_LENGTH(ARRAY_INTERSECT(@exp, @list)) > 0"
|
|
42
|
+
array_exclude_list: "ARRAY_LENGTH(ARRAY_INTERSECT(@exp, @list)) = 0"
|
|
43
43
|
array_unnest_join: ", LATERAL (SELECT UNNEST(@exp)) AS @alias"
|
|
44
44
|
|