dwh 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/dwh/adapters/click_house.rb +172 -0
- data/lib/dwh/adapters/duck_db.rb +9 -9
- data/lib/dwh/column.rb +26 -7
- data/lib/dwh/settings/base.yml +150 -0
- data/lib/dwh/settings/clickhouse.yml +129 -0
- data/lib/dwh/settings/databricks.yml +11 -0
- data/lib/dwh/settings/snowflake.yml +15 -0
- data/lib/dwh/settings.rb +26 -0
- data/lib/dwh/version.rb +1 -1
- data/lib/dwh.rb +20 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d91ce7dc2866f584744c0636918a6e4d5a2d189d3356b08fcee2b60ee40b5317
|
|
4
|
+
data.tar.gz: 51e0c508c1a77e5302493868a3e493547ec00fc022195a1b31f581b49abb843b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1ac97b11067710faab814ff16f4e17e46ffa70acc7281e3c45c86e32861feb5eb7207d9f372f29849bc7453f891241c5715624024fc82f5d660d098d812990e5
|
|
7
|
+
data.tar.gz: 2aa0820ef26779facc52dc41797403930ebb39186fc0eb81146b310395e8a0052c2d23c1468ee3991ec901204cd9d4f1ce079cf1d86d2c69c7a6616a07b2093e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.5.0] - 2026-06-19
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- ClickHouse adapter with dedicated settings and test coverage for system and adapter behavior.
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
|
|
11
|
+
- Added dialect-specific reserved keywords and aggregate functions for ClickHouse, Snowflake, and Databricks expression parsing.
|
|
12
|
+
|
|
3
13
|
## [0.4.2] - 2026-05-22
|
|
4
14
|
|
|
5
15
|
### Fixed
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
module DWH
  module Adapters
    # ClickHouse adapter for executing analytical queries against ClickHouse databases.
    # Uses the ClickHouse HTTP interface (default port 8123) via Faraday.
    #
    # Every statement is sent as the body of a POST to "/" with
    # "FORMAT JSONCompact" appended, and the JSON response is parsed into a
    # Hash whose 'meta' entry describes the columns and whose 'data' entry
    # holds the rows as arrays.
    #
    # @example Basic local connection
    #   DWH.create(:clickhouse, { host: 'localhost', database: 'default' })
    #
    # @example With authentication
    #   DWH.create(:clickhouse, {
    #     host: 'my-clickhouse.example.com',
    #     port: 8443,
    #     protocol: 'https',
    #     database: 'analytics',
    #     username: 'analyst',
    #     password: 'secret'
    #   })
    class ClickHouse < Adapter
      QUERY_FORMAT = 'JSONCompact'.freeze

      config :protocol, String, required: false, default: 'http', message: 'http or https'
      config :host, String, required: true, message: 'server host ip address or domain name'
      config :port, Integer, required: false, default: 8123, message: 'ClickHouse HTTP interface port (default 8123)'
      config :database, String, required: false, default: 'default', message: 'database to connect to'
      config :username, String, required: false, default: 'default', message: 'username (default: default)'
      config :password, String, required: false, default: nil, message: 'password'
      config :query_timeout, Integer, required: false, default: 300, message: 'query execution timeout in seconds'

      # Lazily builds and memoizes the Faraday connection.
      # Credentials and the target database travel as X-ClickHouse-* headers;
      # the key header is only sent when a password is configured.
      #
      # @return [Faraday::Connection]
      def connection
        return @connection if @connection

        headers = {
          'Content-Type' => 'text/plain',
          'X-ClickHouse-User' => config[:username],
          'X-ClickHouse-Database' => database
        }
        headers['X-ClickHouse-Key'] = config[:password] if config[:password]

        @connection = Faraday.new(
          url: "#{config[:protocol]}://#{config[:host]}:#{config[:port]}",
          headers: headers,
          request: { timeout: config[:query_timeout] }
        )
      end

      # Checks connectivity by calling the /ping endpoint and expecting "Ok.".
      #
      # @param raise_exception [Boolean] raise instead of returning false
      # @return [Boolean] true when the server answered the ping successfully
      # @raise [ConnectionError] on failure, only when raise_exception is true
      def test_connection(raise_exception: false)
        res = connection.get('/ping')
        unless res.success? && res.body.strip == 'Ok.'
          raise ConnectionError, "ClickHouse ping returned: #{res.body}" if raise_exception

          return false
        end
        true
      rescue Faraday::Error => e
        # Faraday::Error is the parent of ConnectionFailed, TimeoutError and
        # SSLError, so timeouts and TLS failures also honour raise_exception
        # instead of leaking a raw Faraday exception to the caller.
        raise ConnectionError, e.message if raise_exception

        false
      end

      # Lists table names in a database, excluding views and materialized views.
      #
      # @param qualifiers [Hash] :schema overrides the configured database
      # @return [Array<String>]
      def tables(**qualifiers)
        db = qualifiers[:schema] || database
        sql = "SELECT name FROM system.tables WHERE database = '#{quote_string_literal(db)}' " \
              "AND engine NOT IN ('View', 'MaterializedView')"
        execute_raw(sql)['data'].flatten
      end

      # Describes a table and returns its columns.
      #
      # @param table [String] table name
      # @param qualifiers [Hash] :schema overrides the configured database
      # @return [Table] table populated with one Column per DESCRIBE row
      def metadata(table, **qualifiers)
        db = qualifiers[:schema] || database
        full_table = db ? "#{db}.#{table}" : table
        # DESCRIBE returns: name, type, default_type, default_expression, comment, codec_expression, ttl_expression
        res = execute_raw("DESCRIBE TABLE #{full_table}")
        db_table = Table.new(table, schema: db)
        res['data'].each do |row|
          # Only the first two DESCRIBE columns (name, type) are used.
          db_table << Column.new(name: row[0], data_type: row[1])
        end
        db_table
      end

      # Returns the row count and, when a date column is given, its min/max.
      #
      # @param table [String] table name
      # @param date_column [String, nil] column used for date_start / date_end
      # @param qualifiers [Hash] :schema overrides the configured database
      # @return [TableStats]
      def stats(table, date_column: nil, **qualifiers)
        db = qualifiers[:schema] || database
        full_table = db ? "#{db}.#{table}" : table
        sql = +'SELECT count() AS row_count'
        sql << ", min(#{date_column}) AS date_start, max(#{date_column}) AS date_end" if date_column
        sql << " FROM #{full_table}"

        row = execute_raw(sql)['data'][0]
        TableStats.new(
          row_count: row[0].to_i,
          date_start: date_column ? safe_parse_date(row[1]) : nil,
          date_end: date_column ? safe_parse_date(row[2]) : nil
        )
      end

      # Runs a statement and returns the result in the requested format.
      #
      # @param sql [String] statement to run
      # @param format [Symbol, String] :array, :object, :csv or :native
      # @param retries [Integer] passed to with_retry
      # @return [Array, String, Hash] depends on format (see #format_result)
      # @raise [ExecutionError] for any failure, including an unsupported format
      def execute(sql, format: :array, retries: 0)
        raw = with_debug(sql) { with_retry(retries) { execute_raw(sql) } }
        format_result(raw, format)
      rescue ExecutionError
        raise
      rescue StandardError => e
        raise ExecutionError, e.message
      end

      # Runs a statement and writes the result to io as CSV (header row first).
      # The whole response is parsed before the first line is written.
      #
      # @param sql [String] statement to run
      # @param io [IO] writable, rewindable target
      # @param stats [#<<, nil] optional collector that receives every row
      # @param retries [Integer] passed to with_retry
      # @return [IO] the same io, rewound
      # @raise [ExecutionError]
      def execute_stream(sql, io, stats: nil, retries: 0)
        with_debug(sql) do
          with_retry(retries) do
            raw = execute_raw(sql)
            cols = raw['meta'].map { |col| col['name'] }
            io.write(CSV.generate_line(cols))
            raw['data'].each do |row|
              stats << row unless stats.nil?
              io.write(CSV.generate_line(row))
            end
          end
        end
        io.rewind
        io
      rescue ExecutionError
        raise
      rescue StandardError => e
        raise ExecutionError, e.message
      end

      # Runs a statement and yields each result row (an Array) to the block.
      #
      # @param sql [String] statement to run
      def stream(sql, &block)
        with_debug(sql) do
          execute_raw(sql)['data'].each(&block)
        end
      end

      private

      # Sends one statement to ClickHouse and parses the JSONCompact response.
      #
      # @param sql [String]
      # @return [Hash] parsed response with 'meta' and 'data' keys
      # @raise [ExecutionError] on transport errors, non-2xx responses or an
      #   unparseable body
      def execute_raw(sql)
        # A trailing ";" would otherwise end up in front of the appended
        # FORMAT clause and turn a valid statement into a syntax error.
        statement = sql.to_s.sub(/[;\s]+\z/, '')
        resp = connection.post('/') do |req|
          req.body = "#{statement} FORMAT #{QUERY_FORMAT}"
        end
        raise ExecutionError, "ClickHouse error: #{resp.body}" unless resp.success?

        body = resp.body.to_s
        # Statements without a result set answer with an empty body; hand
        # back an empty result instead of failing inside JSON.parse.
        return { 'meta' => [], 'data' => [] } if body.strip.empty?

        JSON.parse(body)
      rescue Faraday::Error => e
        raise ExecutionError, e.message
      rescue JSON::ParserError => e
        raise ExecutionError, "ClickHouse returned a non-JSON response: #{e.message}"
      end

      # Escapes backslashes and single quotes so a value can be embedded in a
      # single-quoted ClickHouse string literal.
      #
      # @param value [#to_s]
      # @return [String]
      def quote_string_literal(value)
        value.to_s.gsub(/[\\']/) { |char| "\\#{char}" }
      end

      # Converts a parsed response into the caller-facing format.
      #
      # @param raw [Hash] parsed response from #execute_raw
      # @param format [Symbol, String]
      # @return [Array<Array>] for :array, [Array<Hash>] for :object,
      #   [String] for :csv, [Hash] for :native
      # @raise [ArgumentError] for any other format
      def format_result(raw, format)
        case format.to_sym
        when :array
          raw['data']
        when :object
          cols = raw['meta'].map { |col| col['name'] }
          raw['data'].map { |row| cols.zip(row).to_h }
        when :csv
          CSV.generate do |csv|
            csv << raw['meta'].map { |col| col['name'] }
            raw['data'].each { |row| csv << row }
          end
        when :native
          raw
        else
          raise ArgumentError, "Unsupported format: #{format}"
        end
      end

      # Parses a date value, returning nil for blank or unparseable input.
      #
      # @param val [Object]
      # @return [Date, nil]
      def safe_parse_date(val)
        return nil if val.nil? || val.to_s.empty?

        Date.parse(val.to_s)
      rescue Date::Error
        nil
      end
    end
  end
end
|
data/lib/dwh/adapters/duck_db.rb
CHANGED
|
@@ -85,19 +85,19 @@ module DWH
|
|
|
85
85
|
# (see Adapter#tables)
|
|
86
86
|
def tables(**qualifiers)
|
|
87
87
|
catalog, schema = qualifiers.values_at(:catalog, :schema)
|
|
88
|
-
|
|
88
|
+
schema_filter = schema || config[:schema]
|
|
89
89
|
|
|
90
|
-
where = []
|
|
90
|
+
where = ["schema_name = '#{schema_filter}'"]
|
|
91
91
|
where << "database_name = '#{catalog}'" if catalog
|
|
92
|
+
condition = where.join(' AND ')
|
|
92
93
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
94
|
+
sql = <<~SQL
|
|
95
|
+
SELECT table_name FROM duckdb_tables WHERE #{condition}
|
|
96
|
+
UNION ALL
|
|
97
|
+
SELECT view_name FROM duckdb_views WHERE #{condition}
|
|
98
|
+
SQL
|
|
98
99
|
|
|
99
|
-
|
|
100
|
-
res.flatten
|
|
100
|
+
execute(sql).flatten
|
|
101
101
|
end
|
|
102
102
|
|
|
103
103
|
# (see Adapter#stats)
|
data/lib/dwh/column.rb
CHANGED
|
@@ -31,23 +31,32 @@ module DWH
|
|
|
31
31
|
end
|
|
32
32
|
|
|
33
33
|
def normalized_data_type
|
|
34
|
-
|
|
34
|
+
# Strip ClickHouse type wrappers (Nullable(T), LowCardinality(T), Array(T))
|
|
35
|
+
# so the inner type is matched by the rules below.
|
|
36
|
+
inner = unwrap_type(data_type)
|
|
37
|
+
|
|
38
|
+
case inner
|
|
35
39
|
when /binary/, 'image'
|
|
36
40
|
'binary'
|
|
37
|
-
when /varchar/, 'string', /text/, /char/
|
|
41
|
+
when /varchar/, 'string', /text/, /char/, /fixedstring/
|
|
38
42
|
'string'
|
|
39
|
-
when 'date'
|
|
43
|
+
when 'date', 'date32'
|
|
40
44
|
'date'
|
|
41
45
|
when /date_time/, /datetime/, 'time', /timestamp/
|
|
42
46
|
'date_time'
|
|
43
|
-
when 'int', 'integer', 'smallint', 'tinyint'
|
|
47
|
+
when 'int', 'integer', 'smallint', 'tinyint', /^int8$/, /^int16$/, /^int32$/,
|
|
48
|
+
/^uint8$/, /^uint16$/, /^uint32$/
|
|
44
49
|
'integer'
|
|
45
|
-
when 'bigint', 'bit_int', 'big_integer'
|
|
50
|
+
when 'bigint', 'bit_int', 'big_integer', /^int64$/, /^int128$/, /^int256$/,
|
|
51
|
+
/^uint64$/, /^uint128$/, /^uint256$/
|
|
46
52
|
'bigint'
|
|
47
|
-
when 'decimal', 'double', 'float', 'real', 'dec', 'numeric', 'money'
|
|
53
|
+
when 'decimal', 'double', 'float', 'real', 'dec', 'numeric', 'money',
|
|
54
|
+
/^float32$/, /^float64$/, /^decimal/
|
|
48
55
|
'decimal'
|
|
49
|
-
when 'boolean', 'bit'
|
|
56
|
+
when 'boolean', 'bit', 'bool'
|
|
50
57
|
'boolean'
|
|
58
|
+
when 'uuid'
|
|
59
|
+
'string'
|
|
51
60
|
when 'number'
|
|
52
61
|
if precision >= 38 && scale.zero?
|
|
53
62
|
'bigint'
|
|
@@ -76,6 +85,16 @@ module DWH
|
|
|
76
85
|
"<Column:#{name}:#{data_type}>"
|
|
77
86
|
end
|
|
78
87
|
|
|
88
|
+
# Strips ClickHouse parameterized wrappers like Nullable(T), LowCardinality(T),
# Array(T) so the inner type can be normalised by the standard rules above.
# Wrappers are peeled repeatedly, in whatever order they are nested, so
# LowCardinality(Nullable(String)) and Array(Nullable(Int32)) both reduce to
# their innermost type. Parameters of non-wrapper types are left intact,
# e.g. Nullable(Decimal(10, 2)) becomes "decimal(10, 2)".
# Safe to call on any type string; returns the downcased input if no wrapper matches.
#
# @param type [#to_s] raw column type as reported by the database
# @return [String] downcased type with all outer wrappers removed
def unwrap_type(type)
  wrapper = /\A(?:nullable|lowcardinality|array)\((.+)\)\z/
  inner = type.to_s.downcase
  # Each pass removes one outer wrapper; the string shrinks every time, so
  # the loop always terminates.
  while (match = wrapper.match(inner))
    inner = match[1]
  end
  inner
end
|
|
97
|
+
|
|
79
98
|
def titleize(name)
|
|
80
99
|
# Handle underscores, dashes, and multiple spaces
|
|
81
100
|
# Also preserves existing spacing patterns better
|
data/lib/dwh/settings/base.yml
CHANGED
|
@@ -79,3 +79,153 @@ final_pass_measure_join_type: "full" # inner left right etc
|
|
|
79
79
|
apply_advanced_filtering_on_array_projections: false # druid needs a having clause or un-nesting
|
|
80
80
|
greedy_apply_date_filters: true
|
|
81
81
|
cross_universe_measure_filtering_strategy: "both" # both, final, intermediate
|
|
82
|
+
|
|
83
|
+
# Standard SQL reserved keywords. Bare tokens matching these are treated as SQL
|
|
84
|
+
# syntax during expression parsing, not column names.
|
|
85
|
+
# Per-adapter YAMLs extend this baseline with extra_reserved_keywords.
|
|
86
|
+
reserved_keywords:
|
|
87
|
+
- __STR__
|
|
88
|
+
- as
|
|
89
|
+
- cast
|
|
90
|
+
- case
|
|
91
|
+
- when
|
|
92
|
+
- then
|
|
93
|
+
- else
|
|
94
|
+
- 'null'
|
|
95
|
+
- end
|
|
96
|
+
- and
|
|
97
|
+
- or
|
|
98
|
+
- current_date
|
|
99
|
+
- current_time
|
|
100
|
+
- current_timestamp
|
|
101
|
+
- is
|
|
102
|
+
- not
|
|
103
|
+
- between
|
|
104
|
+
- like
|
|
105
|
+
- in
|
|
106
|
+
- exists
|
|
107
|
+
- any
|
|
108
|
+
- some
|
|
109
|
+
- over
|
|
110
|
+
- partition
|
|
111
|
+
- by
|
|
112
|
+
- order
|
|
113
|
+
- asc
|
|
114
|
+
- desc
|
|
115
|
+
- row
|
|
116
|
+
- rows
|
|
117
|
+
- range
|
|
118
|
+
- preceding
|
|
119
|
+
- following
|
|
120
|
+
- unbounded
|
|
121
|
+
- current
|
|
122
|
+
- interval
|
|
123
|
+
- int
|
|
124
|
+
- integer
|
|
125
|
+
- bigint
|
|
126
|
+
- smallint
|
|
127
|
+
- tinyint
|
|
128
|
+
- boolean
|
|
129
|
+
- date
|
|
130
|
+
- time
|
|
131
|
+
- timestamp
|
|
132
|
+
- varchar
|
|
133
|
+
- string
|
|
134
|
+
- decimal
|
|
135
|
+
- float
|
|
136
|
+
- real
|
|
137
|
+
- numeric
|
|
138
|
+
- double
|
|
139
|
+
- array
|
|
140
|
+
- varbinary
|
|
141
|
+
- json
|
|
142
|
+
- map
|
|
143
|
+
- from
|
|
144
|
+
- where
|
|
145
|
+
- filter
|
|
146
|
+
- distinct
|
|
147
|
+
- select
|
|
148
|
+
- having
|
|
149
|
+
- limit
|
|
150
|
+
- offset
|
|
151
|
+
- group
|
|
152
|
+
- join
|
|
153
|
+
- left
|
|
154
|
+
- right
|
|
155
|
+
- inner
|
|
156
|
+
- outer
|
|
157
|
+
- full
|
|
158
|
+
- cross
|
|
159
|
+
- on
|
|
160
|
+
- using
|
|
161
|
+
- union
|
|
162
|
+
- intersect
|
|
163
|
+
- except
|
|
164
|
+
- all
|
|
165
|
+
- with
|
|
166
|
+
- recursive
|
|
167
|
+
- lateral
|
|
168
|
+
- 'true'
|
|
169
|
+
- 'false'
|
|
170
|
+
- value
|
|
171
|
+
- epoch
|
|
172
|
+
- year
|
|
173
|
+
- month
|
|
174
|
+
- day
|
|
175
|
+
- hour
|
|
176
|
+
- minute
|
|
177
|
+
- second
|
|
178
|
+
- week
|
|
179
|
+
- quarter
|
|
180
|
+
- millisecond
|
|
181
|
+
- microsecond
|
|
182
|
+
- dow
|
|
183
|
+
- doy
|
|
184
|
+
- timezone
|
|
185
|
+
- leading
|
|
186
|
+
- trailing
|
|
187
|
+
- both
|
|
188
|
+
- nulls
|
|
189
|
+
- first
|
|
190
|
+
- last
|
|
191
|
+
- within
|
|
192
|
+
- ties
|
|
193
|
+
- at
|
|
194
|
+
- zone
|
|
195
|
+
- to
|
|
196
|
+
- ilike
|
|
197
|
+
- similar
|
|
198
|
+
|
|
199
|
+
# Standard SQL aggregate functions. Presence of any of these in a formula marks
|
|
200
|
+
# the expression as a measure (re-aggregating). Per-adapter YAMLs extend with
|
|
201
|
+
# extra_aggregate_functions.
|
|
202
|
+
aggregate_functions:
|
|
203
|
+
- sum
|
|
204
|
+
- count
|
|
205
|
+
- avg
|
|
206
|
+
- min
|
|
207
|
+
- max
|
|
208
|
+
- median
|
|
209
|
+
- mode
|
|
210
|
+
- stddev
|
|
211
|
+
- stddev_pop
|
|
212
|
+
- stddev_samp
|
|
213
|
+
- variance
|
|
214
|
+
- var_pop
|
|
215
|
+
- var_samp
|
|
216
|
+
- array_agg
|
|
217
|
+
- string_agg
|
|
218
|
+
- listagg
|
|
219
|
+
- bool_and
|
|
220
|
+
- bool_or
|
|
221
|
+
- every
|
|
222
|
+
- bit_and
|
|
223
|
+
- bit_or
|
|
224
|
+
- count_if
|
|
225
|
+
- percentile_cont
|
|
226
|
+
- percentile_disc
|
|
227
|
+
- corr
|
|
228
|
+
- covar_pop
|
|
229
|
+
- covar_samp
|
|
230
|
+
- approx_distinct
|
|
231
|
+
- approx_count_distinct
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# ClickHouse adapter settings
|
|
2
|
+
# Only overrides that differ from base.yml are listed here.
|
|
3
|
+
|
|
4
|
+
# ClickHouse uses backticks for identifier quoting (same as MySQL).
|
|
5
|
+
quote: "`@exp`"
|
|
6
|
+
|
|
7
|
+
# Date literals
|
|
8
|
+
date_literal: "toDate('@val')"
|
|
9
|
+
date_time_literal: "toDateTime('@val')"
|
|
10
|
+
|
|
11
|
+
# ClickHouse date arithmetic uses interval syntax compatible with the SQL standard.
|
|
12
|
+
date_add: "(@exp + INTERVAL @val @unit)"
|
|
13
|
+
# dateDiff(unit, start, end) — note the argument order (start before end)
|
|
14
|
+
date_diff: "dateDiff('@unit', @start_exp, @end_exp)"
|
|
15
|
+
|
|
16
|
+
# DATE_TRUNC is supported in modern ClickHouse (v22+) as date_trunc.
|
|
17
|
+
truncate_date: "date_trunc('@unit', @exp)"
|
|
18
|
+
|
|
19
|
+
# ClickHouse date format function
|
|
20
|
+
date_format_sql: "formatDateTime(@exp, '@format')"
|
|
21
|
+
|
|
22
|
+
# ClickHouse date-part extraction
|
|
23
|
+
extract_year: 'toYear(@exp)'
|
|
24
|
+
extract_month: 'toMonth(@exp)'
|
|
25
|
+
extract_quarter: 'toQuarter(@exp)'
|
|
26
|
+
extract_day_of_year: 'toDayOfYear(@exp)'
|
|
27
|
+
extract_day_of_month: 'toDayOfMonth(@exp)'
|
|
28
|
+
extract_day_of_week: 'toDayOfWeek(@exp)'
|
|
29
|
+
extract_week_of_year: 'toWeek(@exp)'
|
|
30
|
+
extract_hour: 'toHour(@exp)'
|
|
31
|
+
extract_minute: 'toMinute(@exp)'
|
|
32
|
+
extract_year_month: 'CAST((toString(toYear(@exp)) || lpad(toString(toMonth(@exp)), 2, ''0'')) AS Int32)'
|
|
33
|
+
|
|
34
|
+
# ClickHouse week-start helpers
|
|
35
|
+
sunday_week_start_day: "toMonday((@exp + INTERVAL 1 DAY)) - INTERVAL 1 DAY"
|
|
36
|
+
monday_week_start_day: "toMonday(@exp)"
|
|
37
|
+
|
|
38
|
+
# Current time functions
|
|
39
|
+
current_date: "today()"
|
|
40
|
+
current_timestamp: "now()"
|
|
41
|
+
# current_time is less commonly used; now() returns a DateTime.
|
|
42
|
+
current_time: "now()"
|
|
43
|
+
|
|
44
|
+
# Null handling — ClickHouse-native forms
|
|
45
|
+
if_null: "ifNull(@exp, @when_null)"
|
|
46
|
+
null_if: "nullIf(@exp, @target)"
|
|
47
|
+
null_if_zero: "nullIf(@exp, 0)"
|
|
48
|
+
|
|
49
|
+
# Array operations — ClickHouse uses hasAny / NOT hasAny
|
|
50
|
+
array_in_list: "hasAny(@exp, [@list])"
|
|
51
|
+
array_exclude_list: "NOT hasAny(@exp, [@list])"
|
|
52
|
+
array_unnest_join: "ARRAY JOIN @exp AS @alias"
|
|
53
|
+
|
|
54
|
+
# Capabilities
|
|
55
|
+
# ClickHouse supports CTEs (WITH clause) but not CREATE TEMPORARY TABLE.
|
|
56
|
+
supports_temp_tables: false
|
|
57
|
+
temp_table_type: "cte"
|
|
58
|
+
# Window functions supported since v21.x
|
|
59
|
+
supports_window_functions: true
|
|
60
|
+
supports_array_functions: true
|
|
61
|
+
supports_common_table_expressions: true
|
|
62
|
+
supports_full_join: true
|
|
63
|
+
|
|
64
|
+
# ClickHouse-specific reserved keywords not in the standard baseline.
|
|
65
|
+
extra_reserved_keywords:
|
|
66
|
+
- prewhere
|
|
67
|
+
- final
|
|
68
|
+
- sample
|
|
69
|
+
- totals
|
|
70
|
+
- rollup
|
|
71
|
+
- cube
|
|
72
|
+
- tuple
|
|
73
|
+
- array
|
|
74
|
+
- fixedstring
|
|
75
|
+
- lowcardinality
|
|
76
|
+
- nullable
|
|
77
|
+
- enum
|
|
78
|
+
- uuid
|
|
79
|
+
- ipv4
|
|
80
|
+
- ipv6
|
|
81
|
+
|
|
82
|
+
# ClickHouse-specific aggregate functions not in the standard baseline.
|
|
83
|
+
extra_aggregate_functions:
|
|
84
|
+
- uniq
|
|
85
|
+
- uniqexact
|
|
86
|
+
- uniqhll12
|
|
87
|
+
- uniqcombined
|
|
88
|
+
- uniqthetasketch
|
|
89
|
+
- quantile
|
|
90
|
+
- quantileexact
|
|
91
|
+
- quantileexactweighted
|
|
92
|
+
- quantiles
|
|
93
|
+
- quantilestdigest
|
|
94
|
+
- quantiletiming
|
|
95
|
+
- quantilebfloat16
|
|
96
|
+
- argmax
|
|
97
|
+
- argmin
|
|
98
|
+
- sumif
|
|
99
|
+
- countif
|
|
100
|
+
- avgif
|
|
101
|
+
- maxif
|
|
102
|
+
- minif
|
|
103
|
+
- anyif
|
|
104
|
+
- anylazyif
|
|
105
|
+
- sumarray
|
|
106
|
+
- grouparray
|
|
107
|
+
- grouparrayinsertat
|
|
108
|
+
- grouparraysample
|
|
109
|
+
- groupuniqarray
|
|
110
|
+
- skewpop
|
|
111
|
+
- skewsamp
|
|
112
|
+
- kurtpop
|
|
113
|
+
- kurtsamp
|
|
114
|
+
- entropy
|
|
115
|
+
- simplelinearregression
|
|
116
|
+
- stochasticlinearregression
|
|
117
|
+
- stochasticlogisticregression
|
|
118
|
+
- categoricaldistancefunctions
|
|
119
|
+
- any
|
|
120
|
+
- anylast
|
|
121
|
+
- anyheavy
|
|
122
|
+
- first_value
|
|
123
|
+
- last_value
|
|
124
|
+
- topk
|
|
125
|
+
- topkweighted
|
|
126
|
+
- singlevalueorempty
|
|
127
|
+
- deltasum
|
|
128
|
+
- deltasumtimestamp
|
|
129
|
+
- exponentialmovingaverage
|
|
@@ -48,3 +48,14 @@ array_in_list: "EXISTS(@exp, x -> x IN (@list))"
|
|
|
48
48
|
array_exclude_list: "NOT EXISTS(@exp, x -> x IN (@list))"
|
|
49
49
|
array_unnest_join: "LATERAL VIEW EXPLODE(@exp) AS @alias"
|
|
50
50
|
|
|
51
|
+
# Databricks-specific reserved keywords not in the standard baseline.
|
|
52
|
+
extra_reserved_keywords:
|
|
53
|
+
- qualify
|
|
54
|
+
|
|
55
|
+
# Databricks-specific aggregate functions not in the standard baseline.
|
|
56
|
+
extra_aggregate_functions:
|
|
57
|
+
- collect_list
|
|
58
|
+
- collect_set
|
|
59
|
+
- percentile_approx
|
|
60
|
+
- approx_percentile
|
|
61
|
+
|
|
@@ -43,3 +43,18 @@ array_unnest_join: "LATERAL FLATTEN(INPUT => @exp) @alias"
|
|
|
43
43
|
# null handling
|
|
44
44
|
if_null: "NVL(@exp, @when_null)"
|
|
45
45
|
null_if_zero: "NULLIFZERO(@exp)"
|
|
46
|
+
|
|
47
|
+
# Snowflake-specific reserved keywords not in the standard baseline.
|
|
48
|
+
extra_reserved_keywords:
|
|
49
|
+
- qualify
|
|
50
|
+
- pivot
|
|
51
|
+
- unpivot
|
|
52
|
+
- iff
|
|
53
|
+
- nvl
|
|
54
|
+
- decode
|
|
55
|
+
|
|
56
|
+
# Snowflake-specific aggregate functions not in the standard baseline.
|
|
57
|
+
extra_aggregate_functions:
|
|
58
|
+
- boolor_agg
|
|
59
|
+
- booland_agg
|
|
60
|
+
- approx_percentile
|
data/lib/dwh/settings.rb
CHANGED
|
@@ -79,5 +79,31 @@ module DWH
|
|
|
79
79
|
def using_base_settings?
|
|
80
80
|
@using_base
|
|
81
81
|
end
|
|
82
|
+
|
|
83
|
+
# Returns the full reserved-keyword list for this adapter class
# (baseline from base.yml merged with any extra_reserved_keywords from the
# adapter's own settings file). Safe to call at class level without an instance.
#
# @return [Array<String>] frozen, downcased, de-duplicated keywords
def reserved_keywords
  merged_setting_list(:reserved_keywords, :extra_reserved_keywords)
end

# Returns the full aggregate-function list for this adapter class
# (baseline from base.yml merged with any extra_aggregate_functions from the
# adapter's own settings file). Safe to call at class level without an instance.
#
# @return [Array<String>] frozen, downcased, de-duplicated function names
def aggregate_functions
  merged_setting_list(:aggregate_functions, :extra_aggregate_functions)
end

# Case-insensitive check against #reserved_keywords.
#
# @param name [#to_s] token to test
# @return [Boolean]
def reserved?(name)
  reserved_keywords.include?(name.to_s.downcase)
end

# Case-insensitive check against #aggregate_functions.
#
# @param name [#to_s] function name to test
# @return [Boolean]
def aggregate_function?(name)
  aggregate_functions.include?(name.to_s.downcase)
end

# Concatenates two list-valued settings (base entries first), normalises every
# entry to a downcased String and removes duplicates. Missing or scalar
# settings are coerced with Array(), so either key may be absent.
# The list is rebuilt from adapter_settings on every call rather than cached.
#
# @param base_key [Symbol] settings key holding the baseline list
# @param extra_key [Symbol] settings key holding the adapter-specific additions
# @return [Array<String>] frozen merged list
private def merged_setting_list(base_key, extra_key)
  entries = Array(adapter_settings[base_key]) + Array(adapter_settings[extra_key])
  entries.map { |entry| entry.to_s.downcase }.uniq.freeze
end
|
|
82
108
|
end
|
|
83
109
|
end
|
data/lib/dwh/version.rb
CHANGED
data/lib/dwh.rb
CHANGED
|
@@ -20,6 +20,7 @@ require_relative 'dwh/adapters/sqlite'
|
|
|
20
20
|
require_relative 'dwh/adapters/athena'
|
|
21
21
|
require_relative 'dwh/adapters/redshift'
|
|
22
22
|
require_relative 'dwh/adapters/databricks'
|
|
23
|
+
require_relative 'dwh/adapters/click_house'
|
|
23
24
|
|
|
24
25
|
# DWH encapsulates the full functionality of this gem.
|
|
25
26
|
#
|
|
@@ -52,6 +53,25 @@ module DWH
|
|
|
52
53
|
register(:athena, Adapters::Athena)
|
|
53
54
|
register(:redshift, Adapters::Redshift)
|
|
54
55
|
register(:databricks, Adapters::Databricks)
|
|
56
|
+
register(:clickhouse, Adapters::ClickHouse)
|
|
57
|
+
|
|
58
|
+
# Contents of base.yml with symbol top-level keys. The file is read on the
# first call and the resulting Hash is reused afterwards; it is the standard,
# warehouse-agnostic dialect baseline.
def self.base_settings
  return @base_settings if @base_settings

  loaded = YAML.load_file(Settings::BASE_SETTINGS_FILE)
  @base_settings = loaded.transform_keys(&:to_sym)
end

# Reserved keywords common to every adapter, taken from base.yml:
# downcased, de-duplicated, frozen and memoized.
# Per-adapter sets (adapter.reserved_keywords) extend this.
def self.reserved_keywords
  @reserved_keywords ||= begin
    words = Array(base_settings[:reserved_keywords]).map { |word| word.to_s.downcase }
    words.uniq.freeze
  end
end

# Aggregate functions common to every adapter, taken from base.yml:
# downcased, de-duplicated, frozen and memoized.
# Per-adapter sets (adapter.aggregate_functions) extend this.
def self.aggregate_functions
  @aggregate_functions ||= begin
    names = Array(base_settings[:aggregate_functions]).map { |fn| fn.to_s.downcase }
    names.uniq.freeze
  end
end
|
|
55
75
|
|
|
56
76
|
# start_reaper
|
|
57
77
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dwh
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.2
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ajo Abraham
|
|
@@ -154,6 +154,7 @@ files:
|
|
|
154
154
|
- lib/dwh.rb
|
|
155
155
|
- lib/dwh/adapters.rb
|
|
156
156
|
- lib/dwh/adapters/athena.rb
|
|
157
|
+
- lib/dwh/adapters/click_house.rb
|
|
157
158
|
- lib/dwh/adapters/databricks.rb
|
|
158
159
|
- lib/dwh/adapters/druid.rb
|
|
159
160
|
- lib/dwh/adapters/duck_db.rb
|
|
@@ -180,6 +181,7 @@ files:
|
|
|
180
181
|
- lib/dwh/settings.rb
|
|
181
182
|
- lib/dwh/settings/athena.yml
|
|
182
183
|
- lib/dwh/settings/base.yml
|
|
184
|
+
- lib/dwh/settings/clickhouse.yml
|
|
183
185
|
- lib/dwh/settings/databricks.yml
|
|
184
186
|
- lib/dwh/settings/druid.yml
|
|
185
187
|
- lib/dwh/settings/duckdb.yml
|