clickhouse-ruby 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +74 -1
- data/README.md +165 -79
- data/lib/clickhouse_ruby/active_record/arel_visitor.rb +205 -76
- data/lib/clickhouse_ruby/active_record/connection_adapter.rb +103 -98
- data/lib/clickhouse_ruby/active_record/railtie.rb +20 -15
- data/lib/clickhouse_ruby/active_record/relation_extensions.rb +398 -0
- data/lib/clickhouse_ruby/active_record/schema_statements.rb +90 -104
- data/lib/clickhouse_ruby/active_record.rb +24 -10
- data/lib/clickhouse_ruby/client.rb +181 -74
- data/lib/clickhouse_ruby/configuration.rb +51 -10
- data/lib/clickhouse_ruby/connection.rb +180 -64
- data/lib/clickhouse_ruby/connection_pool.rb +25 -19
- data/lib/clickhouse_ruby/errors.rb +13 -1
- data/lib/clickhouse_ruby/result.rb +11 -16
- data/lib/clickhouse_ruby/retry_handler.rb +172 -0
- data/lib/clickhouse_ruby/streaming_result.rb +309 -0
- data/lib/clickhouse_ruby/types/array.rb +11 -64
- data/lib/clickhouse_ruby/types/base.rb +59 -0
- data/lib/clickhouse_ruby/types/boolean.rb +28 -25
- data/lib/clickhouse_ruby/types/date_time.rb +10 -27
- data/lib/clickhouse_ruby/types/decimal.rb +173 -0
- data/lib/clickhouse_ruby/types/enum.rb +262 -0
- data/lib/clickhouse_ruby/types/float.rb +14 -28
- data/lib/clickhouse_ruby/types/integer.rb +21 -43
- data/lib/clickhouse_ruby/types/low_cardinality.rb +1 -1
- data/lib/clickhouse_ruby/types/map.rb +21 -36
- data/lib/clickhouse_ruby/types/null_safe.rb +81 -0
- data/lib/clickhouse_ruby/types/nullable.rb +2 -2
- data/lib/clickhouse_ruby/types/parser.rb +28 -18
- data/lib/clickhouse_ruby/types/registry.rb +40 -29
- data/lib/clickhouse_ruby/types/string.rb +9 -13
- data/lib/clickhouse_ruby/types/string_parser.rb +135 -0
- data/lib/clickhouse_ruby/types/tuple.rb +11 -68
- data/lib/clickhouse_ruby/types/uuid.rb +15 -22
- data/lib/clickhouse_ruby/types.rb +19 -15
- data/lib/clickhouse_ruby/version.rb +1 -1
- data/lib/clickhouse_ruby.rb +11 -11
- metadata +41 -6
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ClickhouseRuby
|
|
4
|
+
module ActiveRecord
|
|
5
|
+
# Extensions to ActiveRecord::Relation for ClickHouse-specific query methods
|
|
6
|
+
#
|
|
7
|
+
# This module adds support for ClickHouse-specific clauses that aren't part of standard
|
|
8
|
+
# ActiveRecord, such as PREWHERE, FINAL, SAMPLE, and SETTINGS.
|
|
9
|
+
#
|
|
10
|
+
# These methods are mixed into ActiveRecord::Relation via the ConnectionAdapter
|
|
11
|
+
# when a ClickHouse connection is established.
|
|
12
|
+
#
|
|
13
|
+
# @example PREWHERE usage
|
|
14
|
+
# Event.prewhere(date: Date.today).where(status: 'active')
|
|
15
|
+
# # SELECT * FROM events PREWHERE date = '2024-01-31' WHERE status = 'active'
|
|
16
|
+
#
|
|
17
|
+
module RelationExtensions
|
|
18
|
+
extend ::ActiveSupport::Concern
|
|
19
|
+
|
|
20
|
+
# Adds a ClickHouse PREWHERE filter to the query.
#
# PREWHERE filters data at an earlier stage than WHERE: ClickHouse reads only
# the columns needed for PREWHERE, applies the filter, then reads the
# remaining columns for WHERE processing.
#
# Accepted forms:
# - Hash conditions: `prewhere(active: true, status: 'done')`
# - String conditions with binds: `prewhere('date > ?', date)`
# - Arel nodes: `prewhere(arel_expr)`
# - No argument (chain syntax): `prewhere.not(deleted: true)`
#
# @param opts [Hash, String, Arel::Nodes::Node, Symbol, nil, false] the conditions;
#   the default `:chain` sentinel requests a PrewhereChain
# @param rest [Array] bind parameters for string conditions
# @return [ActiveRecord::Relation, PrewhereChain] the receiver itself when opts
#   is nil or false, a PrewhereChain wrapping a spawned relation when opts is
#   :chain, otherwise whatever prewhere! returns for a spawned relation
def prewhere(opts = :chain, *rest)
  # nil and false are the only falsy values, so this is exactly the no-op case.
  return self unless opts
  return PrewhereChain.new(spawn) if opts.equal?(:chain)

  spawn.prewhere!(opts, *rest)
end
|
|
45
|
+
|
|
46
|
+
# Internal method to apply prewhere conditions
|
|
47
|
+
#
|
|
48
|
+
# @param opts [Hash, String, Arel::Nodes::Node] the conditions
|
|
49
|
+
# @param rest [Array] bind parameters
|
|
50
|
+
# @return [ActiveRecord::Relation] self
|
|
51
|
+
def prewhere!(opts, *rest)
|
|
52
|
+
@prewhere_values ||= []
|
|
53
|
+
|
|
54
|
+
case opts
|
|
55
|
+
when String
|
|
56
|
+
@prewhere_values << Arel.sql(sanitize_sql_array([opts, *rest]))
|
|
57
|
+
when Hash
|
|
58
|
+
opts.each do |key, value|
|
|
59
|
+
@prewhere_values << build_prewhere_condition(key, value)
|
|
60
|
+
end
|
|
61
|
+
when Arel::Nodes::Node
|
|
62
|
+
@prewhere_values << opts
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
self
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Returns the prewhere conditions collected so far.
#
# @return [Array] the stored condition nodes, or an empty array when none
#   have been set on this relation
def prewhere_values
  return [] unless @prewhere_values

  @prewhere_values
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
# Builds one prewhere condition for a column/value pair.
#
# The Arel predicate is chosen by the value's type:
# - Array: `in` (column IN (values))
# - Range: `between` (column BETWEEN start AND end)
# - anything else, including nil: `eq` (column = value, or IS NULL for nil)
#
# @param column [Symbol, String] the column name
# @param value [Object] the value to filter by
# @return [Arel::Nodes::Node] the condition node
def build_prewhere_condition(column, value)
  attribute = arel_table[column]

  case value
  when Array then attribute.in(value)
  when Range then attribute.between(value)
  else attribute.eq(value)
  end
end
|
|
102
|
+
|
|
103
|
+
# Normalizes a raw settings hash.
#
# - keys are converted with to_s
# - the values true / false become 1 / 0
# - every other value is kept as given
#
# @param opts [Hash] the raw settings
# @return [Hash] normalized settings
def normalize_settings(opts)
  stringified = opts.transform_keys { |name| name.to_s }

  stringified.transform_values do |setting|
    next 1 if setting.equal?(true)
    next 0 if setting.equal?(false)

    setting
  end
end
|
|
120
|
+
|
|
121
|
+
# Formats a setting value for inclusion in a SETTINGS clause.
#
# - Strings are wrapped in single quotes; embedded backslashes and single
#   quotes are backslash-escaped so the value cannot terminate the literal
# - every other value is converted via to_s
#
# @param value [Object] the setting value
# @return [String] the formatted value
def format_setting_value(value)
  return value.to_s unless value.is_a?(String)

  # Block form avoids the special meaning of backslashes in gsub replacement strings.
  escaped = value.gsub(/[\\']/) { |char| "\\#{char}" }
  "'#{escaped}'"
end
|
|
137
|
+
|
|
138
|
+
# The bare `private` earlier in this module applies to every definition that
# follows it. Restore public visibility here so the query methods below
# (sample, settings, final, ...) and their bang variants can be called with
# an explicit receiver.
public

# SAMPLE clause support
#
# Queries a subset of data for approximate results with faster execution.
# SAMPLE allows you to explore large datasets or run approximate aggregations.
#
# Can be called with:
# - Float between 0 and 1: `sample(0.1)` (fraction of data, here 10%)
# - Integer >= 1: `sample(10000)` (at least n rows)
# - With offset: `sample(0.1, offset: 0.5)` (fraction with offset, for
#   reproducible subsets)
#
# Important: Table must be created with SAMPLE BY clause!
# ```sql
# CREATE TABLE events (id UInt64, ...) ENGINE = MergeTree()
# SAMPLE BY intHash32(id)
# ORDER BY id
# ```
#
# @param ratio_or_rows [Float, Integer] sampling ratio (0 < x <= 1) or min row count
# @param offset [Float, Integer, nil] optional offset for deterministic subsets
# @return [ActiveRecord::Relation] a new relation with the sample applied
#
# @example Fractional sampling (10% of data)
#   Event.sample(0.1).limit(100)
#   # SELECT * FROM events SAMPLE 0.1 LIMIT 100
#
# @example Absolute sampling (at least 10,000 rows)
#   Event.sample(10000).average(:amount)
#   # SELECT avg(amount) FROM events SAMPLE 10000
#
# @example Sample with offset (reproducible subsets)
#   Event.sample(0.1, offset: 0.5).where(status: 'done')
#   # SELECT * FROM events SAMPLE 0.1 OFFSET 0.5 WHERE status = 'done'
def sample(ratio_or_rows, offset: nil)
  spawn.sample!(ratio_or_rows, offset: offset)
end
|
|
178
|
+
|
|
179
|
+
# Stores the sampling parameters on this relation (mutating).
#
# Both values are overwritten on every call, so omitting `offset` clears a
# previously stored offset.
#
# @param ratio_or_rows [Float, Integer] sampling ratio or min row count
# @param offset [Float, Integer, nil] optional offset
# @return [ActiveRecord::Relation] self
def sample!(ratio_or_rows, offset: nil)
  @sample_value, @sample_offset = ratio_or_rows, offset
  self
end
|
|
189
|
+
|
|
190
|
+
# Reader for the value stored by sample!.
#
# @return [Float, Integer, nil] the sample ratio or row count; nil when no
#   sample has been set on this relation
def sample_value
  @sample_value
end
|
|
196
|
+
|
|
197
|
+
# Reader for the offset stored by sample!.
#
# @return [Float, Integer, nil] the sample offset; nil when none has been set
def sample_offset
  @sample_offset
end
|
|
203
|
+
|
|
204
|
+
# SETTINGS clause support
#
# Per-query configuration for ClickHouse execution parameters.
#
# Can be called with:
# - Hash settings: `settings(max_execution_time: 60, max_threads: 4)`
# - Multiple calls (settings merge): `settings(max_threads: 4).settings(async_insert: true)`
#
# @param opts [Hash] the settings as key-value pairs
# @return [ActiveRecord::Relation] a new relation with settings applied
#
# @example
#   Event.settings(max_execution_time: 60)
#   Event.settings(max_threads: 4, async_insert: true)
#   Event.settings(max_execution_time: 60).where(active: true)
def settings(opts = {})
  relation = spawn
  relation.settings!(opts)
end
|
|
222
|
+
|
|
223
|
+
# Merges settings into this relation (mutating).
#
# The given settings are normalized via normalize_settings and merged over
# the ones already stored; later values win for the same key.
#
# @param opts [Hash] the settings as key-value pairs
# @return [ActiveRecord::Relation] self
#
# @private
def settings!(opts)
  # Use a non-destructive merge and reassign. A copied relation still
  # references the same hash object as its source, so merge! would also
  # change the source relation's settings.
  @query_settings = (@query_settings || {}).merge(normalize_settings(opts))
  self
end
|
|
234
|
+
|
|
235
|
+
# Returns the settings stored on this relation.
#
# @return [Hash] the stored settings, or an empty hash when none have been set
#
# @private
def query_settings
  return {} unless @query_settings

  @query_settings
end
|
|
243
|
+
|
|
244
|
+
# Renders the stored settings as a SETTINGS clause for SQL generation.
#
# Each pair is rendered as `name = value`, with the value passed through
# format_setting_value; pairs are joined with ", ". Setting names are
# interpolated as given.
#
# @return [String, nil] the SETTINGS clause or nil if no settings
#
# @private
def settings_clause
  current = query_settings
  return if current.empty?

  rendered = current.map { |name, setting| "#{name} = #{format_setting_value(setting)}" }
  "SETTINGS #{rendered.join(', ')}"
end
|
|
258
|
+
|
|
259
|
+
# FINAL modifier support
#
# Forces ClickHouse to merge data at query time for deduplication.
# Applies to ReplacingMergeTree, CollapsingMergeTree, SummingMergeTree,
# AggregatingMergeTree, and VersionedCollapsingMergeTree.
#
# Warning: FINAL forces merge during query execution, which can be 2-10x slower.
# Use only when accuracy is critical; otherwise accept eventual consistency.
#
# When the relation already carries prewhere conditions at the moment `final`
# is called, final! also stores these settings:
# - optimize_move_to_prewhere = 1
# - optimize_move_to_prewhere_if_final = 1
# Calling `prewhere` after `final` does not add them in this module.
#
# @return [ActiveRecord::Relation] a new relation with FINAL modifier applied
#
# @example Basic FINAL usage
#   User.final
#   # SELECT * FROM users FINAL
#
# @example FINAL with WHERE
#   User.final.where(active: true)
#   # SELECT * FROM users FINAL WHERE active = 1
#
# @example PREWHERE followed by FINAL (auto-adds settings)
#   User.prewhere(created_at: Date.today..).final
#   # SELECT * FROM users FINAL PREWHERE ...
#   # SETTINGS optimize_move_to_prewhere = 1, optimize_move_to_prewhere_if_final = 1
def final
  relation = spawn
  relation.final!
end
|
|
289
|
+
|
|
290
|
+
# Marks this relation as using the FINAL modifier (mutating).
#
# If prewhere conditions are already present, the settings
# optimize_move_to_prewhere and optimize_move_to_prewhere_if_final are also
# stored with value 1, overriding any earlier value for those keys.
#
# @return [ActiveRecord::Relation] self
#
# @private
def final!
  @use_final = true

  if prewhere_values.any?
    # Merge into a new hash instead of writing into the existing one. A
    # copied relation still references the same hash object as its source,
    # so in-place assignment would also change the source relation's settings.
    @query_settings = (@query_settings || {}).merge(
      "optimize_move_to_prewhere" => 1,
      "optimize_move_to_prewhere_if_final" => 1
    )
  end

  self
end
|
|
307
|
+
|
|
308
|
+
# Reports whether the FINAL modifier has been applied to this relation.
#
# @return [Boolean] the stored flag when it is set, false otherwise
#
# @private
def final?
  return false unless @use_final

  @use_final
end
|
|
316
|
+
|
|
317
|
+
# Returns a copy of the relation with the FINAL flag switched off.
#
# Useful for building subqueries that shouldn't include FINAL. Only the flag
# is reset; any settings stored on the relation are left as they are.
#
# @return [ActiveRecord::Relation] a new relation without FINAL
#
# @example
#   relation = User.final.where(active: true)
#   subquery = relation.unscope_final
#   # SELECT * FROM users WHERE active = 1 (no FINAL)
def unscope_final
  copy = spawn
  copy.instance_variable_set(:@use_final, false)
  copy
end
|
|
330
|
+
|
|
331
|
+
# Extends build_arel so the resulting Arel AST carries the ClickHouse state.
#
# After calling super, and only when the AST is an Arel::Nodes::SelectStatement,
# the relation's FINAL flag, SAMPLE value and offset, PREWHERE values and
# query settings are copied onto the AST as @clickhouse_* instance variables
# (raw values, so unset state is copied as nil). This is how the ArelVisitor
# is meant to see them when generating SQL.
#
# @return [Arel::SelectManager]
def build_arel(*)
  manager = super
  statement = manager.ast
  return manager unless statement.is_a?(Arel::Nodes::SelectStatement)

  {
    :@clickhouse_final => @use_final,
    :@clickhouse_sample_value => @sample_value,
    :@clickhouse_sample_offset => @sample_offset,
    :@clickhouse_prewhere_values => @prewhere_values,
    :@clickhouse_query_settings => @query_settings
  }.each { |ivar, state| statement.instance_variable_set(ivar, state) }

  manager
end
|
|
351
|
+
|
|
352
|
+
# Chain object for prewhere.not syntax
#
# Allows negation of prewhere conditions:
#   Model.prewhere.not(deleted: true)
#   # PREWHERE NOT(deleted = 1)
#
class PrewhereChain
  # @param relation [ActiveRecord::Relation] the relation that receives the
  #   negated condition
  def initialize(relation)
    @relation = relation
  end

  # Negates the given conditions and adds them to the relation's prewhere.
  #
  # - Hash: each pair becomes a condition; several pairs are combined with
  #   AND before being negated
  # - String: sanitized together with the bind parameters
  # - anything else: negated as given
  #
  # nil, an empty Hash and an empty String carry no condition; they leave
  # the relation unchanged instead of producing an empty negation.
  #
  # @param opts [Hash, String, Arel::Nodes::Node] the conditions to negate
  # @param rest [Array] bind parameters
  # @return [ActiveRecord::Relation] the relation with negated prewhere
  def not(opts, *rest)
    return @relation if empty_condition?(opts)

    condition =
      case opts
      when Hash then build_combined_condition(opts)
      when String then Arel.sql(@relation.sanitize_sql_array([opts, *rest]))
      else opts
      end

    @relation.prewhere!(Arel::Nodes::Not.new(condition))
  end

  private

  # True when opts cannot yield any condition to negate.
  def empty_condition?(opts)
    opts.nil? || ((opts.is_a?(Hash) || opts.is_a?(String)) && opts.empty?)
  end

  # Builds one node for a conditions hash: the single condition itself, or an
  # AND node over all of them.
  def build_combined_condition(opts)
    conditions = opts.map do |column, value|
      # build_prewhere_condition is private on the relation, hence send.
      @relation.send(:build_prewhere_condition, column, value)
    end

    conditions.size == 1 ? conditions.first : Arel::Nodes::And.new(conditions)
  end
end
|
|
396
|
+
end
|
|
397
|
+
end
|
|
398
|
+
end
|