clickhouse-ruby 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +74 -1
  3. data/README.md +165 -79
  4. data/lib/clickhouse_ruby/active_record/arel_visitor.rb +205 -76
  5. data/lib/clickhouse_ruby/active_record/connection_adapter.rb +103 -98
  6. data/lib/clickhouse_ruby/active_record/railtie.rb +20 -15
  7. data/lib/clickhouse_ruby/active_record/relation_extensions.rb +398 -0
  8. data/lib/clickhouse_ruby/active_record/schema_statements.rb +90 -104
  9. data/lib/clickhouse_ruby/active_record.rb +24 -10
  10. data/lib/clickhouse_ruby/client.rb +181 -74
  11. data/lib/clickhouse_ruby/configuration.rb +51 -10
  12. data/lib/clickhouse_ruby/connection.rb +180 -64
  13. data/lib/clickhouse_ruby/connection_pool.rb +25 -19
  14. data/lib/clickhouse_ruby/errors.rb +13 -1
  15. data/lib/clickhouse_ruby/result.rb +11 -16
  16. data/lib/clickhouse_ruby/retry_handler.rb +172 -0
  17. data/lib/clickhouse_ruby/streaming_result.rb +309 -0
  18. data/lib/clickhouse_ruby/types/array.rb +11 -64
  19. data/lib/clickhouse_ruby/types/base.rb +59 -0
  20. data/lib/clickhouse_ruby/types/boolean.rb +28 -25
  21. data/lib/clickhouse_ruby/types/date_time.rb +10 -27
  22. data/lib/clickhouse_ruby/types/decimal.rb +173 -0
  23. data/lib/clickhouse_ruby/types/enum.rb +262 -0
  24. data/lib/clickhouse_ruby/types/float.rb +14 -28
  25. data/lib/clickhouse_ruby/types/integer.rb +21 -43
  26. data/lib/clickhouse_ruby/types/low_cardinality.rb +1 -1
  27. data/lib/clickhouse_ruby/types/map.rb +21 -36
  28. data/lib/clickhouse_ruby/types/null_safe.rb +81 -0
  29. data/lib/clickhouse_ruby/types/nullable.rb +2 -2
  30. data/lib/clickhouse_ruby/types/parser.rb +28 -18
  31. data/lib/clickhouse_ruby/types/registry.rb +40 -29
  32. data/lib/clickhouse_ruby/types/string.rb +9 -13
  33. data/lib/clickhouse_ruby/types/string_parser.rb +135 -0
  34. data/lib/clickhouse_ruby/types/tuple.rb +11 -68
  35. data/lib/clickhouse_ruby/types/uuid.rb +15 -22
  36. data/lib/clickhouse_ruby/types.rb +19 -15
  37. data/lib/clickhouse_ruby/version.rb +1 -1
  38. data/lib/clickhouse_ruby.rb +11 -11
  39. metadata +41 -6
@@ -0,0 +1,398 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickhouseRuby
4
+ module ActiveRecord
5
+ # Extensions to ActiveRecord::Relation for ClickHouse-specific query methods
6
+ #
7
+ # This module adds support for ClickHouse-specific clauses that aren't part of standard
8
+ # ActiveRecord, such as PREWHERE, FINAL, SAMPLE, and SETTINGS.
9
+ #
10
+ # These methods are mixed into ActiveRecord::Relation via the ConnectionAdapter
11
+ # when a ClickHouse connection is established.
12
+ #
13
+ # @example PREWHERE usage
14
+ # Event.prewhere(date: Date.today).where(status: 'active')
15
+ # # SELECT * FROM events PREWHERE date = '2024-01-31' WHERE status = 'active'
16
+ #
17
+ module RelationExtensions
18
+ extend ::ActiveSupport::Concern
19
+
20
# Adds ClickHouse PREWHERE conditions to a copy of this relation.
#
# Dispatch on the first argument:
# - omitted: returns a PrewhereChain wrapping a spawned relation, enabling
#   `prewhere.not(...)`
# - `nil` or `false`: returns the receiver itself, unchanged
# - anything else (Hash, String with optional bind values, Arel node):
#   forwarded to {#prewhere!} on a spawned relation
#
# @param opts [Hash, String, Arel::Nodes::Node, Symbol, nil, false] the conditions
# @param rest [Array] bind values accompanying a String condition
# @return [ActiveRecord::Relation, PrewhereChain] the spawned relation (or the
#   receiver for nil/false), or a PrewhereChain when called without arguments
#
# @example
#   Event.prewhere(active: true, status: 'done')
#   Event.prewhere('date > ?', date)
#   Event.prewhere.not(deleted: true)
def prewhere(opts = :chain, *rest)
  return PrewhereChain.new(spawn) if :chain == opts
  return self unless opts

  spawn.prewhere!(opts, *rest)
end
45
+
46
# Appends PREWHERE conditions to this relation (mutating variant).
#
# Supported inputs:
# - String: used verbatim when no bind values are given; otherwise the
#   statement and its bind values go through `sanitize_sql_array`
# - Hash: one condition per key, built by `build_prewhere_condition`
# - Arel::Nodes::Node: stored as-is
#
# Any other input adds nothing (the condition list is still initialised).
#
# @param opts [Hash, String, Arel::Nodes::Node] the conditions
# @param rest [Array] bind values for a String condition
# @return [ActiveRecord::Relation] self
def prewhere!(opts, *rest)
  additions =
    case opts
    when String
      # Without bind values the statement must not be run through
      # sanitize_sql_array: that helper %-formats bind-less statements, which
      # breaks literals such as "name LIKE '%foo%'".
      [Arel.sql(rest.empty? ? opts : sanitize_sql_array([opts, *rest]))]
    when Hash
      opts.map { |column, value| build_prewhere_condition(column, value) }
    when Arel::Nodes::Node
      [opts]
    else
      []
    end

  # Build a new array instead of appending in place: a relation copied with
  # `clone` (which is how `spawn` is expected to produce copies) shares the
  # same array object, so `<<` would leak conditions into the source relation.
  @prewhere_values = (@prewhere_values || []) + additions
  self
end
67
+
68
# PREWHERE conditions collected so far on this relation.
#
# @return [Array] the stored condition nodes, or a fresh empty array when
#   none have been added yet
def prewhere_values
  return [] unless @prewhere_values

  @prewhere_values
end
74
+
75
# --- Internal helpers -------------------------------------------------
#
# The helpers below are made private by name (see the `private` call after
# them) rather than with a bare `private` keyword. A bare `private` would
# also hide every method defined later in this module, including the public
# query methods that are invoked with an explicit receiver
# (e.g. `spawn.sample!(...)`).

# Build a prewhere condition from a column and value.
#
# Value handling:
# - Array: column IN (values)
# - Range: column BETWEEN start AND end
# - anything else, including nil: `eq(value)` on the column attribute
#
# @param column [Symbol, String] the column name
# @param value [Object] the value to filter by
# @return [Arel::Nodes::Node] the condition node
def build_prewhere_condition(column, value)
  attribute = arel_table[column]

  case value
  when Array then attribute.in(value)
  when Range then attribute.between(value)
  else attribute.eq(value)
  end
end

# Normalize a settings hash.
#
# - Keys are converted to strings
# - Ruby `true` / `false` become 1 / 0; other values are kept as given
#
# @param opts [Hash] the raw settings
# @return [Hash] normalized settings
def normalize_settings(opts)
  opts.transform_keys(&:to_s).transform_values do |value|
    case value
    when true then 1
    when false then 0
    else value
    end
  end
end

# Format a setting value for SQL.
#
# - Strings are wrapped in single quotes, with backslashes and single quotes
#   backslash-escaped so the value cannot terminate the literal early
# - Other values are converted via `to_s`
#
# @param value [Object] the setting value
# @return [String] the formatted value
def format_setting_value(value)
  return value.to_s unless value.is_a?(String)

  escaped = value.gsub(/[\\']/) { |char| "\\#{char}" }
  "'#{escaped}'"
end

private :build_prewhere_condition, :normalize_settings, :format_setting_value
137
+
138
# SAMPLE clause support.
#
# Returns a spawned relation carrying the given sampling parameters; the
# actual work is delegated to {#sample!}.
#
# Intended argument forms (they are stored as given, not validated here):
# - Float between 0 and 1, e.g. `sample(0.1)`: fraction of the data
# - Integer >= 1, e.g. `sample(10000)`: at least that many rows
# - `offset:` keyword, e.g. `sample(0.1, offset: 0.5)`: sampling offset
#
# Note: ClickHouse only supports SAMPLE on tables created with a
# SAMPLE BY clause, for example:
#
#   CREATE TABLE events (id UInt64, ...) ENGINE = MergeTree()
#   SAMPLE BY intHash32(id)
#   ORDER BY id
#
# @param ratio_or_rows [Float, Integer] sampling ratio (0 < x <= 1) or min row count
# @param offset [Float, Integer, nil] optional sampling offset
# @return [ActiveRecord::Relation] whatever {#sample!} returns for the spawned relation
#
# @example Fractional sampling
#   Event.sample(0.1).limit(100)
#
# @example Absolute sampling
#   Event.sample(10000).average(:amount)
#
# @example Sampling with an offset
#   Event.sample(0.1, offset: 0.5).where(status: 'done')
def sample(ratio_or_rows, offset: nil)
  relation = spawn
  relation.sample!(ratio_or_rows, offset: offset)
end
178
+
179
# Stores the sampling parameters on this relation (mutating variant).
#
# Both values are overwritten on every call; passing no offset resets a
# previously stored offset to nil.
#
# @param ratio_or_rows [Float, Integer] sampling ratio or min row count
# @param offset [Float, Integer, nil] optional offset
# @return [ActiveRecord::Relation] self
def sample!(ratio_or_rows, offset: nil)
  tap do
    @sample_value = ratio_or_rows
    @sample_offset = offset
  end
end
189
+
190
# Sampling ratio or row count stored by {#sample!}.
#
# @return [Float, Integer, nil] the stored value, or nil when sampling was never requested
def sample_value
  return unless defined?(@sample_value)

  @sample_value
end
196
+
197
# Sampling offset stored by {#sample!}.
#
# @return [Float, Integer, nil] the stored offset, or nil when none was given
def sample_offset
  return unless defined?(@sample_offset)

  @sample_offset
end
203
+
204
# SETTINGS clause support.
#
# Attaches per-query ClickHouse settings to a spawned relation by delegating
# to {#settings!}. Repeated calls accumulate, with later values replacing
# earlier ones for the same key.
#
# @param opts [Hash] the settings as key-value pairs
# @return [ActiveRecord::Relation] whatever {#settings!} returns for the spawned relation
#
# @example
#   Event.settings(max_execution_time: 60)
#   Event.settings(max_threads: 4, async_insert: true)
#   Event.settings(max_threads: 4).settings(async_insert: true)
#   Event.settings(max_execution_time: 60).where(active: true)
def settings(opts = {})
  relation = spawn
  relation.settings!(opts)
end
222
+
223
# Merges settings into this relation (mutating variant).
#
# The given hash is passed through `normalize_settings` and merged over the
# settings already stored; on key conflicts the new value wins.
#
# A new hash is assigned rather than merging in place: a relation copied
# with `clone` shares the same hash object with its source, so an in-place
# `merge!` would also change the settings of the relation it was copied from.
#
# @param opts [Hash] the settings as key-value pairs
# @return [ActiveRecord::Relation] self
#
# @private
def settings!(opts)
  @query_settings = (@query_settings || {}).merge(normalize_settings(opts))
  self
end
234
+
235
# Settings accumulated on this relation.
#
# @return [Hash] the stored settings hash, or a fresh empty hash when none were set
#
# @private
def query_settings
  return {} unless @query_settings

  @query_settings
end
243
+
244
# Renders the accumulated settings as a SQL SETTINGS clause.
#
# Each entry becomes `key = value`, where the value is produced by
# `format_setting_value`; entries are joined with ", ".
#
# @return [String, nil] the SETTINGS clause, or nil when there are no settings
#
# @private
def settings_clause
  current = query_settings
  return if current.empty?

  rendered = current.map { |name, raw| "#{name} = #{format_setting_value(raw)}" }
  "SETTINGS " + rendered.join(", ")
end
258
+
259
# FINAL modifier support.
#
# Marks a spawned relation as using ClickHouse's FINAL modifier by delegating
# to {#final!}. FINAL asks ClickHouse to merge rows at query time, which is
# relevant for the merging table engines (ReplacingMergeTree,
# CollapsingMergeTree, SummingMergeTree, AggregatingMergeTree,
# VersionedCollapsingMergeTree).
#
# Warning: merging at query time makes queries noticeably slower; use FINAL
# only when fully merged results are required.
#
# See {#final!} for the settings that are added when the relation already
# carries PREWHERE conditions.
#
# @return [ActiveRecord::Relation] whatever {#final!} returns for the spawned relation
#
# @example Basic FINAL usage
#   User.final
#   # SELECT * FROM users FINAL
#
# @example FINAL with WHERE
#   User.final.where(active: true)
#   # SELECT * FROM users FINAL WHERE active = 1
#
# @example FINAL on a relation that already has PREWHERE conditions
#   User.prewhere(active: true).final
def final
  relation = spawn
  relation.final!
end
289
+
290
# Marks this relation as using the FINAL modifier (mutating variant).
#
# When PREWHERE conditions are already present on the relation at the time
# of the call, the following settings are set to 1:
# - optimize_move_to_prewhere
# - optimize_move_to_prewhere_if_final
#
# The settings are merged into a new hash rather than written in place: a
# relation copied with `clone` shares the same hash object with its source,
# so in-place writes would also alter the relation it was copied from.
#
# @return [ActiveRecord::Relation] self
#
# @private
def final!
  @use_final = true

  if prewhere_values.any?
    @query_settings = (@query_settings || {}).merge(
      "optimize_move_to_prewhere" => 1,
      "optimize_move_to_prewhere_if_final" => 1
    )
  end

  self
end
307
+
308
# Whether the FINAL modifier has been requested on this relation.
#
# @return [Boolean] the stored flag, or false when it was never set
#
# @private
def final?
  return false unless @use_final

  @use_final
end
316
+
317
# Returns a copy of this relation with the FINAL flag cleared.
#
# Only the flag is reset on the spawned relation; everything else it carries
# is left as spawned. Handy when deriving a subquery that should not use FINAL.
#
# @return [ActiveRecord::Relation] the spawned relation with FINAL disabled
#
# @example
#   relation = User.final.where(active: true)
#   subquery = relation.unscope_final
#   # SELECT * FROM users WHERE active = 1 (no FINAL)
def unscope_final
  relation = spawn
  relation.instance_variable_set(:@use_final, false)
  relation
end
330
+
331
# Override build_arel to attach ClickHouse-specific state to the Arel AST.
#
# After delegating to `super`, the FINAL flag, SAMPLE value/offset, PREWHERE
# conditions and query settings are copied onto the select statement as
# `@clickhouse_*` instance variables, where the SQL visitor is expected to
# pick them up. ASTs that are not select statements are left untouched.
#
# When FINAL is active and PREWHERE conditions exist, the settings
# `optimize_move_to_prewhere` and `optimize_move_to_prewhere_if_final` are
# supplied (as 1) at this point, so the result does not depend on whether
# `final` or `prewhere` was called first. Values the caller set explicitly
# for those keys take precedence, and the relation's own settings hash is
# not modified.
#
# @return [Arel::SelectManager]
def build_arel(*)
  arel = super
  ast = arel.ast
  return arel unless ast.is_a?(Arel::Nodes::SelectStatement)

  settings = @query_settings
  if @use_final && @prewhere_values && !@prewhere_values.empty?
    settings = {
      "optimize_move_to_prewhere" => 1,
      "optimize_move_to_prewhere_if_final" => 1
    }.merge(settings || {})
  end

  ast.instance_variable_set(:@clickhouse_final, @use_final)
  ast.instance_variable_set(:@clickhouse_sample_value, @sample_value)
  ast.instance_variable_set(:@clickhouse_sample_offset, @sample_offset)
  ast.instance_variable_set(:@clickhouse_prewhere_values, @prewhere_values)
  ast.instance_variable_set(:@clickhouse_query_settings, settings)

  arel
end

# Keep this hook non-public regardless of the visibility section it sits in.
private :build_arel
351
+
352
# Chain object for the `prewhere.not` syntax.
#
# Allows negation of prewhere conditions:
#   Model.prewhere.not(deleted: true)
#   # PREWHERE NOT(deleted = 1)
#
class PrewhereChain
  # @param relation [ActiveRecord::Relation] the relation that receives the
  #   negated condition
  def initialize(relation)
    @relation = relation
  end

  # Negate the given conditions and add them as a PREWHERE condition.
  #
  # - Hash: every pair becomes a condition; several pairs are AND-ed together
  #   before being negated
  # - String: used verbatim when no bind values are given, otherwise
  #   sanitized together with its bind values
  # - anything else is negated as given
  #
  # An empty Hash or nil yields nothing to negate; the relation is then
  # returned unchanged instead of receiving a NOT over an empty expression.
  #
  # @param opts [Hash, String, Arel::Nodes::Node] the conditions to negate
  # @param rest [Array] bind parameters
  # @return [ActiveRecord::Relation] the relation with negated prewhere
  def not(opts, *rest)
    condition = condition_for(opts, rest)
    return @relation if condition.nil?

    @relation.prewhere!(Arel::Nodes::Not.new(condition))
  end

  private

  # Turns the caller's input into a single node/SQL fragment to negate.
  def condition_for(opts, rest)
    case opts
    when Hash
      build_combined_condition(opts)
    when String
      # Bind-less statements are not run through sanitize_sql_array because
      # that helper %-formats them, which breaks literals like "LIKE '%x%'".
      Arel.sql(rest.empty? ? opts : @relation.sanitize_sql_array([opts, *rest]))
    else
      opts
    end
  end

  # Builds one condition per pair; returns nil for an empty Hash, the single
  # condition for one pair, or an AND node over all of them otherwise.
  def build_combined_condition(opts)
    conditions = opts.map do |key, value|
      @relation.send(:build_prewhere_condition, key, value)
    end
    return conditions.first if conditions.size <= 1

    Arel::Nodes::And.new(conditions)
  end
end
396
+ end
397
+ end
398
+ end