clickhouse-ruby 0.1.0 → 0.2.0

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +74 -1
  3. data/README.md +165 -79
  4. data/lib/clickhouse_ruby/active_record/arel_visitor.rb +205 -76
  5. data/lib/clickhouse_ruby/active_record/connection_adapter.rb +103 -98
  6. data/lib/clickhouse_ruby/active_record/railtie.rb +20 -15
  7. data/lib/clickhouse_ruby/active_record/relation_extensions.rb +398 -0
  8. data/lib/clickhouse_ruby/active_record/schema_statements.rb +90 -104
  9. data/lib/clickhouse_ruby/active_record.rb +24 -10
  10. data/lib/clickhouse_ruby/client.rb +181 -74
  11. data/lib/clickhouse_ruby/configuration.rb +51 -10
  12. data/lib/clickhouse_ruby/connection.rb +180 -64
  13. data/lib/clickhouse_ruby/connection_pool.rb +25 -19
  14. data/lib/clickhouse_ruby/errors.rb +13 -1
  15. data/lib/clickhouse_ruby/result.rb +11 -16
  16. data/lib/clickhouse_ruby/retry_handler.rb +172 -0
  17. data/lib/clickhouse_ruby/streaming_result.rb +309 -0
  18. data/lib/clickhouse_ruby/types/array.rb +11 -64
  19. data/lib/clickhouse_ruby/types/base.rb +59 -0
  20. data/lib/clickhouse_ruby/types/boolean.rb +28 -25
  21. data/lib/clickhouse_ruby/types/date_time.rb +10 -27
  22. data/lib/clickhouse_ruby/types/decimal.rb +173 -0
  23. data/lib/clickhouse_ruby/types/enum.rb +262 -0
  24. data/lib/clickhouse_ruby/types/float.rb +14 -28
  25. data/lib/clickhouse_ruby/types/integer.rb +21 -43
  26. data/lib/clickhouse_ruby/types/low_cardinality.rb +1 -1
  27. data/lib/clickhouse_ruby/types/map.rb +21 -36
  28. data/lib/clickhouse_ruby/types/null_safe.rb +81 -0
  29. data/lib/clickhouse_ruby/types/nullable.rb +2 -2
  30. data/lib/clickhouse_ruby/types/parser.rb +28 -18
  31. data/lib/clickhouse_ruby/types/registry.rb +40 -29
  32. data/lib/clickhouse_ruby/types/string.rb +9 -13
  33. data/lib/clickhouse_ruby/types/string_parser.rb +135 -0
  34. data/lib/clickhouse_ruby/types/tuple.rb +11 -68
  35. data/lib/clickhouse_ruby/types/uuid.rb +15 -22
  36. data/lib/clickhouse_ruby/types.rb +19 -15
  37. data/lib/clickhouse_ruby/version.rb +1 -1
  38. data/lib/clickhouse_ruby.rb +11 -11
  39. metadata +41 -6
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'json'
4
- require 'uri'
5
- require 'bigdecimal'
3
+ require "json"
4
+ require "uri"
5
+ require "bigdecimal"
6
6
 
7
7
  module ClickhouseRuby
8
8
  # Main HTTP client for ClickHouse communication
@@ -40,10 +40,10 @@ module ClickhouseRuby
40
40
  #
41
41
  class Client
42
42
  # Default response format for queries
43
- DEFAULT_FORMAT = 'JSONCompact'
43
+ DEFAULT_FORMAT = "JSONCompact"
44
44
 
45
45
  # Format for bulk inserts (5x faster than VALUES)
46
- INSERT_FORMAT = 'JSONEachRow'
46
+ INSERT_FORMAT = "JSONEachRow"
47
47
 
48
48
  # @return [Configuration] the client configuration
49
49
  attr_reader :config
@@ -51,6 +51,9 @@ module ClickhouseRuby
51
51
  # @return [ConnectionPool] the connection pool
52
52
  attr_reader :pool
53
53
 
54
+ # @return [RetryHandler] the retry handler
55
+ attr_reader :retry_handler
56
+
54
57
  # Creates a new Client
55
58
  #
56
59
  # @param config [Configuration] connection configuration
@@ -61,6 +64,13 @@ module ClickhouseRuby
61
64
  @pool = ConnectionPool.new(config)
62
65
  @logger = config.logger
63
66
  @default_settings = config.default_settings || {}
67
+ @retry_handler = RetryHandler.new(
68
+ max_attempts: config.max_retries,
69
+ initial_backoff: config.initial_backoff,
70
+ max_backoff: config.max_backoff,
71
+ multiplier: config.backoff_multiplier,
72
+ jitter: config.retry_jitter,
73
+ )
64
74
  end
65
75
 
66
76
  # Executes a SQL query and returns results
@@ -82,17 +92,9 @@ module ClickhouseRuby
82
92
  # settings: { max_rows_to_read: 1_000_000 }
83
93
  # )
84
94
  def execute(sql, settings: {}, format: DEFAULT_FORMAT)
85
- # Build the query with format
86
- query_with_format = "#{sql.strip} FORMAT #{format}"
87
-
88
- # Build query parameters
89
- params = build_query_params(settings)
90
-
91
- # Execute via connection pool
92
- response = execute_request(query_with_format, params)
93
-
94
- # Parse response based on format
95
- parse_response(response, sql, format)
95
+ @retry_handler.with_retry(idempotent: true) do
96
+ execute_internal(sql, settings: settings, format: format)
97
+ end
96
98
  end
97
99
 
98
100
  # Executes a command (INSERT, CREATE, DROP, etc.) that doesn't return data
@@ -137,41 +139,12 @@ module ClickhouseRuby
137
139
  # { id: 1, name: 'click', extra: 'ignored' },
138
140
  # ], columns: ['id', 'name'])
139
141
  def insert(table, rows, columns: nil, settings: {}, format: :json_each_row)
140
- raise ArgumentError, 'rows cannot be empty' if rows.nil? || rows.empty?
141
-
142
- # Determine columns from first row if not specified
143
- columns ||= rows.first.keys.map(&:to_s)
144
-
145
- # Build INSERT statement
146
- columns_str = columns.map { |c| quote_identifier(c) }.join(', ')
147
- sql = "INSERT INTO #{quote_identifier(table)} (#{columns_str}) FORMAT #{INSERT_FORMAT}"
148
-
149
- # Build JSON body
150
- body = rows.map do |row|
151
- row_data = {}
152
- columns.each do |col|
153
- key = col.to_s
154
- value = row[col] || row[col.to_sym]
155
- row_data[key] = serialize_value(value)
156
- end
157
- JSON.generate(row_data)
158
- end.join("\n")
142
+ raise ArgumentError, "rows cannot be empty" if rows.nil? || rows.empty?
159
143
 
160
- # Build params and execute
161
- params = build_query_params(settings)
162
- path = build_path(params)
163
-
164
- @pool.with_connection do |conn|
165
- log_query(sql) if @logger
166
-
167
- response = conn.post("#{path}&query=#{URI.encode_www_form_component(sql)}", body, {
168
- 'Content-Type' => 'application/json'
169
- })
170
-
171
- handle_response(response, sql)
144
+ @retry_handler.with_retry(idempotent: false) do |query_id|
145
+ settings_with_id = settings.merge(query_id: query_id)
146
+ insert_internal(table, rows, columns: columns, settings: settings_with_id, format: format)
172
147
  end
173
-
174
- true
175
148
  end
176
149
 
177
150
  # Checks if the ClickHouse server is reachable
@@ -179,7 +152,9 @@ module ClickhouseRuby
179
152
  # @return [Boolean] true if server responds to ping
180
153
  def ping
181
154
  @pool.with_connection(&:ping)
182
- rescue StandardError
155
+ rescue ClickhouseRuby::ConnectionError, ClickhouseRuby::ConnectionTimeout,
156
+ ClickhouseRuby::PoolTimeout, SystemCallError, SocketError,
157
+ Net::OpenTimeout, Net::ReadTimeout
183
158
  false
184
159
  end
185
160
 
@@ -188,8 +163,8 @@ module ClickhouseRuby
188
163
  # @return [String] version string
189
164
  # @raise [QueryError] if query fails
190
165
  def server_version
191
- result = execute('SELECT version() AS version')
192
- result.first['version']
166
+ result = execute("SELECT version() AS version")
167
+ result.first["version"]
193
168
  end
194
169
 
195
170
  # Closes all connections in the pool
@@ -209,17 +184,149 @@ module ClickhouseRuby
209
184
  @pool.stats
210
185
  end
211
186
 
187
+ # Returns a streaming result for memory-efficient processing
188
+ #
189
+ # Useful for queries that return large result sets. Results are parsed
190
+ # line-by-line as they arrive from the server, keeping memory usage constant.
191
+ #
192
+ # @param sql [String] the SQL query to execute
193
+ # @param settings [Hash] ClickHouse settings for this query
194
+ # @return [StreamingResult] the streaming result
195
+ #
196
+ # @example
197
+ # result = client.stream_execute('SELECT * FROM huge_table')
198
+ # result.each { |row| process(row) }
199
+ #
200
+ # @example Lazy enumeration
201
+ # client.stream_execute('SELECT * FROM huge_table')
202
+ # .lazy
203
+ # .select { |row| row['active'] == 1 }
204
+ # .take(100)
205
+ # .to_a
206
+ def stream_execute(sql, settings: {})
207
+ # Create dedicated connection (not from pool)
208
+ connection = Connection.new(**@config.to_connection_options)
209
+
210
+ StreamingResult.new(
211
+ connection,
212
+ sql,
213
+ compression: @config.compression,
214
+ )
215
+ end
216
+
217
+ # Convenience method for iterating over rows one at a time
218
+ #
219
+ # Equivalent to stream_execute(sql).each(&block)
220
+ #
221
+ # @param sql [String] the SQL query to execute
222
+ # @param settings [Hash] ClickHouse settings
223
+ # @yield [Hash] each row
224
+ # @return [Enumerator] if no block given, otherwise nil
225
+ #
226
+ # @example
227
+ # client.each_row('SELECT * FROM events') do |row|
228
+ # process(row)
229
+ # end
230
+ def each_row(sql, settings: {}, &block)
231
+ stream_execute(sql, settings: settings).each(&block)
232
+ end
233
+
234
+ # Convenience method for batch processing
235
+ #
236
+ # Equivalent to stream_execute(sql).each_batch(size: batch_size, &block)
237
+ #
238
+ # @param sql [String] the SQL query to execute
239
+ # @param batch_size [Integer] number of rows per batch
240
+ # @param settings [Hash] ClickHouse settings
241
+ # @yield [Array<Hash>] each batch of rows
242
+ # @return [Enumerator] if no block given, otherwise nil
243
+ #
244
+ # @example
245
+ # client.each_batch('SELECT * FROM events', batch_size: 500) do |batch|
246
+ # insert_into_cache(batch)
247
+ # end
248
+ def each_batch(sql, batch_size: 1000, settings: {}, &block)
249
+ stream_execute(sql, settings: settings).each_batch(size: batch_size, &block)
250
+ end
251
+
212
252
  private
213
253
 
254
+ # Internal execute without retry wrapper
255
+ #
256
+ # @param sql [String] the SQL query to execute
257
+ # @param settings [Hash] ClickHouse settings for this query
258
+ # @param format [String] response format (default: JSONCompact)
259
+ # @return [Result] query results
260
+ def execute_internal(sql, settings: {}, format: DEFAULT_FORMAT)
261
+ # Build the query with format
262
+ query_with_format = "#{sql.strip} FORMAT #{format}"
263
+
264
+ # Build query parameters
265
+ params = build_query_params(settings)
266
+
267
+ # Execute via connection pool
268
+ response = execute_request(query_with_format, params)
269
+
270
+ # Parse response based on format
271
+ parse_response(response, sql, format)
272
+ end
273
+
274
+ # Internal insert without retry wrapper
275
+ #
276
+ # @param table [String] the table name
277
+ # @param rows [Array<Hash>] array of row hashes
278
+ # @param columns [Array<String>, nil] column names
279
+ # @param settings [Hash] ClickHouse settings (may include query_id)
280
+ # @param format [Symbol] insert format
281
+ # @return [Boolean] true if successful
282
+ def insert_internal(table, rows, columns: nil, settings: {}, format: :json_each_row)
283
+ # Determine columns from first row if not specified
284
+ columns ||= rows.first.keys.map(&:to_s)
285
+
286
+ # Build INSERT statement
287
+ columns_str = columns.map { |c| quote_identifier(c) }.join(", ")
288
+ sql = "INSERT INTO #{quote_identifier(table)} (#{columns_str}) FORMAT #{INSERT_FORMAT}"
289
+
290
+ # Build JSON body
291
+ body = rows.map do |row|
292
+ row_data = {}
293
+ columns.each do |col|
294
+ key = col.to_s
295
+ value = row[col] || row[col.to_sym]
296
+ row_data[key] = serialize_value(value)
297
+ end
298
+ JSON.generate(row_data)
299
+ end.join("\n")
300
+
301
+ # Build params and execute
302
+ params = build_query_params(settings)
303
+ path = build_path(params)
304
+
305
+ @pool.with_connection do |conn|
306
+ log_query(sql) if @logger
307
+
308
+ response = conn.post("#{path}&query=#{URI.encode_www_form_component(sql)}", body, {
309
+ "Content-Type" => "application/json",
310
+ },)
311
+
312
+ handle_response(response, sql)
313
+ end
314
+
315
+ true
316
+ end
317
+
214
318
  # Builds query parameters including database and settings
215
319
  #
216
320
  # @param settings [Hash] query-specific settings
217
321
  # @return [Hash] all query parameters
218
322
  def build_query_params(settings = {})
219
323
  params = {
220
- 'database' => @config.database
324
+ "database" => @config.database,
221
325
  }
222
326
 
327
+ # Add compression parameter if enabled
328
+ params["enable_http_compression"] = "1" if @config.compression_enabled?
329
+
223
330
  # Merge default settings and query-specific settings
224
331
  all_settings = @default_settings.merge(settings)
225
332
  all_settings.each do |key, value|
@@ -234,7 +341,7 @@ module ClickhouseRuby
234
341
  # @param params [Hash] query parameters
235
342
  # @return [String] the path with query string
236
343
  def build_path(params)
237
- query_string = params.map { |k, v| "#{k}=#{URI.encode_www_form_component(v)}" }.join('&')
344
+ query_string = params.map { |k, v| "#{k}=#{URI.encode_www_form_component(v)}" }.join("&")
238
345
  "/?#{query_string}"
239
346
  end
240
347
 
@@ -268,9 +375,9 @@ module ClickhouseRuby
268
375
  # @raise [QueryError] if response indicates an error
269
376
  def handle_response(response, sql)
270
377
  # CRITICAL: Check status FIRST - never silently ignore errors
271
- unless response.code == '200'
272
- raise_clickhouse_error(response, sql)
273
- end
378
+ return if response.code == "200"
379
+
380
+ raise_clickhouse_error(response, sql)
274
381
 
275
382
  # Response is successful - caller can now safely parse body
276
383
  end
@@ -284,7 +391,7 @@ module ClickhouseRuby
284
391
  # @param sql [String] the SQL that failed
285
392
  # @raise [QueryError] always raises
286
393
  def raise_clickhouse_error(response, sql)
287
- body = response.body || ''
394
+ body = response.body || ""
288
395
  code = extract_error_code(body)
289
396
  message = extract_error_message(body)
290
397
 
@@ -297,7 +404,7 @@ module ClickhouseRuby
297
404
  message,
298
405
  code: code,
299
406
  http_status: response.code,
300
- sql: truncate_sql(sql)
407
+ sql: truncate_sql(sql),
301
408
  )
302
409
  end
303
410
 
@@ -320,9 +427,9 @@ module ClickhouseRuby
320
427
  # ClickHouse error format: "Code: 60. DB::Exception: Table ... doesn't exist."
321
428
  # Try to extract just the meaningful part
322
429
  if body =~ /DB::Exception:\s*(.+?)(?:\s*\(version|$)/m
323
- $1.strip
430
+ ::Regexp.last_match(1).strip
324
431
  else
325
- body.strip.empty? ? 'Unknown ClickHouse error' : body.strip
432
+ body.strip.empty? ? "Unknown ClickHouse error" : body.strip
326
433
  end
327
434
  end
328
435
 
@@ -339,13 +446,13 @@ module ClickhouseRuby
339
446
  return Result.empty if body.nil? || body.strip.empty?
340
447
 
341
448
  case format
342
- when 'JSONCompact'
449
+ when "JSONCompact"
343
450
  parse_json_compact(body, sql)
344
- when 'JSON'
451
+ when "JSON"
345
452
  parse_json(body, sql)
346
453
  else
347
454
  # For unknown formats, return raw body wrapped in result
348
- Result.new(columns: ['result'], types: ['String'], data: [[body]])
455
+ Result.new(columns: ["result"], types: ["String"], data: [[body]])
349
456
  end
350
457
  end
351
458
 
@@ -367,10 +474,10 @@ module ClickhouseRuby
367
474
  def parse_json(body, sql)
368
475
  data = parse_json_body(body, sql)
369
476
 
370
- meta = data['meta'] || []
371
- columns = meta.map { |m| m['name'] }
372
- types = meta.map { |m| m['type'] }
373
- rows = data['data'] || []
477
+ meta = data["meta"] || []
478
+ columns = meta.map { |m| m["name"] }
479
+ types = meta.map { |m| m["type"] }
480
+ rows = data["data"] || []
374
481
 
375
482
  # JSON format returns rows as objects, convert to arrays
376
483
  row_arrays = rows.map { |row| columns.map { |col| row[col] } }
@@ -379,7 +486,7 @@ module ClickhouseRuby
379
486
  columns: columns,
380
487
  types: types,
381
488
  data: row_arrays,
382
- statistics: data['statistics'] || {}
489
+ statistics: data["statistics"] || {},
383
490
  )
384
491
  end
385
492
 
@@ -395,7 +502,7 @@ module ClickhouseRuby
395
502
  raise QueryError.new(
396
503
  "Failed to parse ClickHouse response: #{e.message}",
397
504
  sql: truncate_sql(sql),
398
- original_error: e
505
+ original_error: e,
399
506
  )
400
507
  end
401
508
 
@@ -405,7 +512,7 @@ module ClickhouseRuby
405
512
  # @return [String] quoted identifier
406
513
  def quote_identifier(identifier)
407
514
  # ClickHouse uses backticks for identifiers
408
- "`#{identifier.to_s.gsub('`', '``')}`"
515
+ "`#{identifier.to_s.gsub("`", "``")}`"
409
516
  end
410
517
 
411
518
  # Serializes a Ruby value for JSON insertion
@@ -416,9 +523,9 @@ module ClickhouseRuby
416
523
  case value
417
524
  when Time, DateTime
418
525
  # ClickHouse expects ISO8601 format for DateTime
419
- value.strftime('%Y-%m-%d %H:%M:%S')
526
+ value.strftime("%Y-%m-%d %H:%M:%S")
420
527
  when Date
421
- value.strftime('%Y-%m-%d')
528
+ value.strftime("%Y-%m-%d")
422
529
  when BigDecimal
423
530
  value.to_f
424
531
  when Symbol
@@ -464,7 +571,7 @@ module ClickhouseRuby
464
571
 
465
572
  @logger.error(
466
573
  "[ClickhouseRuby] ClickHouse error: #{message} " \
467
- "(code: #{code || 'unknown'}, http: #{http_status}, sql: #{truncate_sql(sql, 200)})"
574
+ "(code: #{code || "unknown"}, http: #{http_status}, sql: #{truncate_sql(sql, 200)})",
468
575
  )
469
576
  end
470
577
  end
@@ -59,15 +59,36 @@ module ClickhouseRuby
59
59
  # @return [Hash] default ClickHouse settings for all queries
60
60
  attr_accessor :default_settings
61
61
 
62
+ # @return [String, nil] compression algorithm ('gzip' or nil to disable)
63
+ attr_accessor :compression
64
+
65
+ # @return [Integer] minimum body size in bytes to compress (default: 1024)
66
+ attr_accessor :compression_threshold
67
+
68
+ # @return [Integer] maximum number of retry attempts (default: 3)
69
+ attr_accessor :max_retries
70
+
71
+ # @return [Float] initial backoff delay in seconds (default: 1.0)
72
+ attr_accessor :initial_backoff
73
+
74
+ # @return [Float] maximum backoff delay in seconds (default: 120.0)
75
+ attr_accessor :max_backoff
76
+
77
+ # @return [Float] exponential backoff multiplier (default: 1.6)
78
+ attr_accessor :backoff_multiplier
79
+
80
+ # @return [Symbol] jitter strategy: :full, :equal, or :none (default: :equal)
81
+ attr_accessor :retry_jitter
82
+
62
83
  # Creates a new Configuration with sensible defaults
63
84
  def initialize
64
- @host = 'localhost'
85
+ @host = "localhost"
65
86
  @port = 8123
66
- @database = 'default'
87
+ @database = "default"
67
88
  @username = nil
68
89
  @password = nil
69
90
  @ssl = false
70
- @ssl_verify = true # SECURITY: Verify certificates by default
91
+ @ssl_verify = true # SECURITY: Verify certificates by default
71
92
  @ssl_ca_path = nil
72
93
  @connect_timeout = 10
73
94
  @read_timeout = 60
@@ -77,13 +98,20 @@ module ClickhouseRuby
77
98
  @logger = nil
78
99
  @log_level = :info
79
100
  @default_settings = {}
101
+ @compression = nil
102
+ @compression_threshold = 1024
103
+ @max_retries = 3
104
+ @initial_backoff = 1.0
105
+ @max_backoff = 120.0
106
+ @backoff_multiplier = 1.6
107
+ @retry_jitter = :equal
80
108
  end
81
109
 
82
110
  # Returns the base URL for HTTP connections
83
111
  #
84
112
  # @return [String] the base URL
85
113
  def base_url
86
- scheme = ssl ? 'https' : 'http'
114
+ scheme = ssl ? "https" : "http"
87
115
  "#{scheme}://#{host}:#{port}"
88
116
  end
89
117
 
@@ -98,6 +126,13 @@ module ClickhouseRuby
98
126
  [8443, 443].include?(port)
99
127
  end
100
128
 
129
+ # Returns whether compression is enabled
130
+ #
131
+ # @return [Boolean] true if compression is set to 'gzip'
132
+ def compression_enabled?
133
+ @compression == "gzip"
134
+ end
135
+
101
136
  # Returns a hash suitable for creating HTTP connections
102
137
  #
103
138
  # @return [Hash] connection options
@@ -113,7 +148,9 @@ module ClickhouseRuby
113
148
  ssl_ca_path: ssl_ca_path,
114
149
  connect_timeout: connect_timeout,
115
150
  read_timeout: read_timeout,
116
- write_timeout: write_timeout
151
+ write_timeout: write_timeout,
152
+ compression: compression,
153
+ compression_threshold: compression_threshold,
117
154
  }
118
155
  end
119
156
 
@@ -124,7 +161,11 @@ module ClickhouseRuby
124
161
  new_config = Configuration.new
125
162
  instance_variables.each do |var|
126
163
  value = instance_variable_get(var)
127
- new_config.instance_variable_set(var, value.dup) rescue value
164
+ begin
165
+ new_config.instance_variable_set(var, value.dup)
166
+ rescue StandardError
167
+ value
168
+ end
128
169
  end
129
170
  new_config
130
171
  end
@@ -134,10 +175,10 @@ module ClickhouseRuby
134
175
  # @raise [ConfigurationError] if the configuration is invalid
135
176
  # @return [Boolean] true if valid
136
177
  def validate!
137
- raise ConfigurationError, 'host is required' if host.nil? || host.empty?
138
- raise ConfigurationError, 'port must be a positive integer' unless port.is_a?(Integer) && port.positive?
139
- raise ConfigurationError, 'database is required' if database.nil? || database.empty?
140
- raise ConfigurationError, 'pool_size must be at least 1' unless pool_size >= 1
178
+ raise ConfigurationError, "host is required" if host.nil? || host.empty?
179
+ raise ConfigurationError, "port must be a positive integer" unless port.is_a?(Integer) && port.positive?
180
+ raise ConfigurationError, "database is required" if database.nil? || database.empty?
181
+ raise ConfigurationError, "pool_size must be at least 1" unless pool_size >= 1
141
182
 
142
183
  true
143
184
  end