clickhouse-ruby 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +74 -1
  3. data/README.md +165 -79
  4. data/lib/clickhouse_ruby/active_record/arel_visitor.rb +205 -76
  5. data/lib/clickhouse_ruby/active_record/connection_adapter.rb +103 -98
  6. data/lib/clickhouse_ruby/active_record/railtie.rb +20 -15
  7. data/lib/clickhouse_ruby/active_record/relation_extensions.rb +398 -0
  8. data/lib/clickhouse_ruby/active_record/schema_statements.rb +90 -104
  9. data/lib/clickhouse_ruby/active_record.rb +24 -10
  10. data/lib/clickhouse_ruby/client.rb +181 -74
  11. data/lib/clickhouse_ruby/configuration.rb +51 -10
  12. data/lib/clickhouse_ruby/connection.rb +180 -64
  13. data/lib/clickhouse_ruby/connection_pool.rb +25 -19
  14. data/lib/clickhouse_ruby/errors.rb +13 -1
  15. data/lib/clickhouse_ruby/result.rb +11 -16
  16. data/lib/clickhouse_ruby/retry_handler.rb +172 -0
  17. data/lib/clickhouse_ruby/streaming_result.rb +309 -0
  18. data/lib/clickhouse_ruby/types/array.rb +11 -64
  19. data/lib/clickhouse_ruby/types/base.rb +59 -0
  20. data/lib/clickhouse_ruby/types/boolean.rb +28 -25
  21. data/lib/clickhouse_ruby/types/date_time.rb +10 -27
  22. data/lib/clickhouse_ruby/types/decimal.rb +173 -0
  23. data/lib/clickhouse_ruby/types/enum.rb +262 -0
  24. data/lib/clickhouse_ruby/types/float.rb +14 -28
  25. data/lib/clickhouse_ruby/types/integer.rb +21 -43
  26. data/lib/clickhouse_ruby/types/low_cardinality.rb +1 -1
  27. data/lib/clickhouse_ruby/types/map.rb +21 -36
  28. data/lib/clickhouse_ruby/types/null_safe.rb +81 -0
  29. data/lib/clickhouse_ruby/types/nullable.rb +2 -2
  30. data/lib/clickhouse_ruby/types/parser.rb +28 -18
  31. data/lib/clickhouse_ruby/types/registry.rb +40 -29
  32. data/lib/clickhouse_ruby/types/string.rb +9 -13
  33. data/lib/clickhouse_ruby/types/string_parser.rb +135 -0
  34. data/lib/clickhouse_ruby/types/tuple.rb +11 -68
  35. data/lib/clickhouse_ruby/types/uuid.rb +15 -22
  36. data/lib/clickhouse_ruby/types.rb +19 -15
  37. data/lib/clickhouse_ruby/version.rb +1 -1
  38. data/lib/clickhouse_ruby.rb +11 -11
  39. metadata +41 -6
@@ -1,8 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'net/http'
4
- require 'uri'
5
- require 'openssl'
3
+ require "net/http"
4
+ require "uri"
5
+ require "openssl"
6
+ require "zlib"
6
7
 
7
8
  module ClickhouseRuby
8
9
  # Single HTTP connection wrapper for ClickHouse communication
@@ -31,6 +32,44 @@ module ClickhouseRuby
31
32
  # )
32
33
  #
33
34
  class Connection
35
+ # Error mapping for network issues to ClickhouseRuby exceptions
36
+ # Each entry maps an exception class to a lambda that creates the appropriate error
37
+ NETWORK_ERROR_MAPPING = {
38
+ OpenSSL::SSL::SSLError => lambda { |e, _conn|
39
+ SSLError.new("SSL connection failed: #{e.message}", original_error: e)
40
+ },
41
+ Errno::ECONNREFUSED => lambda { |e, conn|
42
+ ConnectionNotEstablished.new("Connection refused to #{conn.host}:#{conn.port}: #{e.message}", original_error: e)
43
+ },
44
+ Errno::EHOSTUNREACH => lambda { |e, conn|
45
+ ConnectionNotEstablished.new("Host unreachable #{conn.host}:#{conn.port}: #{e.message}", original_error: e)
46
+ },
47
+ SocketError => lambda { |e, conn|
48
+ ConnectionNotEstablished.new("Socket error to #{conn.host}:#{conn.port}: #{e.message}", original_error: e)
49
+ },
50
+ Net::OpenTimeout => lambda { |e, conn|
51
+ ConnectionTimeout.new("Connection timeout to #{conn.host}:#{conn.port}", original_error: e)
52
+ },
53
+ Net::ReadTimeout => lambda { |e, _conn|
54
+ ConnectionTimeout.new("Read timeout: #{e.message}", original_error: e)
55
+ },
56
+ Net::WriteTimeout => lambda { |e, _conn|
57
+ ConnectionTimeout.new("Write timeout: #{e.message}", original_error: e)
58
+ },
59
+ Errno::ECONNRESET => lambda { |e, _conn|
60
+ ConnectionError.new("Connection reset: #{e.message}", original_error: e)
61
+ },
62
+ Errno::EPIPE => lambda { |e, _conn|
63
+ ConnectionError.new("Broken pipe: #{e.message}", original_error: e)
64
+ },
65
+ IOError => lambda { |e, _conn|
66
+ ConnectionError.new("IO error: #{e.message}", original_error: e)
67
+ },
68
+ }.freeze
69
+
70
+ # All exception classes that should be caught and mapped
71
+ NETWORK_ERRORS = NETWORK_ERROR_MAPPING.keys.freeze
72
+
34
73
  # @return [String] the ClickHouse host
35
74
  attr_reader :host
36
75
 
@@ -66,10 +105,12 @@ module ClickhouseRuby
66
105
  # @param connect_timeout [Integer] connection timeout in seconds
67
106
  # @param read_timeout [Integer] read timeout in seconds
68
107
  # @param write_timeout [Integer] write timeout in seconds
108
+ # @param compression [String, nil] compression algorithm ('gzip' or nil)
109
+ # @param compression_threshold [Integer] minimum body size to compress
69
110
  def initialize(
70
111
  host:,
71
112
  port: 8123,
72
- database: 'default',
113
+ database: "default",
73
114
  username: nil,
74
115
  password: nil,
75
116
  use_ssl: false,
@@ -77,7 +118,9 @@ module ClickhouseRuby
77
118
  ssl_ca_path: nil,
78
119
  connect_timeout: 10,
79
120
  read_timeout: 60,
80
- write_timeout: 60
121
+ write_timeout: 60,
122
+ compression: nil,
123
+ compression_threshold: 1024
81
124
  )
82
125
  @host = host
83
126
  @port = port
@@ -90,6 +133,8 @@ module ClickhouseRuby
90
133
  @connect_timeout = connect_timeout
91
134
  @read_timeout = read_timeout
92
135
  @write_timeout = write_timeout
136
+ @compression = compression
137
+ @compression_threshold = compression_threshold
93
138
 
94
139
  @http = nil
95
140
  @connected = false
@@ -106,29 +151,11 @@ module ClickhouseRuby
106
151
  @mutex.synchronize do
107
152
  return self if @connected && @http&.started?
108
153
 
109
- begin
154
+ with_error_handling do
110
155
  @http = build_http
111
156
  @http.start
112
157
  @connected = true
113
158
  @last_used_at = Time.now
114
- rescue OpenSSL::SSL::SSLError => e
115
- @connected = false
116
- raise SSLError.new(
117
- "SSL connection failed: #{e.message}",
118
- original_error: e
119
- )
120
- rescue Errno::ECONNREFUSED, Errno::EHOSTUNREACH, SocketError => e
121
- @connected = false
122
- raise ConnectionNotEstablished.new(
123
- "Failed to connect to #{@host}:#{@port}: #{e.message}",
124
- original_error: e
125
- )
126
- rescue Net::OpenTimeout => e
127
- @connected = false
128
- raise ConnectionTimeout.new(
129
- "Connection timeout to #{@host}:#{@port}",
130
- original_error: e
131
- )
132
159
  end
133
160
  end
134
161
 
@@ -141,7 +168,11 @@ module ClickhouseRuby
141
168
  def disconnect
142
169
  @mutex.synchronize do
143
170
  if @http&.started?
144
- @http.finish rescue nil
171
+ begin
172
+ @http.finish
173
+ rescue StandardError
174
+ nil
175
+ end
145
176
  end
146
177
  @http = nil
147
178
  @connected = false
@@ -170,22 +201,26 @@ module ClickhouseRuby
170
201
  ensure_connected
171
202
 
172
203
  request = Net::HTTP::Post.new(path)
173
- request.body = body
174
204
 
175
205
  # Set default headers
176
- request['Content-Type'] = 'application/x-www-form-urlencoded'
177
- request['Accept'] = 'application/json'
178
- request['User-Agent'] = "ClickhouseRuby/#{ClickhouseRuby::VERSION} Ruby/#{RUBY_VERSION}"
206
+ request["Content-Type"] = "application/x-www-form-urlencoded"
207
+ request["Accept"] = "application/json"
208
+ request["User-Agent"] = "ClickhouseRuby/#{ClickhouseRuby::VERSION} Ruby/#{RUBY_VERSION}"
209
+
210
+ # Add compression headers if enabled
211
+ request["Accept-Encoding"] = "gzip" if @compression == "gzip"
179
212
 
180
213
  # Add authentication
181
- if @username
182
- request.basic_auth(@username, @password || '')
183
- end
214
+ request.basic_auth(@username, @password || "") if @username
184
215
 
185
216
  # Merge custom headers
186
217
  headers.each { |k, v| request[k] = v }
187
218
 
188
- execute_request(request)
219
+ # Handle request body compression
220
+ setup_body(request, body)
221
+
222
+ response = execute_request(request)
223
+ decompress_response(response)
189
224
  end
190
225
 
191
226
  # Executes an HTTP GET request
@@ -197,12 +232,10 @@ module ClickhouseRuby
197
232
  ensure_connected
198
233
 
199
234
  request = Net::HTTP::Get.new(path)
200
- request['Accept'] = 'application/json'
201
- request['User-Agent'] = "ClickhouseRuby/#{ClickhouseRuby::VERSION} Ruby/#{RUBY_VERSION}"
235
+ request["Accept"] = "application/json"
236
+ request["User-Agent"] = "ClickhouseRuby/#{ClickhouseRuby::VERSION} Ruby/#{RUBY_VERSION}"
202
237
 
203
- if @username
204
- request.basic_auth(@username, @password || '')
205
- end
238
+ request.basic_auth(@username, @password || "") if @username
206
239
 
207
240
  headers.each { |k, v| request[k] = v }
208
241
 
@@ -215,8 +248,8 @@ module ClickhouseRuby
215
248
  def ping
216
249
  connect unless connected?
217
250
 
218
- response = get('/ping')
219
- response.code == '200' && response.body&.strip == 'Ok.'
251
+ response = get("/ping")
252
+ response.code == "200" && response.body&.strip == "Ok."
220
253
  rescue StandardError
221
254
  false
222
255
  end
@@ -242,8 +275,8 @@ module ClickhouseRuby
242
275
  #
243
276
  # @return [String]
244
277
  def inspect
245
- scheme = @use_ssl ? 'https' : 'http'
246
- status = @connected ? 'connected' : 'disconnected'
278
+ scheme = @use_ssl ? "https" : "http"
279
+ status = @connected ? "connected" : "disconnected"
247
280
  "#<#{self.class.name} #{scheme}://#{@host}:#{@port} #{status}>"
248
281
  end
249
282
 
@@ -272,6 +305,7 @@ module ClickhouseRuby
272
305
  else
273
306
  # Only disable if explicitly requested (development only!)
274
307
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
308
+ warn "[ClickhouseRuby] WARNING: SSL verification disabled. Insecure for production."
275
309
  end
276
310
 
277
311
  # Use modern TLS versions
@@ -288,9 +322,9 @@ module ClickhouseRuby
288
322
  #
289
323
  # @raise [ConnectionNotEstablished] if not connected
290
324
  def ensure_connected
291
- unless @connected && @http&.started?
292
- connect
293
- end
325
+ return if @connected && @http&.started?
326
+
327
+ connect
294
328
  end
295
329
 
296
330
  # Executes an HTTP request with error handling
@@ -299,28 +333,110 @@ module ClickhouseRuby
299
333
  # @return [Net::HTTPResponse]
300
334
  def execute_request(request)
301
335
  @mutex.synchronize do
302
- begin
336
+ with_error_handling do
303
337
  response = @http.request(request)
304
338
  @last_used_at = Time.now
305
339
  response
306
- rescue Net::ReadTimeout => e
307
- @connected = false
308
- raise ConnectionTimeout.new(
309
- "Read timeout: #{e.message}",
310
- original_error: e
311
- )
312
- rescue Net::WriteTimeout => e
313
- @connected = false
314
- raise ConnectionTimeout.new(
315
- "Write timeout: #{e.message}",
316
- original_error: e
317
- )
318
- rescue Errno::ECONNRESET, Errno::EPIPE, IOError => e
319
- @connected = false
320
- raise ConnectionError.new(
321
- "Connection lost: #{e.message}",
322
- original_error: e
323
- )
340
+ end
341
+ end
342
+ end
343
+
344
+ # Executes a block with network error handling
345
+ #
346
+ # Maps network exceptions to ClickhouseRuby exceptions and marks
347
+ # connection as disconnected on error.
348
+ #
349
+ # @yield the block to execute
350
+ # @return the block's return value
351
+ # @raise [ConnectionError, ConnectionTimeout, ConnectionNotEstablished, SSLError]
352
+ def with_error_handling
353
+ yield
354
+ rescue *NETWORK_ERRORS => e
355
+ @connected = false
356
+ handler = NETWORK_ERROR_MAPPING[e.class] || find_handler_for(e)
357
+ raise handler.call(e, self) if handler
358
+ raise e # Re-raise if no handler found (shouldn't happen, but be safe)
359
+ end
360
+
361
+ # Finds error handler by checking exception class and its ancestors
362
+ #
363
+ # @param exception [Exception] the exception to find a handler for
364
+ # @return [Proc, nil] the handler lambda or nil if not found
365
+ private def find_handler_for(exception)
366
+ exception.class.ancestors.each do |ancestor_class|
367
+ return NETWORK_ERROR_MAPPING[ancestor_class] if NETWORK_ERROR_MAPPING.key?(ancestor_class)
368
+ end
369
+ nil
370
+ end
371
+
372
+ # Sets up request body with optional compression
373
+ #
374
+ # @param request [Net::HTTPRequest] the request object
375
+ # @param body [String, nil] the request body
376
+ # @return [void]
377
+ def setup_body(request, body)
378
+ return unless body
379
+
380
+ if should_compress?(body)
381
+ request["Content-Encoding"] = "gzip"
382
+ request["Content-Type"] = "application/octet-stream"
383
+ request.body = Zlib.gzip(body, level: Zlib::DEFAULT_COMPRESSION)
384
+ else
385
+ request.body = body
386
+ end
387
+ end
388
+
389
+ # Determines if body should be compressed
390
+ #
391
+ # @param body [String] the request body
392
+ # @return [Boolean] true if compression is enabled and body exceeds threshold
393
+ def should_compress?(body)
394
+ @compression == "gzip" && body.bytesize > @compression_threshold
395
+ end
396
+
397
+ # Decompresses response if needed
398
+ #
399
+ # @param response [Net::HTTPResponse] the HTTP response
400
+ # @return [Net::HTTPResponse] the response (possibly wrapped with decompression)
401
+ def decompress_response(response)
402
+ return response unless response["Content-Encoding"] == "gzip"
403
+
404
+ DecompressedResponse.new(response)
405
+ end
406
+
407
+ # Wrapper for automatically decompressing gzip responses
408
+ class DecompressedResponse
409
+ # @param response [Net::HTTPResponse] the original HTTP response
410
+ def initialize(response)
411
+ @response = response
412
+ @decompressed_body = nil
413
+ end
414
+
415
+ # Returns the HTTP status code
416
+ #
417
+ # @return [String] status code
418
+ def code
419
+ @response.code
420
+ end
421
+
422
+ # Returns a header value
423
+ #
424
+ # @param header [String] header name
425
+ # @return [String, nil] header value
426
+ def [](header)
427
+ @response[header]
428
+ end
429
+
430
+ # Returns the decompressed response body
431
+ #
432
+ # @return [String] decompressed body
433
+ def body
434
+ @decompressed_body ||= begin
435
+ return @response.body if @response.body.nil? || @response.body.empty?
436
+
437
+ Zlib.gunzip(@response.body)
438
+ rescue Zlib::Error
439
+ @response.body
324
440
  end
325
441
  end
326
442
  end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'thread'
4
- require 'timeout'
3
+ require "timeout"
5
4
 
6
5
  module ClickhouseRuby
7
6
  # Thread-safe connection pool for managing multiple ClickHouse connections
@@ -46,7 +45,7 @@ module ClickhouseRuby
46
45
  @connection_options = config.to_connection_options
47
46
 
48
47
  # Pool state
49
- @available = [] # Connections available for checkout
48
+ @available = [] # Connections available for checkout
50
49
  @in_use = [] # Connections currently checked out
51
50
  @all_connections = [] # All connections ever created
52
51
 
@@ -110,10 +109,8 @@ module ClickhouseRuby
110
109
  remaining = deadline - Time.now
111
110
  if remaining <= 0
112
111
  @total_timeouts += 1
113
- raise PoolTimeout.new(
114
- "Could not obtain a connection from the pool within #{@timeout} seconds " \
115
- "(pool size: #{@size}, in use: #{@in_use.size})"
116
- )
112
+ raise PoolTimeout, "Could not obtain a connection from the pool within #{@timeout} seconds " \
113
+ "(pool size: #{@size}, in use: #{@in_use.size})"
117
114
  end
118
115
 
119
116
  @condition.wait(@mutex, remaining)
@@ -136,7 +133,7 @@ module ClickhouseRuby
136
133
  @available << connection
137
134
  else
138
135
  # Disconnect unhealthy connections
139
- connection.disconnect rescue nil
136
+ safe_disconnect(connection)
140
137
  @all_connections.delete(connection)
141
138
  end
142
139
 
@@ -182,7 +179,7 @@ module ClickhouseRuby
182
179
  def shutdown
183
180
  @mutex.synchronize do
184
181
  (@available + @in_use).each do |conn|
185
- conn.disconnect rescue nil
182
+ safe_disconnect(conn)
186
183
  end
187
184
 
188
185
  @available.clear
@@ -201,7 +198,7 @@ module ClickhouseRuby
201
198
  @mutex.synchronize do
202
199
  @available.reject! do |conn|
203
200
  if conn.stale?(max_idle_seconds) || !conn.healthy?
204
- conn.disconnect rescue nil
201
+ safe_disconnect(conn)
205
202
  @all_connections.delete(conn)
206
203
  removed += 1
207
204
  true
@@ -236,7 +233,7 @@ module ClickhouseRuby
236
233
  total: @all_connections.size,
237
234
  capacity: @size,
238
235
  healthy: healthy,
239
- unhealthy: unhealthy
236
+ unhealthy: unhealthy,
240
237
  }
241
238
  end
242
239
  end
@@ -253,7 +250,7 @@ module ClickhouseRuby
253
250
  total_connections: @all_connections.size,
254
251
  total_checkouts: @total_checkouts,
255
252
  total_timeouts: @total_timeouts,
256
- uptime_seconds: Time.now - @created_at
253
+ uptime_seconds: Time.now - @created_at,
257
254
  }
258
255
  end
259
256
  end
@@ -275,18 +272,27 @@ module ClickhouseRuby
275
272
  def get_available_connection
276
273
  while (conn = @available.pop)
277
274
  # Verify the connection is still healthy
278
- if conn.healthy? && !conn.stale?
279
- return conn
280
- else
281
- # Remove unhealthy connections
282
- conn.disconnect rescue nil
283
- @all_connections.delete(conn)
284
- end
275
+ return conn if conn.healthy? && !conn.stale?
276
+
277
+ # Remove unhealthy connections
278
+ safe_disconnect(conn)
279
+ @all_connections.delete(conn)
280
+
285
281
  end
286
282
 
287
283
  nil
288
284
  end
289
285
 
286
+ # Safely disconnects a connection, logging any errors
287
+ #
288
+ # @param connection [Connection] the connection to disconnect
289
+ # @return [void]
290
+ def safe_disconnect(connection)
291
+ connection.disconnect
292
+ rescue StandardError => e
293
+ @config.logger&.warn("[ClickhouseRuby] Disconnect error: #{e.class} - #{e.message}")
294
+ end
295
+
290
296
  # Creates a new connection
291
297
  #
292
298
  # @return [Connection] the new connection
@@ -62,7 +62,7 @@ module ClickhouseRuby
62
62
  parts << "Code: #{code}" if code
63
63
  parts << "HTTP Status: #{http_status}" if http_status
64
64
  parts << "SQL: #{sql}" if sql
65
- parts.join(' | ')
65
+ parts.join(" | ")
66
66
  end
67
67
  end
68
68
 
@@ -140,5 +140,17 @@ module ClickhouseRuby
140
140
  def error_class_for_code(code)
141
141
  ERROR_CODE_MAPPING.fetch(code, QueryError)
142
142
  end
143
+
144
+ # Sanitizes a message to prevent credential leakage
145
+ #
146
+ # @param message [String] the message to sanitize
147
+ # @param config [Configuration, nil] the configuration containing credentials
148
+ # @return [String] the sanitized message
149
+ def sanitize_message(message, config)
150
+ return message unless config.respond_to?(:password) && config&.password
151
+ return message if config.password.to_s.empty?
152
+
153
+ message.gsub(config.password.to_s, "[REDACTED]")
154
+ end
143
155
  end
144
156
  end
@@ -57,16 +57,12 @@ module ClickhouseRuby
57
57
  def initialize(columns:, types:, data:, statistics: {}, deserialize: true)
58
58
  @columns = columns.freeze
59
59
  @types = types.freeze
60
- @elapsed_time = statistics['elapsed']
61
- @rows_read = statistics['rows_read']
62
- @bytes_read = statistics['bytes_read']
60
+ @elapsed_time = statistics["elapsed"]
61
+ @rows_read = statistics["rows_read"]
62
+ @bytes_read = statistics["bytes_read"]
63
63
 
64
64
  # Build type instances for deserialization
65
- @type_instances = if deserialize
66
- types.map { |t| Types.lookup(t) }
67
- else
68
- nil
69
- end
65
+ @type_instances = (types.map { |t| Types.lookup(t) } if deserialize)
70
66
 
71
67
  # Convert raw data to row hashes
72
68
  @rows = build_rows(data).freeze
@@ -88,6 +84,7 @@ module ClickhouseRuby
88
84
  end
89
85
  alias size count
90
86
  alias length count
87
+ alias data rows
91
88
 
92
89
  # Returns whether there are no rows
93
90
  #
@@ -149,11 +146,11 @@ module ClickhouseRuby
149
146
  # @param response_data [Hash] parsed JSON response
150
147
  # @return [Result] the result
151
148
  def self.from_json_compact(response_data)
152
- meta = response_data['meta'] || []
153
- columns = meta.map { |m| m['name'] }
154
- types = meta.map { |m| m['type'] }
155
- data = response_data['data'] || []
156
- statistics = response_data['statistics'] || {}
149
+ meta = response_data["meta"] || []
150
+ columns = meta.map { |m| m["name"] }
151
+ types = meta.map { |m| m["type"] }
152
+ data = response_data["data"] || []
153
+ statistics = response_data["statistics"] || {}
157
154
 
158
155
  new(columns: columns, types: types, data: data, statistics: statistics)
159
156
  end
@@ -177,9 +174,7 @@ module ClickhouseRuby
177
174
  @columns.each_with_index do |col, i|
178
175
  value = row_values[i]
179
176
  # Deserialize if we have type instances
180
- if @type_instances
181
- value = @type_instances[i].deserialize(value)
182
- end
177
+ value = @type_instances[i].deserialize(value) if @type_instances
183
178
  row[col] = value
184
179
  end
185
180
  row