clickhouse-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +80 -0
  3. data/LICENSE +21 -0
  4. data/README.md +251 -0
  5. data/lib/clickhouse_ruby/active_record/arel_visitor.rb +468 -0
  6. data/lib/clickhouse_ruby/active_record/connection_adapter.rb +723 -0
  7. data/lib/clickhouse_ruby/active_record/railtie.rb +192 -0
  8. data/lib/clickhouse_ruby/active_record/schema_statements.rb +693 -0
  9. data/lib/clickhouse_ruby/active_record.rb +121 -0
  10. data/lib/clickhouse_ruby/client.rb +471 -0
  11. data/lib/clickhouse_ruby/configuration.rb +145 -0
  12. data/lib/clickhouse_ruby/connection.rb +328 -0
  13. data/lib/clickhouse_ruby/connection_pool.rb +301 -0
  14. data/lib/clickhouse_ruby/errors.rb +144 -0
  15. data/lib/clickhouse_ruby/result.rb +189 -0
  16. data/lib/clickhouse_ruby/types/array.rb +183 -0
  17. data/lib/clickhouse_ruby/types/base.rb +77 -0
  18. data/lib/clickhouse_ruby/types/boolean.rb +68 -0
  19. data/lib/clickhouse_ruby/types/date_time.rb +163 -0
  20. data/lib/clickhouse_ruby/types/float.rb +115 -0
  21. data/lib/clickhouse_ruby/types/integer.rb +157 -0
  22. data/lib/clickhouse_ruby/types/low_cardinality.rb +58 -0
  23. data/lib/clickhouse_ruby/types/map.rb +249 -0
  24. data/lib/clickhouse_ruby/types/nullable.rb +73 -0
  25. data/lib/clickhouse_ruby/types/parser.rb +244 -0
  26. data/lib/clickhouse_ruby/types/registry.rb +148 -0
  27. data/lib/clickhouse_ruby/types/string.rb +83 -0
  28. data/lib/clickhouse_ruby/types/tuple.rb +206 -0
  29. data/lib/clickhouse_ruby/types/uuid.rb +84 -0
  30. data/lib/clickhouse_ruby/types.rb +69 -0
  31. data/lib/clickhouse_ruby/version.rb +5 -0
  32. data/lib/clickhouse_ruby.rb +101 -0
  33. metadata +150 -0
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_record'
4
+ require 'active_record/connection_adapters/abstract_adapter'
5
+
6
+ require_relative 'active_record/arel_visitor'
7
+ require_relative 'active_record/schema_statements'
8
+ require_relative 'active_record/connection_adapter'
9
+
10
+ # Load Railtie if Rails is available
11
+ if defined?(Rails::Railtie)
12
+ require_relative 'active_record/railtie'
13
+ end
14
+
15
+ module ClickhouseRuby
16
+ # ActiveRecord integration for ClickHouse
17
+ #
18
+ # This module provides full ActiveRecord adapter support for ClickHouse,
19
+ # allowing Rails applications to use ClickHouse as a database backend.
20
+ #
21
+ # @example Configuration in database.yml
22
+ # development:
23
+ # adapter: clickhouse
24
+ # host: localhost
25
+ # port: 8123
26
+ # database: analytics_development
27
+ # username: default
28
+ # password: ''
29
+ #
30
+ # production:
31
+ # adapter: clickhouse
32
+ # host: <%= ENV['CLICKHOUSE_HOST'] %>
33
+ # port: 8443
34
+ # database: analytics_production
35
+ # ssl: true
36
+ # ssl_verify: true
37
+ # username: <%= ENV['CLICKHOUSE_USER'] %>
38
+ # password: <%= ENV['CLICKHOUSE_PASSWORD'] %>
39
+ #
40
+ # @example Model usage
41
+ # class Event < ApplicationRecord
42
+ # self.table_name = 'events'
43
+ #
44
+ # # ClickHouse doesn't use auto-increment IDs
45
+ # # Generate UUIDs or use application-level ID generation
46
+ # before_create :generate_uuid
47
+ #
48
+ # private
49
+ #
50
+ # def generate_uuid
51
+ # self.id ||= SecureRandom.uuid
52
+ # end
53
+ # end
54
+ #
55
+ # @example Querying
56
+ # # Standard ActiveRecord queries work
57
+ # Event.where(user_id: 123).count
58
+ # Event.where(created_at: 1.day.ago..).limit(100)
59
+ # Event.select(:user_id).distinct.pluck(:user_id)
60
+ #
61
+ # @example Bulk inserts (recommended for ClickHouse)
62
+ # Event.insert_all([
63
+ # { id: SecureRandom.uuid, name: 'click', user_id: 1 },
64
+ # { id: SecureRandom.uuid, name: 'view', user_id: 2 }
65
+ # ])
66
+ #
67
+ # @example Mutations (DELETE/UPDATE)
68
+ # # IMPORTANT: These raise errors on failure (never silently fail)
69
+ # Event.where(status: 'old').delete_all
70
+ # Event.where(user_id: 123).update_all(status: 'archived')
71
+ #
72
+ # @note ClickHouse Limitations
73
+ # - No transaction support (savepoints, rollback are no-ops)
74
+ # - No foreign key constraints
75
+ # - DELETE/UPDATE are asynchronous mutations
76
+ # - No auto-increment primary keys
77
+ #
78
+ module ActiveRecord
79
+ class << self
80
+ # Check if the adapter is properly registered
81
+ #
82
+ # @return [Boolean] true if the adapter is available
83
def registered?
  # `defined?` evaluates to a String or nil, so chaining it with `&&` leaked
  # nil to callers when ActiveRecord's adapter namespace was not loaded.
  # Guard clauses keep the return value a strict true/false.
  return false unless defined?(::ActiveRecord::ConnectionAdapters)

  adapters = ::ActiveRecord::ConnectionAdapters
  # Only query the registry when it exposes a `resolve` lookup.
  return false unless adapters.respond_to?(:resolve)

  adapters.resolve('clickhouse').present?
rescue StandardError
  # Any failure during the lookup is reported as "not registered".
  false
end
90
+
91
+ # Get the adapter version
92
+ #
93
+ # @return [String] the adapter version
94
def version
  # The adapter shares the gem's version constant.
  ::ClickhouseRuby::VERSION
end
97
+ end
98
+ end
99
+ end
100
+
101
+ # Establish a ClickHouse connection method on ActiveRecord::Base
102
+ module ActiveRecord
103
+ class Base
104
+ class << self
105
+ # Establish a connection to ClickHouse
106
+ #
107
+ # @param config [Hash] database configuration
108
+ # @return [ConnectionAdapter] the connection adapter
109
def clickhouse_connection(config)
  # Normalise keys first so the adapter only ever sees symbol keys.
  options = config.symbolize_keys

  # The adapter receives nil for its first and third arguments, the
  # logger second and the symbol-keyed configuration last.
  ClickhouseRuby::ActiveRecord::ConnectionAdapter.new(nil, logger, nil, options)
end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,471 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'uri'
5
+ require 'bigdecimal'
6
+
7
+ module ClickhouseRuby
8
+ # Main HTTP client for ClickHouse communication
9
+ #
10
+ # The Client provides a high-level interface for executing queries
11
+ # and inserting data into ClickHouse. It handles:
12
+ # - Connection pooling for performance
13
+ # - Automatic format handling (JSONCompact for queries)
14
+ # - Proper error handling with rich context
15
+ # - Bulk inserts with JSONEachRow format
16
+ #
17
+ # CRITICAL: This client ALWAYS checks HTTP status codes before parsing
18
+ # response bodies. This prevents the silent error bug found in
19
+ # clickhouse-activerecord (issue #230).
20
+ #
21
+ # @example Basic usage
22
+ # config = ClickhouseRuby::Configuration.new
23
+ # config.host = 'localhost'
24
+ # client = ClickhouseRuby::Client.new(config)
25
+ #
26
+ # result = client.execute('SELECT * FROM users LIMIT 10')
27
+ # result.each { |row| puts row['name'] }
28
+ #
29
+ # @example With settings
30
+ # result = client.execute(
31
+ # 'SELECT * FROM large_table',
32
+ # settings: { max_execution_time: 120 }
33
+ # )
34
+ #
35
+ # @example Bulk insert
36
+ # client.insert('events', [
37
+ # { id: 1, event: 'click', timestamp: Time.now },
38
+ # { id: 2, event: 'view', timestamp: Time.now }
39
+ # ])
40
+ #
41
+ class Client
42
+ # Default response format for queries
43
+ DEFAULT_FORMAT = 'JSONCompact'
44
+
45
+ # Format for bulk inserts (5x faster than VALUES)
46
+ INSERT_FORMAT = 'JSONEachRow'
47
+
48
+ # @return [Configuration] the client configuration
49
+ attr_reader :config
50
+
51
+ # @return [ConnectionPool] the connection pool
52
+ attr_reader :pool
53
+
54
+ # Creates a new Client
55
+ #
56
+ # @param config [Configuration] connection configuration
57
+ # @raise [ConfigurationError] if configuration is invalid
58
def initialize(config)
  # Validate first so an invalid configuration raises before the pool
  # is constructed.
  (@config = config).validate!

  @pool = ConnectionPool.new(@config)
  @logger = @config.logger
  # Fall back to an empty Hash when no default settings are configured.
  @default_settings = @config.default_settings || {}
end
65
+
66
+ # Executes a SQL query and returns results
67
+ #
68
+ # @param sql [String] the SQL query to execute
69
+ # @param settings [Hash] ClickHouse settings for this query
70
+ # @param format [String] response format (default: JSONCompact)
71
+ # @return [Result] query results
72
+ # @raise [QueryError] if query fails
73
+ # @raise [ConnectionError] if connection fails
74
+ #
75
+ # @example
76
+ # result = client.execute('SELECT count() FROM users')
77
+ # puts result.first['count()']
78
+ #
79
+ # @example With settings
80
+ # result = client.execute(
81
+ # 'SELECT * FROM events',
82
+ # settings: { max_rows_to_read: 1_000_000 }
83
+ # )
84
def execute(sql, settings: {}, format: DEFAULT_FORMAT)
  # Drop trailing statement terminators: otherwise the FORMAT clause would be
  # appended *after* the ";" ("SELECT 1; FORMAT JSONCompact"), which is no
  # longer a single valid statement.
  statement = sql.strip
  statement = statement.chomp(';').rstrip while statement.end_with?(';')

  # FORMAT goes on its own line so a trailing "-- comment" in the caller's
  # SQL cannot swallow it.
  query_with_format = "#{statement}\nFORMAT #{format}"

  response = execute_request(query_with_format, build_query_params(settings))

  # The caller's original SQL (not the decorated query) is passed along so
  # error messages show what the caller actually wrote.
  parse_response(response, sql, format)
end
97
+
98
+ # Executes a command (INSERT, CREATE, DROP, etc.) that doesn't return data
99
+ #
100
+ # @param sql [String] the SQL command to execute
101
+ # @param settings [Hash] ClickHouse settings
102
+ # @return [Boolean] true if successful
103
+ # @raise [QueryError] if command fails
104
+ #
105
+ # @example
106
+ # client.command('CREATE TABLE test (id UInt64) ENGINE = Memory')
107
+ # client.command('DROP TABLE test')
108
def command(sql, settings: {})
  # The response body is not inspected here; execute_request is relied on
  # to raise if the statement fails.
  execute_request(sql, build_query_params(settings))
  true
end
113
+
114
+ # Inserts multiple rows using bulk insert (JSONEachRow format)
115
+ #
116
+ # This is significantly faster than INSERT ... VALUES for large datasets.
117
+ # The data is sent in JSONEachRow format which ClickHouse can parse
118
+ # efficiently.
119
+ #
120
+ # @param table [String] the table name
121
+ # @param rows [Array<Hash>] array of row hashes
122
+ # @param columns [Array<String>, nil] column names (inferred from first row if nil)
123
+ # @param settings [Hash] ClickHouse settings
124
+ # @param format [Symbol] insert format (:json_each_row is default and recommended)
125
+ # @return [Boolean] true if successful
126
+ # @raise [QueryError] if insert fails
127
+ # @raise [ArgumentError] if rows is empty
128
+ #
129
+ # @example
130
+ # client.insert('events', [
131
+ # { id: 1, name: 'click' },
132
+ # { id: 2, name: 'view' }
133
+ # ])
134
+ #
135
+ # @example With explicit columns
136
+ # client.insert('events', [
137
+ # { id: 1, name: 'click', extra: 'ignored' },
138
+ # ], columns: ['id', 'name'])
139
def insert(table, rows, columns: nil, settings: {}, format: :json_each_row)
  # NOTE(review): `format` is accepted but never consulted; the payload is
  # always sent as INSERT_FORMAT.
  raise ArgumentError, 'rows cannot be empty' if rows.nil? || rows.empty?

  # Without an explicit list, the first row's keys define the column set.
  columns ||= rows.first.keys.map(&:to_s)

  column_list = columns.map { |c| quote_identifier(c) }.join(', ')
  sql = "INSERT INTO #{quote_identifier(table)} (#{column_list}) FORMAT #{INSERT_FORMAT}"

  # One JSON document per row, newline separated.
  body = rows.map do |row|
    record = columns.each_with_object({}) do |col, acc|
      name = col.to_s
      # Use key?/fetch instead of `||`: a stored `false` must be sent as
      # false rather than collapsing to nil. The column is tried as given,
      # then as a String key, then as a Symbol key.
      raw = row.key?(col) ? row[col] : row.fetch(name) { row[name.to_sym] }
      acc[name] = serialize_value(raw)
    end
    JSON.generate(record)
  end.join("\n")

  path = build_path(build_query_params(settings))

  @pool.with_connection do |conn|
    log_query(sql) if @logger

    # The statement travels in the URL; the rows travel in the request body.
    response = conn.post(
      "#{path}&query=#{URI.encode_www_form_component(sql)}",
      body,
      { 'Content-Type' => 'application/json' }
    )

    handle_response(response, sql)
  end

  true
end
176
+
177
+ # Checks if the ClickHouse server is reachable
178
+ #
179
+ # @return [Boolean] true if server responds to ping
180
def ping
  # Delegate to the checked-out connection's own ping.
  @pool.with_connection { |conn| conn.ping }
rescue StandardError
  # Any failure to check out or ping a connection counts as "unreachable".
  false
end
185
+
186
+ # Returns the ClickHouse server version
187
+ #
188
+ # @return [String] version string
189
+ # @raise [QueryError] if query fails
190
def server_version
  # The column is aliased so the row can be read by a stable key.
  first_row = execute('SELECT version() AS version').first
  first_row['version']
end
194
+
195
+ # Closes all connections in the pool
196
+ #
197
+ # Call this when shutting down to clean up resources.
198
+ #
199
+ # @return [void]
200
def close
  # Teardown is delegated entirely to the pool.
  @pool.shutdown
end
# `disconnect` is kept as a second name for the same operation.
alias :disconnect :close
204
+
205
+ # Returns pool statistics
206
+ #
207
+ # @return [Hash] pool stats
208
def pool_stats
  # Straight delegation: whatever the pool reports is returned untouched.
  @pool.stats
end
211
+
212
+ private
213
+
214
+ # Builds query parameters including database and settings
215
+ #
216
+ # @param settings [Hash] query-specific settings
217
+ # @return [Hash] all query parameters
218
def build_query_params(settings = {})
  # Per-call settings override the client-wide defaults.
  effective = @default_settings.merge(settings)

  # Start from the database parameter, then add every setting with both
  # its name and its value stringified.
  effective.each_with_object({ 'database' => @config.database }) do |(name, value), params|
    params[name.to_s] = value.to_s
  end
end
231
+
232
+ # Builds the request path with query parameters
233
+ #
234
+ # @param params [Hash] query parameters
235
+ # @return [String] the path with query string
236
def build_path(params)
  # URI.encode_www_form escapes parameter names as well as values. The names
  # used to be interpolated raw, so one containing "&" or "=" corrupted the
  # query string.
  # Values are stringified first so a nil value still renders as "key=".
  "/?#{URI.encode_www_form(params.transform_values(&:to_s))}"
end
240
+
241
+ # Executes a request through the connection pool
242
+ #
243
+ # @param sql [String] the SQL to execute
244
+ # @param params [Hash] query parameters
245
+ # @return [Net::HTTPResponse] the response
246
def execute_request(sql, params)
  request_path = build_path(params)

  @pool.with_connection do |connection|
    log_query(sql) if @logger

    # Post the SQL as the request body, validate the reply, then hand the
    # same reply back as the block's value.
    connection.post(request_path, sql).tap { |reply| handle_response(reply, sql) }
  end
end
257
+
258
+ # Handles HTTP response with CRITICAL status check
259
+ #
260
+ # IMPORTANT: This method ALWAYS checks status code FIRST before
261
+ # attempting to parse the body. This prevents the silent failure
262
+ # bug in clickhouse-activerecord where DELETE operations fail
263
+ # without raising errors.
264
+ #
265
+ # @param response [Net::HTTPResponse] the HTTP response
266
+ # @param sql [String] the SQL that was executed (for error context)
267
+ # @return [void]
268
+ # @raise [QueryError] if response indicates an error
269
def handle_response(response, sql)
  # Only the literal status string '200' counts as success; the body is
  # never looked at here.
  return if response.code == '200'

  raise_clickhouse_error(response, sql)
end
277
+
278
+ # Raises an appropriate ClickHouse error with full context
279
+ #
280
+ # Extracts error code and message from response body and
281
+ # maps to the appropriate error class.
282
+ #
283
+ # @param response [Net::HTTPResponse] the error response
284
+ # @param sql [String] the SQL that failed
285
+ # @raise [QueryError] always raises
286
def raise_clickhouse_error(response, sql)
  # A missing body is treated as an empty string so the extractors
  # always receive a String.
  payload = response.body || ''
  error_code = extract_error_code(payload)
  error_message = extract_error_message(payload)
  status = response.code

  # The error class is chosen from the extracted code (which may be nil).
  klass = ClickhouseRuby.error_class_for_code(error_code)

  log_error(error_message, error_code, status, sql) if @logger

  raise klass.new(error_message, code: error_code, http_status: status, sql: truncate_sql(sql))
end
303
+
304
+ # Extracts ClickHouse error code from response body
305
+ #
306
+ # ClickHouse errors follow the pattern: "Code: 60."
307
+ #
308
+ # @param body [String] response body
309
+ # @return [Integer, nil] error code or nil
310
def extract_error_code(body)
  # String#[] with a capture index yields the digits or nil when the
  # "Code: <n>" marker is absent.
  digits = body[/Code:\s*(\d+)/, 1]
  digits&.to_i
end
314
+
315
+ # Extracts error message from response body
316
+ #
317
+ # @param body [String] response body
318
+ # @return [String] error message
319
def extract_error_message(body)
  # Capture the text after "DB::Exception:" up to the "(version ..." suffix
  # or the end of the line, whichever comes first.
  found = body.match(/DB::Exception:\s*(.+?)(?:\s*\(version|$)/m)
  return found[1].strip if found

  # No recognisable exception text: fall back to the raw body, or to a
  # generic message when the body is blank.
  fallback = body.strip
  fallback.empty? ? 'Unknown ClickHouse error' : fallback
end
328
+
329
+ # Parses successful response based on format
330
+ #
331
+ # @param response [Net::HTTPResponse] the response
332
+ # @param sql [String] original SQL (for error context)
333
+ # @param format [String] the response format
334
+ # @return [Result] parsed result
335
def parse_response(response, sql, format)
  body = response.body

  # A nil or blank body yields the empty result.
  return Result.empty if body.nil? || body.strip.empty?

  if format == 'JSONCompact'
    parse_json_compact(body, sql)
  elsif format == 'JSON'
    parse_json(body, sql)
  else
    # Any other format is not parsed: the raw body becomes a single
    # String cell in a one-column result.
    Result.new(columns: ['result'], types: ['String'], data: [[body]])
  end
end
351
+
352
+ # Parses JSONCompact format response
353
+ #
354
+ # @param body [String] response body
355
+ # @param sql [String] original SQL
356
+ # @return [Result]
357
def parse_json_compact(body, sql)
  # Decode the body, then let Result interpret the JSONCompact layout.
  Result.from_json_compact(parse_json_body(body, sql))
end
361
+
362
+ # Parses JSON format response
363
+ #
364
+ # @param body [String] response body
365
+ # @param sql [String] original SQL
366
+ # @return [Result]
367
def parse_json(body, sql)
  payload = parse_json_body(body, sql)

  # "meta" carries one {name, type} entry per column; it may be absent.
  meta = payload['meta'] || []
  names = meta.map { |entry| entry['name'] }

  Result.new(
    columns: names,
    types: meta.map { |entry| entry['type'] },
    # Rows arrive as objects keyed by column name; flatten each into an
    # array ordered like `names` (missing keys become nil).
    data: (payload['data'] || []).map { |record| record.values_at(*names) },
    statistics: payload['statistics'] || {}
  )
end
385
+
386
+ # Parses JSON body with error handling
387
+ #
388
+ # @param body [String] JSON string
389
+ # @param sql [String] original SQL for error context
390
+ # @return [Hash] parsed JSON
391
+ # @raise [QueryError] if JSON parsing fails
392
def parse_json_body(body, sql)
  begin
    JSON.parse(body)
  rescue JSON::ParserError => parse_failure
    # Re-raise as QueryError so callers get the (truncated) SQL alongside
    # the parser's message, with the original exception attached.
    raise QueryError.new(
      "Failed to parse ClickHouse response: #{parse_failure.message}",
      sql: truncate_sql(sql),
      original_error: parse_failure
    )
  end
end
401
+
402
+ # Quotes an identifier (table or column name)
403
+ #
404
+ # @param identifier [String] the identifier
405
+ # @return [String] quoted identifier
406
def quote_identifier(identifier)
  # Embedded backticks are doubled before the name is wrapped in backticks.
  escaped = identifier.to_s.gsub('`', '``')
  "`#{escaped}`"
end
410
+
411
+ # Serializes a Ruby value for JSON insertion
412
+ #
413
+ # @param value [Object] the value
414
+ # @return [Object] JSON-serializable value
415
def serialize_value(value)
  # NOTE(review): Date/DateTime come from the 'date' stdlib, which this file
  # does not require itself - presumably loaded elsewhere; confirm.
  case value
  when Time, DateTime
    # Rendered as "YYYY-MM-DD hh:mm:ss" using the value's own wall-clock
    # fields: no zone offset is emitted and sub-second precision is dropped.
    # DateTime is matched here, before Date, because it is a Date subclass.
    value.strftime('%Y-%m-%d %H:%M:%S')
  when Date
    value.strftime('%Y-%m-%d')
  when BigDecimal
    # Converted to Float so it is emitted as a JSON number; this can lose
    # precision for values a Float cannot represent exactly.
    value.to_f
  when Symbol
    value.to_s
  when Array
    # Convert nested elements too; otherwise a Time, Date, BigDecimal or
    # Symbol inside a collection would reach JSON.generate unconverted.
    value.map { |element| serialize_value(element) }
  when Hash
    # Only values are converted; keys are left to JSON.generate.
    value.transform_values { |element| serialize_value(element) }
  else
    value
  end
end
430
+
431
+ # Truncates SQL for error messages
432
+ #
433
+ # @param sql [String] the SQL
434
+ # @param max_length [Integer] maximum length
435
+ # @return [String] truncated SQL
436
def truncate_sql(sql, max_length = 1000)
  # Short statements are returned as-is; longer ones keep their first
  # max_length characters plus a visible marker.
  sql.length > max_length ? "#{sql[0, max_length]}... (truncated)" : sql
end
441
+
442
+ # Logs a query if logger is configured
443
+ #
444
+ # @param sql [String] the SQL query
445
def log_query(sql)
  return unless @logger

  # The SQL text itself is only written at the :debug level; every other
  # level gets a fixed message without the statement.
  if @config.log_level == :debug
    @logger.debug("[ClickhouseRuby] #{sql}")
  else
    @logger.info("[ClickhouseRuby] Query executed")
  end
end
455
+
456
+ # Logs an error if logger is configured
457
+ #
458
+ # @param message [String] error message
459
+ # @param code [Integer, nil] ClickHouse error code
460
+ # @param http_status [String] HTTP status
461
+ # @param sql [String] the SQL that failed
462
def log_error(message, code, http_status, sql)
  return unless @logger

  # The SQL is cut to 200 characters here to keep log lines short.
  details = "code: #{code || 'unknown'}, http: #{http_status}, sql: #{truncate_sql(sql, 200)}"
  @logger.error("[ClickhouseRuby] ClickHouse error: #{message} (#{details})")
end
470
+ end
471
+ end