clickhouse-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +80 -0
  3. data/LICENSE +21 -0
  4. data/README.md +251 -0
  5. data/lib/clickhouse_ruby/active_record/arel_visitor.rb +468 -0
  6. data/lib/clickhouse_ruby/active_record/connection_adapter.rb +723 -0
  7. data/lib/clickhouse_ruby/active_record/railtie.rb +192 -0
  8. data/lib/clickhouse_ruby/active_record/schema_statements.rb +693 -0
  9. data/lib/clickhouse_ruby/active_record.rb +121 -0
  10. data/lib/clickhouse_ruby/client.rb +471 -0
  11. data/lib/clickhouse_ruby/configuration.rb +145 -0
  12. data/lib/clickhouse_ruby/connection.rb +328 -0
  13. data/lib/clickhouse_ruby/connection_pool.rb +301 -0
  14. data/lib/clickhouse_ruby/errors.rb +144 -0
  15. data/lib/clickhouse_ruby/result.rb +189 -0
  16. data/lib/clickhouse_ruby/types/array.rb +183 -0
  17. data/lib/clickhouse_ruby/types/base.rb +77 -0
  18. data/lib/clickhouse_ruby/types/boolean.rb +68 -0
  19. data/lib/clickhouse_ruby/types/date_time.rb +163 -0
  20. data/lib/clickhouse_ruby/types/float.rb +115 -0
  21. data/lib/clickhouse_ruby/types/integer.rb +157 -0
  22. data/lib/clickhouse_ruby/types/low_cardinality.rb +58 -0
  23. data/lib/clickhouse_ruby/types/map.rb +249 -0
  24. data/lib/clickhouse_ruby/types/nullable.rb +73 -0
  25. data/lib/clickhouse_ruby/types/parser.rb +244 -0
  26. data/lib/clickhouse_ruby/types/registry.rb +148 -0
  27. data/lib/clickhouse_ruby/types/string.rb +83 -0
  28. data/lib/clickhouse_ruby/types/tuple.rb +206 -0
  29. data/lib/clickhouse_ruby/types/uuid.rb +84 -0
  30. data/lib/clickhouse_ruby/types.rb +69 -0
  31. data/lib/clickhouse_ruby/version.rb +5 -0
  32. data/lib/clickhouse_ruby.rb +101 -0
  33. metadata +150 -0
@@ -0,0 +1,301 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thread'
4
+ require 'timeout'
5
+
6
+ module ClickhouseRuby
7
+ # Thread-safe connection pool for managing multiple ClickHouse connections
8
+ #
9
+ # Features:
10
+ # - Thread-safe checkout/checkin with mutex
11
+ # - Configurable pool size and timeout
12
+ # - Automatic health checks before returning connections
13
+ # - with_connection block pattern for safe usage
14
+ # - Idle connection cleanup
15
+ #
16
+ # @example Basic usage with block (recommended)
17
+ # pool = ClickhouseRuby::ConnectionPool.new(config)
18
+ # pool.with_connection do |conn|
19
+ # response = conn.post('/query', 'SELECT 1')
20
+ # end
21
+ #
22
+ # @example Manual checkout/checkin (use with caution)
23
+ # conn = pool.checkout
24
+ # begin
25
+ # response = conn.post('/query', 'SELECT 1')
26
+ # ensure
27
+ # pool.checkin(conn)
28
+ # end
29
+ #
30
class ConnectionPool
  # @return [Integer] maximum number of connections in the pool
  attr_reader :size

  # @return [Integer] timeout in seconds when waiting for a connection
  attr_reader :timeout

  # Creates a new connection pool. No connection is opened here;
  # connections are created on demand by #checkout.
  #
  # @param config [Configuration] connection configuration; must respond to
  #   pool_size, pool_timeout and to_connection_options
  # @param size [Integer] maximum pool size (default from config)
  # @param timeout [Integer] wait timeout in seconds (default from config)
  def initialize(config, size: nil, timeout: nil)
    @config = config
    @size = size || config.pool_size
    @timeout = timeout || config.pool_timeout
    @connection_options = config.to_connection_options

    # Pool state
    @available = []        # Connections available for checkout
    @in_use = []           # Connections currently checked out
    @all_connections = []  # Every live connection owned by the pool

    # Synchronization
    @mutex = Mutex.new
    @condition = ConditionVariable.new

    # Stats
    @total_checkouts = 0
    @total_timeouts = 0
    @created_at = Time.now
  end

  # Executes a block with a checked-out connection
  #
  # This is the recommended way to use the pool. The connection is
  # automatically returned to the pool when the block completes,
  # even if an exception is raised.
  #
  # @yield [Connection] the checked-out connection
  # @return [Object] the block's return value
  # @raise [PoolTimeout] if no connection becomes available
  def with_connection
    conn = checkout
    begin
      yield conn
    ensure
      checkin(conn)
    end
  end

  # Checks out a connection from the pool
  #
  # Reuses an idle connection when one is available, otherwise opens a new
  # one while the pool is under capacity, otherwise waits up to @timeout
  # seconds for a connection to be returned. With a timeout of 0 the call
  # fails immediately when nothing can be handed out.
  #
  # @return [Connection] a healthy connection
  # @raise [PoolTimeout] if no connection is available within the timeout
  def checkout
    # Monotonic clock: the deadline must not move when the wall clock is adjusted.
    deadline = monotonic_now + @timeout

    @mutex.synchronize do
      loop do
        # Reuse first; get_available_connection may also evict dead
        # connections, which frees capacity for the creation below.
        conn = get_available_connection
        conn ||= create_connection if @all_connections.size < @size

        if conn
          @in_use << conn
          @total_checkouts += 1
          return conn
        end

        remaining = deadline - monotonic_now
        if remaining <= 0
          @total_timeouts += 1
          raise PoolTimeout,
                "Could not obtain a connection from the pool within #{@timeout} seconds " \
                "(pool size: #{@size}, in use: #{@in_use.size})"
        end

        # Releases the mutex while sleeping; re-checks state after wake-up.
        @condition.wait(@mutex, remaining)
      end
    end
  end

  # Returns a connection to the pool
  #
  # Only connections that are currently checked out from this pool are
  # accepted. Checking the same connection in twice is a no-op, so it can
  # never be queued twice and handed to two callers at once. A connection
  # the pool does not track (for example one still held by a caller when
  # #shutdown ran) is disconnected instead of being adopted.
  #
  # @param connection [Connection] the connection to return
  # @return [void]
  def checkin(connection)
    return unless connection

    @mutex.synchronize do
      unless @in_use.delete(connection)
        # Already idle in the pool => double checkin, nothing to do.
        safe_disconnect(connection) unless @available.include?(connection)
        return
      end

      # Only return healthy connections to the available pool
      if connection.healthy? && !connection.stale?
        @available << connection
      else
        discard(connection)
      end

      # Either a connection or a free capacity slot is now available.
      @condition.signal
    end
  end

  # Returns the number of currently available connections
  #
  # @return [Integer]
  def available_count
    @mutex.synchronize { @available.size }
  end

  # Returns the number of connections currently in use
  #
  # @return [Integer]
  def in_use_count
    @mutex.synchronize { @in_use.size }
  end

  # Returns the total number of connections (available + in use)
  #
  # @return [Integer]
  def total_count
    @mutex.synchronize { @all_connections.size }
  end

  # Checks if all connections are currently in use
  #
  # @return [Boolean]
  def exhausted?
    @mutex.synchronize do
      @available.empty? && @all_connections.size >= @size
    end
  end

  # Closes all connections and resets the pool
  #
  # This should be called when shutting down the application.
  # Connections that are still checked out are disconnected as well.
  #
  # @return [void]
  def shutdown
    @mutex.synchronize do
      (@available + @in_use).each { |conn| safe_disconnect(conn) }

      @available.clear
      @in_use.clear
      @all_connections.clear
    end
  end

  # Removes idle/unhealthy connections from the pool
  #
  # Only idle connections are examined; checked-out connections are
  # left untouched.
  #
  # @param max_idle_seconds [Integer] maximum idle time before removal
  # @return [Integer] number of connections removed
  def cleanup(max_idle_seconds = 300)
    removed = 0

    @mutex.synchronize do
      @available.reject! do |conn|
        next false unless conn.stale?(max_idle_seconds) || !conn.healthy?

        discard(conn)
        removed += 1
        true
      end
    end

    removed
  end

  # Pings all available connections to check health
  #
  # The pings run while the pool lock is held, so checkout/checkin block
  # until the report is complete. Failing connections are only counted,
  # not removed (see #cleanup).
  #
  # @return [Hash] status report
  def health_check
    @mutex.synchronize do
      healthy = @available.count { |conn| conn.ping }

      {
        available: @available.size,
        in_use: @in_use.size,
        total: @all_connections.size,
        capacity: @size,
        healthy: healthy,
        unhealthy: @available.size - healthy
      }
    end
  end

  # Returns pool statistics
  #
  # @return [Hash] pool statistics
  def stats
    @mutex.synchronize do
      {
        size: @size,
        available: @available.size,
        in_use: @in_use.size,
        total_connections: @all_connections.size,
        total_checkouts: @total_checkouts,
        total_timeouts: @total_timeouts,
        uptime_seconds: Time.now - @created_at
      }
    end
  end

  # Returns a string representation of the pool
  #
  # @return [String]
  def inspect
    @mutex.synchronize do
      "#<#{self.class.name} size=#{@size} available=#{@available.size} in_use=#{@in_use.size}>"
    end
  end

  private

  # Gets an available connection from the pool, evicting any idle
  # connection that turned unhealthy or stale. Must be called with
  # the mutex held.
  #
  # @return [Connection, nil] a healthy connection or nil
  def get_available_connection
    while (conn = @available.pop)
      return conn if conn.healthy? && !conn.stale?

      discard(conn)
    end

    nil
  end

  # Creates, connects and registers a new connection. Must be called
  # with the mutex held. Nothing is registered when connecting fails.
  #
  # @return [Connection] the new connection
  # @raise [ConnectionNotEstablished] if connection fails
  def create_connection
    conn = Connection.new(**@connection_options)
    conn.connect
    @all_connections << conn
    conn
  end

  # Disconnects a connection and forgets it, freeing one capacity slot.
  #
  # @param conn [Connection]
  # @return [void]
  def discard(conn)
    safe_disconnect(conn)
    @all_connections.delete(conn)
  end

  # Best-effort disconnect: a failure while closing a connection that is
  # being thrown away anyway must not break pool bookkeeping.
  #
  # @param conn [Connection]
  # @return [void]
  def safe_disconnect(conn)
    conn.disconnect
  rescue StandardError
    nil
  end

  # @return [Float] seconds on the monotonic clock
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
301
+ end
@@ -0,0 +1,144 @@
1
+ # frozen_string_literal: true
2
+
3
module ClickhouseRuby
  # Base error class for all ClickhouseRuby errors
  # All errors include context information to aid debugging
  class Error < StandardError
    # @return [Exception, nil] the original exception that caused this error
    attr_reader :original_error

    # @param message [String] the error message
    # @param original_error [Exception, nil] the underlying exception
    def initialize(message = nil, original_error: nil)
      @original_error = original_error
      super(message)
    end
  end

  # Connection-related errors
  # Raised when there are issues establishing or maintaining a connection
  class ConnectionError < Error; end

  # Raised when a connection cannot be established
  # Common causes: wrong host/port, network issues, authentication failure
  class ConnectionNotEstablished < ConnectionError; end

  # Raised when a connection or query times out
  class ConnectionTimeout < ConnectionError; end

  # Raised for SSL/TLS related errors
  # Common causes: certificate verification failure, SSL protocol mismatch
  class SSLError < ConnectionError; end

  # Query execution errors
  # Raised when there are issues executing a query
  class QueryError < Error
    # @return [Integer, nil] ClickHouse error code
    attr_reader :code

    # @return [String, nil] HTTP status code from the response
    attr_reader :http_status

    # @return [String, nil] the SQL that caused the error
    attr_reader :sql

    # @param message [String] the error message
    # @param code [Integer, nil] ClickHouse error code
    # @param http_status [String, nil] HTTP response status
    # @param sql [String, nil] the SQL query that failed
    # @param original_error [Exception, nil] the underlying exception
    def initialize(message = nil, code: nil, http_status: nil, sql: nil, original_error: nil)
      @code = code
      @http_status = http_status
      @sql = sql
      super(message, original_error: original_error)
    end

    # Returns a detailed error message including context
    #
    # Ruby 3.2+ defines Exception#detailed_message(highlight:, **opts) and
    # calls it with keyword arguments from #full_message and the default
    # error printer. This override therefore accepts (and ignores) those
    # keywords; calling it without arguments keeps working as before.
    #
    # @param highlight [Boolean] ignored; the message is never decorated
    # @return [String] the detailed error message
    def detailed_message(highlight: false, **_options)
      parts = [message]
      parts << "Code: #{code}" if code
      parts << "HTTP Status: #{http_status}" if http_status
      parts << "SQL: #{sql}" if sql
      parts.join(' | ')
    end
  end

  # Raised for SQL syntax errors
  # NOTE: inside this namespace the bare constant SyntaxError refers to
  # this class, not to Ruby's ::SyntaxError.
  class SyntaxError < QueryError; end

  # Raised when a query is invalid (e.g., unknown table, column)
  class StatementInvalid < QueryError; end

  # Raised when a query exceeds its time limit
  class QueryTimeout < QueryError; end

  # Raised when a table doesn't exist
  class UnknownTable < QueryError; end

  # Raised when a column doesn't exist
  class UnknownColumn < QueryError; end

  # Raised when a database doesn't exist
  class UnknownDatabase < QueryError; end

  # Type conversion errors
  # Raised when there are issues converting between Ruby and ClickHouse types
  class TypeCastError < Error
    # @return [String, nil] the source type
    attr_reader :from_type

    # @return [String, nil] the target type
    attr_reader :to_type

    # @return [Object, nil] the value that couldn't be converted
    attr_reader :value

    # @param message [String] the error message
    # @param from_type [String, nil] the source type
    # @param to_type [String, nil] the target type
    # @param value [Object, nil] the value that failed conversion
    # @param original_error [Exception, nil] the underlying exception
    def initialize(message = nil, from_type: nil, to_type: nil, value: nil, original_error: nil)
      @from_type = from_type
      @to_type = to_type
      @value = value
      super(message, original_error: original_error)
    end
  end

  # Configuration errors
  # Raised when there are issues with configuration
  class ConfigurationError < Error; end

  # Pool errors
  # Raised when there are issues with the connection pool
  class PoolError < Error; end

  # Raised when no connections are available in the pool
  class PoolExhausted < PoolError; end

  # Raised when waiting for a connection times out
  class PoolTimeout < PoolError; end

  # Maps ClickHouse error codes to exception classes
  # See: https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ErrorCodes.cpp
  ERROR_CODE_MAPPING = {
    60 => UnknownTable,     # UNKNOWN_TABLE
    16 => UnknownColumn,    # NO_SUCH_COLUMN_IN_TABLE
    81 => UnknownDatabase,  # UNKNOWN_DATABASE
    62 => SyntaxError,      # SYNTAX_ERROR
    159 => QueryTimeout     # TIMEOUT_EXCEEDED
  }.freeze

  class << self
    # Maps a ClickHouse error code to the appropriate exception class
    #
    # Codes that arrive as decimal strings (e.g. taken verbatim from a
    # response header or message) are converted before the lookup. Any
    # unmapped, nil or non-numeric code falls back to QueryError.
    #
    # @param code [Integer, String, nil] the ClickHouse error code
    # @return [Class] the exception class to use
    def error_class_for_code(code)
      code = code.strip.to_i if code.is_a?(String) && code.strip =~ /\A\d+\z/
      ERROR_CODE_MAPPING.fetch(code, QueryError)
    end
  end
end
@@ -0,0 +1,189 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ClickhouseRuby
4
+ # Query result container that provides access to columns, types, and rows
5
+ #
6
+ # Result implements Enumerable for easy iteration over rows.
7
+ # Each row is returned as a Hash with column names as keys and
8
+ # properly deserialized values.
9
+ #
10
+ # @example Iterating over results
11
+ # result = client.execute('SELECT id, name FROM users')
12
+ # result.columns # => ['id', 'name']
13
+ # result.types # => ['UInt64', 'String']
14
+ #
15
+ # result.each do |row|
16
+ # puts "#{row['id']}: #{row['name']}"
17
+ # end
18
+ #
19
+ # @example Array-like access
20
+ # result.rows[0] # => { 'id' => 1, 'name' => 'Alice' }
21
+ # result.first # => { 'id' => 1, 'name' => 'Alice' }
22
+ # result.to_a # => [{ 'id' => 1, 'name' => 'Alice' }, ...]
23
+ #
24
+ # @example Metadata access
25
+ # result.count # => 100
26
+ # result.empty? # => false
27
+ # result.elapsed_time # => 0.023 (seconds)
28
+ #
29
class Result
  include Enumerable

  # @return [Array<String>] column names in order
  attr_reader :columns

  # @return [Array<String>] ClickHouse type strings for each column
  attr_reader :types

  # @return [Array<Hash>] rows as hashes with column names as keys
  attr_reader :rows

  # @return [Float, nil] query execution time in seconds (from ClickHouse)
  attr_reader :elapsed_time

  # @return [Integer, nil] number of rows read by ClickHouse
  attr_reader :rows_read

  # @return [Integer, nil] bytes read by ClickHouse
  attr_reader :bytes_read

  # Creates a new Result from parsed response data
  #
  # The column and type lists are copied before being frozen, so the
  # arrays passed in by the caller are left unmodified.
  #
  # @param columns [Array<String>] column names
  # @param types [Array<String>] ClickHouse type strings
  # @param data [Array<Array>] raw row data (values in column order)
  # @param statistics [Hash, nil] optional query statistics from ClickHouse
  #   (string keys 'elapsed', 'rows_read', 'bytes_read')
  # @param deserialize [Boolean] whether to deserialize values using type system
  def initialize(columns:, types:, data:, statistics: {}, deserialize: true)
    statistics ||= {}

    @columns = columns.dup.freeze
    @types = types.dup.freeze
    @elapsed_time = statistics['elapsed']
    @rows_read = statistics['rows_read']
    @bytes_read = statistics['bytes_read']

    # One type object per column, looked up once instead of once per row
    @type_instances = deserialize ? @types.map { |t| Types.lookup(t) } : nil

    # Convert raw data to row hashes
    @rows = build_rows(data).freeze
  end

  # Iterates over each row
  #
  # @yield [Hash] each row as a hash
  # @return [Enumerator] if no block given
  def each(&block)
    @rows.each(&block)
  end

  # Returns the number of rows
  #
  # @return [Integer] row count
  def size
    @rows.length
  end
  alias length size

  # Returns the number of rows; with an argument or a block it behaves
  # like Enumerable#count and counts only the matching rows.
  #
  # @param args [Array] optional single item to count occurrences of
  # @yield [Hash] optional predicate evaluated for each row
  # @return [Integer] row count
  def count(*args, &block)
    return size if args.empty? && block.nil?

    super(*args, &block)
  end

  # Returns whether there are no rows
  #
  # @return [Boolean] true if no rows
  def empty?
    @rows.empty?
  end

  # Returns the first row, or the first n rows when n is given
  #
  # @param args [Array] optional row count n
  # @return [Hash, nil, Array<Hash>] the first row (nil if empty), or up to n rows
  def first(*args)
    @rows.first(*args)
  end

  # Returns the last row, or the last n rows when n is given
  #
  # @param args [Array] optional row count n
  # @return [Hash, nil, Array<Hash>] the last row (nil if empty), or up to n rows
  def last(*args)
    @rows.last(*args)
  end

  # Access a row by index
  #
  # @param index [Integer] row index
  # @return [Hash, nil] the row or nil
  def [](index)
    @rows[index]
  end

  # Returns a specific column's values across all rows
  #
  # @param column_name [String] the column name
  # @return [Array] values for that column
  # @raise [ArgumentError] if column doesn't exist
  def column_values(column_name)
    raise ArgumentError, "Unknown column: #{column_name}" unless @columns.include?(column_name)

    @rows.map { |row| row[column_name] }
  end

  # Returns column names mapped to their types
  #
  # @return [Hash<String, String>] column name => type mapping
  def column_types
    @columns.zip(@types).to_h
  end

  # Creates an empty result (for commands that don't return data)
  #
  # @return [Result] an empty result
  def self.empty
    new(columns: [], types: [], data: [])
  end

  # Creates a result from JSONCompact format response
  #
  # Missing 'meta', 'data' or 'statistics' keys are treated as empty.
  #
  # @param response_data [Hash] parsed JSON response
  # @return [Result] the result
  def self.from_json_compact(response_data)
    meta = response_data['meta'] || []

    new(
      columns: meta.map { |m| m['name'] },
      types: meta.map { |m| m['type'] },
      data: response_data['data'] || [],
      statistics: response_data['statistics'] || {}
    )
  end

  # Returns a human-readable string representation
  #
  # @return [String] string representation
  def inspect
    "#<#{self.class.name} columns=#{@columns.inspect} rows=#{size}>"
  end

  private

  # Builds row hashes from raw data
  #
  # Values are matched to columns by position; a row shorter than the
  # column list yields nil for the missing positions.
  #
  # @param data [Array<Array>] raw row data
  # @return [Array<Hash>] rows as hashes
  def build_rows(data)
    data.map do |row_values|
      @columns.each_with_index.each_with_object({}) do |(col, i), row|
        value = row_values[i]
        # Deserialize only when type instances were built
        value = @type_instances[i].deserialize(value) if @type_instances
        row[col] = value
      end
    end
  end
end
189
+ end