clickhouse-rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: a02afdcdcdd890944f581300c9ff98218ec8b7458d7dee112d35d8c0a1ce947b
4
+ data.tar.gz: d09c320b71796475a012155a2d9b8b8b845e638c947e7e9551f22bd6fcbb1288
5
+ SHA512:
6
+ metadata.gz: 9882fa7f3d7e3727922536b05523405678806ad3b57be3094bddfa4e6430fb10ae86148b05582a6e648efa662a15cea7f78f8bc6fc2064da0427d15dc4b95787
7
+ data.tar.gz: 0e4ff78680cdedd9ef3271b84cd4bf1b1550eaf27199644f4b64a30a1fad1bb30429d98f39e270630fc201505d094abf11e09194a4b527ce3e9eac816a3ab8b4
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.standard.yml ADDED
@@ -0,0 +1,3 @@
1
+ # For available configuration options, see:
2
+ # https://github.com/standardrb/standard
3
+ ruby_version: 3.4
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2025-12-28
4
+
5
+ - Initial release
data/README.md ADDED
@@ -0,0 +1,183 @@
1
+ # clickhouse-rb
2
+
3
+ A fast Ruby client for the ClickHouse database that uses the Native binary format for efficient data transfer.
4
+
5
+ ## Features
6
+
7
+ - Native binary format parsing (faster than JSON/TSV)
8
+ - Persistent HTTP connections
9
+ - Connection pooling for thread-safe concurrent access
10
+ - Supports all common ClickHouse data types
11
+
12
+ ## Installation
13
+
14
+ Add to your Gemfile:
15
+
16
+ ```ruby
17
+ gem "clickhouse-rb"
18
+ ```
19
+
20
+ Then run:
21
+
22
+ ```bash
23
+ bundle install
24
+ ```
25
+
26
+ ## Usage
27
+
28
+ ### Configuration
29
+
30
+ ```ruby
31
+ require "clickhouse"
32
+
33
+ Clickhouse.configure do |config|
34
+ config.host = "localhost"
35
+ config.port = 8123
36
+ config.database = "default"
37
+ config.username = "default"
38
+ config.password = ""
39
+ end
40
+ ```
41
+
42
+ Or configure via URL:
43
+
44
+ ```ruby
45
+ Clickhouse.configure do |config|
46
+ config.url = "http://user:pass@localhost:8123/mydb"
47
+ end
48
+ ```
49
+
50
+ ### Single Connection
51
+
52
+ ```ruby
53
+ conn = Clickhouse::Connection.new
54
+ response = conn.query("SELECT * FROM users WHERE id = 1")
55
+
56
+ if response.success?
57
+ response.rows.each do |row|
58
+ puts row.inspect
59
+ end
60
+ end
61
+ ```
62
+
63
+ ### Connection Pool
64
+
65
+ For multi-threaded applications:
66
+
67
+ ```ruby
68
+ pool = Clickhouse::Pool.new
69
+
70
+ # Thread-safe queries
71
+ threads = 10.times.map do
72
+ Thread.new { pool.query("SELECT 1") }
73
+ end
74
+ threads.each(&:join)
75
+ ```
76
+
77
+ Pool size and checkout timeout are read from the configuration given to `Clickhouse::Pool.new` (the global configuration by default):
78
+
79
+ ```ruby
80
+ Clickhouse.configure do |config|
81
+ config.pool_size = 10
82
+ config.pool_timeout = 5
83
+ end
84
+ ```
85
+
86
+ ### Working with Results
87
+
88
+ ```ruby
89
+ response = conn.query("SELECT id, name, created_at FROM users")
90
+
91
+ # Access raw rows (arrays)
92
+ response.rows # => [[1, "Alice", 2024-01-01 00:00:00 UTC], ...]
93
+ response.columns # => ["id", "name", "created_at"]
94
+ response.types # => ["UInt64", "String", "DateTime"]
95
+
96
+ # Convert to array of hashes
97
+ response.to_a # => [{"id" => 1, "name" => "Alice", ...}, ...]
98
+
99
+ # Check for errors
100
+ response.success? # => true
101
+ response.failure? # => false
102
+ response.error # => nil (or error message string)
103
+
104
+ # Query summary from ClickHouse
105
+ response.summary # => {"read_rows" => "1", "read_bytes" => "42", ...}
106
+ ```
107
+
108
+ ### Query Parameters
109
+
110
+ ```ruby
111
+ response = conn.query(
112
+ "SELECT * FROM users WHERE id = {id:UInt64}",
113
+ params: { param_id: 123 }
114
+ )
115
+ ```
116
+
117
+ ## Supported Data Types
118
+
119
+ | ClickHouse Type | Ruby Type |
120
+ |-----------------|-----------|
121
+ | UInt8/16/32/64 | Integer |
122
+ | UInt128/256 | Integer |
123
+ | Int8/16/32/64 | Integer |
124
+ | Int128/256 | Integer |
125
+ | Float32/64 | Float |
126
+ | Decimal | BigDecimal |
127
+ | Bool | TrueClass/FalseClass |
128
+ | String, FixedString | String |
129
+ | Date, Date32 | Date |
130
+ | DateTime, DateTime64 | Time |
131
+ | UUID | String |
132
+ | IPv4, IPv6 | IPAddr |
133
+ | Enum8, Enum16 | Integer |
134
+ | Array | Array |
135
+ | Tuple | Array |
136
+ | Map | Hash |
137
+ | Nullable | nil or inner type |
138
+ | LowCardinality | inner type |
139
+
140
+ ## Configuration Options
141
+
142
+ | Option | Default | Description |
143
+ |--------|---------|-------------|
144
+ | `scheme` | `"http"` | URL scheme (http/https) |
145
+ | `host` | `"localhost"` | ClickHouse server host |
146
+ | `port` | `8123` | ClickHouse HTTP port |
147
+ | `database` | `"default"` | Database name |
148
+ | `username` | `""` | Authentication username |
149
+ | `password` | `""` | Authentication password |
150
+ | `connection_timeout` | `5` | Connection timeout in seconds |
151
+ | `pool_size` | `100` | Connection pool size |
152
+ | `pool_timeout` | `5` | Pool checkout timeout in seconds |
153
+
154
+ ## Error Handling
155
+
156
+ ```ruby
157
+ response = conn.query("INVALID SQL")
158
+
159
+ if response.failure?
160
+ puts "Query failed: #{response.error}"
161
+ end
162
+
163
+ # Unsupported types raise an exception
164
+ begin
165
+ conn.query("SELECT '{}'::JSON")
166
+ rescue Clickhouse::UnsupportedTypeError => e
167
+ puts "Unsupported type: #{e.message}"
168
+ end
169
+ ```
170
+
171
+ ## Development
172
+
173
+ ```bash
174
+ # Run tests (requires ClickHouse)
175
+ CLICKHOUSE_URL=http://default:password@localhost:8123/default bundle exec rspec
176
+
177
+ # Run linter
178
+ bundle exec standardrb
179
+ ```
180
+
181
+ ## License
182
+
183
+ MIT License
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module Clickhouse
  # Buffered binary reader on top of a chunked stream.
  #
  # The underlying object is only required to respond to +readpartial+
  # (called without arguments) and to return +nil+ once it is exhausted.
  # @api private
  class BufferedReader
    # Creates a new buffered reader.
    #
    # @param io [#readpartial] IO object supporting readpartial
    def initialize(io)
      @io = io
      @buffer = String.new(encoding: Encoding::BINARY)
      @eof = false
    end

    # Returns true if at end of stream.
    #
    # @return [Boolean]
    def eof?
      fill if @buffer.empty?
      @buffer.empty?
    end

    # Reads exactly n bytes from the stream.
    #
    # @param n [Integer] number of bytes to read
    # @return [String] binary string of n bytes
    # @raise [EOFError] if the stream ends before n bytes are available
    def read(n)
      # Stop pulling once the source is exhausted; otherwise a truncated
      # stream would spin here forever because #fill is a no-op after EOF.
      fill until @buffer.bytesize >= n || @eof

      if @buffer.bytesize < n
        raise EOFError, "unexpected end of stream (wanted #{n} bytes, #{@buffer.bytesize} available)"
      end

      @buffer.slice!(0, n)
    end

    # Reads a single byte from the stream.
    #
    # @return [Integer, nil] byte value (0-255) or nil if at EOF
    def read_byte
      fill if @buffer.empty?
      return if @buffer.empty?

      @buffer.slice!(0, 1).ord
    end

    # Discards buffered data and drains whatever is left in the stream.
    #
    # @return [void]
    def flush
      @buffer.clear
      return if @eof # already exhausted, do not poll the source again

      nil while @io.readpartial
      @eof = true
      nil
    end

    private

    # Appends the next non-empty chunk to the buffer, or marks EOF when the
    # source returns nil.
    def fill
      return if @eof

      loop do
        chunk = @io.readpartial
        if chunk.nil?
          @eof = true
          break
        elsif !chunk.empty?
          @buffer << chunk
          break
        end
        # Empty chunk - keep reading (gzip header processing)
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+
5
module Clickhouse
  # Configuration for ClickHouse connection.
  #
  # @example
  #   config = Clickhouse::Config.new(host: "db.example.com", port: 8123)
  #
  # @example Using URL
  #   config = Clickhouse::Config.new
  #   config.url = "http://user:pass@localhost:8123/mydb"
  class Config
    DEFAULTS = {
      scheme: "http",
      host: "localhost",
      port: 8123,
      database: "default",
      username: "",
      password: "",
      connection_timeout: 5,
      pool_size: 100,
      pool_timeout: 5
    }.freeze

    # @return [String] URL scheme (http or https)
    # @return [String] ClickHouse server hostname
    # @return [Integer] ClickHouse server port
    # @return [String] Database name
    # @return [String] Username for authentication
    # @return [String] Password for authentication
    # @return [Integer] Connection timeout in seconds
    # @return [Integer] Connection pool size
    # @return [Integer] Pool checkout timeout in seconds
    attr_accessor :scheme, :host, :port, :database, :username, :password, :connection_timeout, :pool_size, :pool_timeout

    # Creates a new configuration instance.
    #
    # Every key is applied through the matching writer, so +DEFAULTS+ are
    # assigned first and explicitly passed values override them.
    #
    # @param params [Hash] configuration options
    # @option params [String] :scheme URL scheme (default: "http")
    # @option params [String] :host server hostname (default: "localhost")
    # @option params [Integer] :port server port (default: 8123)
    # @option params [String] :database database name (default: "default")
    # @option params [String] :username authentication username (default: "")
    # @option params [String] :password authentication password (default: "")
    # @option params [Integer] :connection_timeout timeout in seconds (default: 5)
    # @option params [Integer] :pool_size connection pool size (default: 100)
    # @option params [Integer] :pool_timeout pool checkout timeout (default: 5)
    def initialize(params = {})
      DEFAULTS.merge(params).each do |key, value|
        public_send("#{key}=", value)
      end
    end

    # Sets scheme, host, port, database, username and password from a URL.
    #
    # Components missing from the URL fall back to {DEFAULTS} instead of being
    # stored as nil or an empty database name. Percent-encoded characters in
    # the user and password parts are decoded.
    #
    # @param url [String] ClickHouse connection URL
    # @return [void]
    def url=(url)
      uri = URI(url)
      database = uri.path.to_s.delete_prefix("/")

      @scheme = uri.scheme || DEFAULTS[:scheme]
      @host = uri.host || DEFAULTS[:host]
      @port = uri.port || DEFAULTS[:port]
      @database = database.empty? ? DEFAULTS[:database] : database
      @username = decode_userinfo(uri.user) || DEFAULTS[:username]
      @password = decode_userinfo(uri.password) || DEFAULTS[:password]
    end

    private

    # Decodes %XX escapes of a URL userinfo component; returns nil for nil.
    def decode_userinfo(value)
      return if value.nil?

      # Work on a binary copy so escapes above 0x7F can be spliced in safely.
      value.b.gsub(/%\h\h/) { |escape| escape[1, 2].hex.chr }.force_encoding(Encoding::UTF_8)
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Clickhouse
  # One client connection to a ClickHouse server.
  #
  # @example
  #   conn = Clickhouse::Connection.new
  #   response = conn.query("SELECT * FROM users WHERE id = 1")
  class Connection
    # @return [Config] the configuration used by this connection
    attr_reader :config

    # Builds the connection together with its HTTP transport.
    #
    # @param config [Config] configuration instance (defaults to global config)
    def initialize(config = Clickhouse.config)
      @config = config
      @transport = HttpTransport.new(config)
    end

    # Runs a SQL statement and wraps the outcome in a Response.
    #
    # A failed transport result becomes a Response carrying only the error and
    # summary; a successful one has its body parsed from the Native format.
    #
    # @param sql [String] SQL query to execute
    # @param options [Hash] query options
    # @option options [Hash] :params query parameters
    # @return [Response] query response with rows, columns, and metadata
    def query(sql, options = {})
      outcome = @transport.execute(sql, options)

      if outcome.success
        parsed = NativeFormatParser.new(outcome.body).parse
        parsed.with(summary: outcome.summary)
      else
        Response.new(error: outcome.error, summary: outcome.summary)
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "http"
4
+ require "json"
5
+
6
module Clickhouse
  # HTTP transport layer for ClickHouse communication.
  # @api private
  class HttpTransport
    # Response header carrying the JSON-encoded query summary.
    SUMMARY_HEADER = "X-ClickHouse-Summary"

    # Creates a new HTTP transport.
    #
    # Builds a persistent HTTP client for the configured scheme/host/port and
    # the headers sent with every request (credentials and Native format).
    #
    # @param config [Config] configuration instance
    def initialize(config)
      @config = config
      @http_client = HTTP.persistent("#{config.scheme}://#{config.host}:#{config.port}")
        .use(:auto_deflate)
        .use(:auto_inflate)
        .timeout(connect: config.connection_timeout)

      @default_headers = {
        "Accept-Encoding" => "gzip",
        "X-ClickHouse-User" => config.username,
        "X-ClickHouse-Key" => config.password,
        "X-ClickHouse-Format" => "Native"
      }
    end

    # Executes a SQL query via HTTP.
    #
    # The SQL is sent as the POST body; the configured database and any
    # +:params+ entries are sent as URL query parameters.
    #
    # @param sql [String] SQL query to execute
    # @param options [Hash] query options
    # @option options [Hash] :params query parameters
    # @return [TransportResult] result containing body or error
    def execute(sql, options = {})
      query_params = {database: @config.database}.merge(options[:params] || {})
      response = @http_client.post("/", params: query_params, body: sql, headers: @default_headers)
      summary = parse_summary(response.headers[SUMMARY_HEADER])

      if response.status.success?
        TransportResult.new(success: true, body: response.body, error: nil, summary: summary)
      else
        TransportResult.new(success: false, body: nil, error: response.body.to_s, summary: summary)
      end
    end

    private

    # Decodes the summary header; returns nil when the header is absent or
    # empty so a response without it does not raise from JSON.parse.
    def parse_summary(raw)
      return if raw.nil? || raw.empty?

      JSON.parse(raw)
    end
  end
end
@@ -0,0 +1,378 @@
1
+ # frozen_string_literal: true
2
+
3
require "bigdecimal"
require "date"
require "ipaddr"
5
+
6
module Clickhouse
  # Parser for ClickHouse Native binary format.
  #
  # The stream is a sequence of blocks. Each block starts with two varints
  # (column count, row count) followed, per column, by its name, its type
  # string and the column data for all rows of the block (column-major).
  # @api private
  class NativeFormatParser
    DATE_EPOCH = Date.new(1970, 1, 1)

    # Creates a new parser.
    #
    # @param body [#readpartial] response body to parse
    def initialize(body)
      @body = body
      @columns = []
      @types = []
      @rows = []
    end

    # Parses the response body and returns a Response.
    #
    # The remaining stream is always drained afterwards, including when
    # parsing fails.
    #
    # @return [Response] parsed response with columns, types, and rows
    # @raise [UnsupportedTypeError] if an unsupported data type is encountered
    # @raise [EOFError] if the stream ends in the middle of a varint
    def parse
      @reader = BufferedReader.new(@body)
      parse_block until @reader.eof?
      Response.new(columns: @columns, types: @types, rows: @rows)
    ensure
      @reader&.flush
    end

    private

    # Reads one block and appends its rows to @rows. Column names and types
    # are recorded from the first block that carries columns.
    def parse_block
      num_columns = read_varint
      num_rows = read_varint

      return if num_columns == 0 && num_rows == 0

      columns_data = Array.new(num_columns) do
        col_name = read_string
        col_type = read_string

        if @columns.length < num_columns
          @columns << col_name
          @types << col_type
        end

        # A block without rows carries no column data at all, so nothing may
        # be consumed for it (LowCardinality would otherwise try to read its
        # version/dictionary header from the next block).
        num_rows.zero? ? [] : read_column(col_type, num_rows)
      end

      num_rows.times do |row_idx|
        @rows << columns_data.map { |col| col[row_idx] }
      end
    end

    # Dispatches on the ClickHouse type string and reads num_rows values.
    #
    # @param type [String] ClickHouse type name, e.g. "Array(Nullable(UInt8))"
    # @param num_rows [Integer] number of values to read
    # @return [Array] decoded values
    # @raise [UnsupportedTypeError] for types without a reader
    def read_column(type, num_rows)
      case type
      # Integers
      when "UInt8" then read_uint8_column(num_rows)
      when "UInt16" then read_uint16_column(num_rows)
      when "UInt32" then read_uint32_column(num_rows)
      when "UInt64" then read_uint64_column(num_rows)
      when "UInt128" then Array.new(num_rows) { read_uint128 }
      when "UInt256" then Array.new(num_rows) { read_uint256 }
      when "Int8" then read_int8_column(num_rows)
      when "Int16" then read_int16_column(num_rows)
      when "Int32" then read_int32_column(num_rows)
      when "Int64" then read_int64_column(num_rows)
      when "Int128" then Array.new(num_rows) { read_int128 }
      when "Int256" then Array.new(num_rows) { read_int256 }

      # Floats
      when "Float32" then read_float32_column(num_rows)
      when "Float64" then read_float64_column(num_rows)

      # Boolean
      when "Bool" then read_bool_column(num_rows)

      # Strings
      when "String" then Array.new(num_rows) { read_string }
      when /\AFixedString\((\d+)\)\z/ then read_fixed_string_column($1.to_i, num_rows)

      # Dates and Times
      when "Date" then read_date_column(num_rows)
      when "Date32" then read_date32_column(num_rows)
      when "DateTime", /\ADateTime\(.+\)\z/ then read_datetime_column(num_rows)
      when /\ADateTime64\((\d+)(?:,.*)?\)\z/ then read_datetime64_column($1.to_i, num_rows)

      # UUID
      when "UUID" then Array.new(num_rows) { read_uuid }

      # IP addresses
      when "IPv4" then Array.new(num_rows) { read_ipv4 }
      when "IPv6" then Array.new(num_rows) { read_ipv6 }

      # Decimals - ClickHouse always returns Decimal(precision, scale)
      when /\ADecimal\((\d+),\s*(\d+)\)\z/ then read_decimal_column($1.to_i, $2.to_i, num_rows)

      # Enums (stored as signed integers)
      when /\AEnum8\(.+\)\z/ then read_int8_column(num_rows)
      when /\AEnum16\(.+\)\z/ then read_int16_column(num_rows)

      # Nullable
      when /\ANullable\((.+)\)\z/ then read_nullable_column($1, num_rows)

      # LowCardinality
      when /\ALowCardinality\((.+)\)\z/ then read_low_cardinality_column($1, num_rows)

      # Arrays
      when /\AArray\((.+)\)\z/ then read_array_column($1, num_rows)

      # Tuples
      when /\ATuple\((.*)\)\z/ then read_tuple_column(parse_tuple_types($1), num_rows)

      # Maps
      when /\AMap\((.+)\)\z/
        types = parse_tuple_types($1)
        read_map_column(types[0], types[1], num_rows)

      else
        raise UnsupportedTypeError, "Unsupported column type: #{type}"
      end
    end

    # --- Bulk Column Readers ---
    # Fixed-width values are read in one chunk and decoded little-endian.

    def read_uint8_column(num_rows)
      @reader.read(num_rows).bytes
    end

    def read_uint16_column(num_rows)
      @reader.read(num_rows * 2).unpack("v*")
    end

    def read_uint32_column(num_rows)
      @reader.read(num_rows * 4).unpack("V*")
    end

    def read_uint64_column(num_rows)
      @reader.read(num_rows * 8).unpack("Q<*")
    end

    def read_int8_column(num_rows)
      @reader.read(num_rows).unpack("c*")
    end

    def read_int16_column(num_rows)
      @reader.read(num_rows * 2).unpack("s<*")
    end

    def read_int32_column(num_rows)
      @reader.read(num_rows * 4).unpack("l<*")
    end

    def read_int64_column(num_rows)
      @reader.read(num_rows * 8).unpack("q<*")
    end

    def read_float32_column(num_rows)
      @reader.read(num_rows * 4).unpack("e*")
    end

    def read_float64_column(num_rows)
      @reader.read(num_rows * 8).unpack("E*")
    end

    def read_bool_column(num_rows)
      @reader.read(num_rows).bytes.map { |b| b == 1 }
    end

    def read_fixed_string_column(length, num_rows)
      Array.new(num_rows) { @reader.read(length).force_encoding(Encoding::UTF_8) }
    end

    # Date: unsigned 16-bit day count since 1970-01-01.
    def read_date_column(num_rows)
      @reader.read(num_rows * 2).unpack("v*").map { |days| DATE_EPOCH + days }
    end

    # Date32: signed 32-bit day count since 1970-01-01.
    def read_date32_column(num_rows)
      @reader.read(num_rows * 4).unpack("l<*").map { |days| DATE_EPOCH + days }
    end

    # DateTime: unsigned 32-bit epoch seconds, returned as UTC Time.
    def read_datetime_column(num_rows)
      @reader.read(num_rows * 4).unpack("V*").map { |ts| Time.at(ts).utc }
    end

    # DateTime64: signed 64-bit tick count where one tick is 10**-precision
    # seconds; ticks are scaled to nanoseconds before building the Time.
    def read_datetime64_column(precision, num_rows)
      scale = 10**(9 - precision)
      @reader.read(num_rows * 8).unpack("q<*").map do |ticks|
        nsec = ticks * scale
        Time.at(nsec / 1_000_000_000, nsec % 1_000_000_000, :nanosecond).utc
      end
    end

    # Decimal: a signed integer whose width is chosen by precision
    # (4, 8, 16 or 32 bytes), divided by 10**scale.
    def read_decimal_column(precision, scale, num_rows)
      divisor = 10**scale
      if precision <= 9
        @reader.read(num_rows * 4).unpack("l<*").map { |v| BigDecimal(v) / divisor }
      elsif precision <= 18
        @reader.read(num_rows * 8).unpack("q<*").map { |v| BigDecimal(v) / divisor }
      elsif precision <= 38
        Array.new(num_rows) { BigDecimal(read_signed_le_bytes(16)) / divisor }
      else
        Array.new(num_rows) { BigDecimal(read_signed_le_bytes(32)) / divisor }
      end
    end

    # --- Single Value Readers ---

    # Reads a variable-length unsigned integer: 7 payload bits per byte,
    # least-significant group first, high bit set on every byte but the last.
    #
    # @raise [EOFError] if the stream ends before the terminating byte
    def read_varint
      result = 0
      shift = 0
      loop do
        byte = @reader.read_byte
        # A nil byte here means the stream was cut off; returning the partial
        # value would make the caller misinterpret the rest of the data.
        raise EOFError, "unexpected end of stream while reading varint" if byte.nil?

        result |= (byte & 0x7F) << shift
        return result if (byte & 0x80) == 0

        shift += 7
      end
    end

    # String: varint byte length followed by that many bytes.
    def read_string
      @reader.read(read_varint).force_encoding(Encoding::UTF_8)
    end

    def read_uint64 = @reader.read(8).unpack1("Q<")
    def read_uint128 = read_le_bytes(16)
    def read_uint256 = read_le_bytes(32)
    def read_int128 = read_signed_le_bytes(16)
    def read_int256 = read_signed_le_bytes(32)

    # UUID: two 8-byte halves, each stored byte-reversed.
    def read_uuid
      first_half = @reader.read(8).bytes.reverse
      second_half = @reader.read(8).bytes.reverse
      hex = (first_half + second_half).pack("C*").unpack1("H*")
      "#{hex[0, 8]}-#{hex[8, 4]}-#{hex[12, 4]}-#{hex[16, 4]}-#{hex[20, 12]}"
    end

    # IPv4: 4 bytes in reversed (little-endian) order.
    def read_ipv4
      bytes = @reader.read(4).unpack("C4").reverse
      IPAddr.new(bytes.join("."))
    end

    # IPv6: 16 bytes in network order.
    def read_ipv6
      bytes = @reader.read(16)
      IPAddr.new(bytes.unpack1("H*").scan(/.{4}/).join(":"), Socket::AF_INET6)
    end

    # --- Container Type Readers ---

    # Nullable: nulls mask (uint8 per row, 1=null), then all values
    def read_nullable_column(inner_type, num_rows)
      nulls = @reader.read(num_rows).bytes
      values = read_column(inner_type, num_rows)
      values.each_with_index.map { |v, i| (nulls[i] == 1) ? nil : v }
    end

    # Array: cumulative offsets (uint64 per row), then all elements
    def read_array_column(inner_type, num_rows)
      offsets = read_uint64_column(num_rows)
      total_elements = offsets.last || 0

      return Array.new(num_rows) { [] } if total_elements == 0

      elements = read_column(inner_type, total_elements)

      prev_offset = 0
      offsets.map do |offset|
        slice = elements[prev_offset...offset]
        prev_offset = offset
        slice
      end
    end

    # Map: cumulative offsets (uint64 per row), then all keys, then all values
    def read_map_column(key_type, value_type, num_rows)
      offsets = read_uint64_column(num_rows)
      total_pairs = offsets.last || 0

      return Array.new(num_rows) { {} } if total_pairs == 0

      keys = read_column(key_type, total_pairs)
      values = read_column(value_type, total_pairs)

      prev_offset = 0
      offsets.map do |offset|
        pairs = keys[prev_offset...offset].zip(values[prev_offset...offset]).to_h
        prev_offset = offset
        pairs
      end
    end

    # Tuple: all values of element 0, then element 1, etc. (column-major)
    # Empty tuples send 1 byte per row.
    def read_tuple_column(element_types, num_rows)
      if element_types.empty?
        @reader.read(num_rows)
        return Array.new(num_rows) { [] }
      end

      element_columns = element_types.map { |type| read_column(type, num_rows) }

      Array.new(num_rows) { |i| element_columns.map { |col| col[i] } }
    end

    # LowCardinality: version, meta, dictionary, keys
    #
    # The low byte of meta selects the key width (0=UInt8, 1=UInt16,
    # 2=UInt32, otherwise UInt64). For LowCardinality(Nullable(T)) the
    # dictionary is serialized as plain T (no null mask) and dictionary
    # index 0 stands for NULL.
    def read_low_cardinality_column(inner_type, num_rows)
      _version = read_uint64
      meta = read_uint64
      key_type = meta & 0xFF

      nested_type = inner_type[/\ANullable\((.+)\)\z/, 1]

      dict_size = read_uint64
      dictionary = read_column(nested_type || inner_type, dict_size)
      dictionary[0] = nil if nested_type && !dictionary.empty?

      _num_keys = read_uint64
      keys = case key_type
      when 0 then read_uint8_column(num_rows)
      when 1 then read_uint16_column(num_rows)
      when 2 then read_uint32_column(num_rows)
      else read_uint64_column(num_rows)
      end

      keys.map { |k| dictionary[k] }
    end

    # --- Helpers ---

    # Reads num_bytes as an unsigned little-endian integer.
    def read_le_bytes(num_bytes)
      @reader.read(num_bytes).bytes.each_with_index.sum { |b, i| b << (8 * i) }
    end

    # Reads num_bytes as a two's-complement little-endian integer.
    def read_signed_le_bytes(num_bytes)
      value = read_le_bytes(num_bytes)
      max_positive = 1 << (num_bytes * 8 - 1)
      (value >= max_positive) ? value - (1 << (num_bytes * 8)) : value
    end

    # Splits a comma-separated type list on top-level commas only, so nested
    # types such as "String, Array(Tuple(UInt8, UInt8))" stay intact.
    def parse_tuple_types(types_str)
      types = []
      depth = 0
      current = +""

      types_str.each_char do |c|
        case c
        when "("
          depth += 1
          current << c
        when ")"
          depth -= 1
          current << c
        when ","
          if depth == 0
            types << current.strip
            current = +""
          else
            current << c
          end
        else
          current << c
        end
      end

      types << current.strip unless current.empty?
      types
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "connection_pool"
4
+
5
module Clickhouse
  # Pool of ClickHouse connections backed by ConnectionPool.
  #
  # @example
  #   pool = Clickhouse::Pool.new
  #   response = pool.query("SELECT * FROM users")
  #
  # @example Concurrent usage
  #   pool = Clickhouse::Pool.new
  #   threads = 10.times.map do
  #     Thread.new { pool.query("SELECT 1") }
  #   end
  #   threads.each(&:join)
  class Pool
    # Sets up the pool; each pooled entry is a Connection built from config.
    #
    # @param config [Config] configuration instance (defaults to global config)
    #   Pool size and timeout are read from config.pool_size and config.pool_timeout
    def initialize(config = Clickhouse.config)
      size = config.pool_size
      timeout = config.pool_timeout
      @pool = ConnectionPool.new(size: size, timeout: timeout) { Connection.new(config) }
    end

    # Checks a connection out of the pool and runs the query on it.
    #
    # @param sql [String] SQL query to execute
    # @param options [Hash] query options
    # @option options [Hash] :params query parameters
    # @return [Response] query response with rows, columns, and metadata
    def query(sql, options = {})
      @pool.with do |connection|
        connection.query(sql, options)
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Clickhouse
  # Value object holding the outcome of a query.
  #
  # @example Successful response
  #   response = conn.query("SELECT id, name FROM users")
  #   response.success?  # => true
  #   response.columns   # => ["id", "name"]
  #   response.rows      # => [[1, "Alice"], [2, "Bob"]]
  #   response.to_a      # => [{"id" => 1, "name" => "Alice"}, ...]
  #
  # @example Failed response
  #   response = conn.query("INVALID SQL")
  #   response.failure?  # => true
  #   response.error     # => "Syntax error..."
  Response = Data.define(:columns, :types, :rows, :error, :summary) do
    # Every member is optional; collections default to empty arrays and
    # error/summary default to nil.
    #
    # @param columns [Array<String>] column names
    # @param types [Array<String>] column types
    # @param rows [Array<Array>] row data
    # @param error [String, nil] error message if query failed
    # @param summary [Hash, nil] ClickHouse query summary
    def initialize(columns: [], types: [], rows: [], error: nil, summary: nil)
      super(columns: columns, types: types, rows: rows, error: error, summary: summary)
    end

    # Whether the query succeeded, i.e. no error is set.
    # @return [Boolean]
    def success?
      error.nil?
    end

    # Whether the query failed.
    # @return [Boolean]
    def failure?
      !success?
    end

    # Builds one hash per row, keyed by column name.
    # @return [Array<Hash>] rows as hashes with column names as keys
    def to_a
      rows.map do |values|
        columns.zip(values).to_h
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Clickhouse
  # Value object returned by the transport layer.
  # @api private
  #
  # @!attribute [r] success
  #   @return [Boolean] true if request succeeded
  # @!attribute [r] body
  #   @return [HTTP::Response::Body, nil] response body for successful requests
  # @!attribute [r] error
  #   @return [String, nil] error message for failed requests
  # @!attribute [r] summary
  #   @return [Hash] ClickHouse query summary
  TransportResult = Data.define(*%i[success body error summary])
end
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Clickhouse
  # Version string of the gem.
  VERSION = "0.1.0"
end
data/lib/clickhouse.rb ADDED
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "clickhouse/version"
4
+ require_relative "clickhouse/config"
5
+ require_relative "clickhouse/transport_result"
6
+ require_relative "clickhouse/http_transport"
7
+ require_relative "clickhouse/connection"
8
+ require_relative "clickhouse/pool"
9
+ require_relative "clickhouse/response"
10
+ require_relative "clickhouse/buffered_reader"
11
+ require_relative "clickhouse/native_format_parser"
12
+
13
+ # Ruby client for ClickHouse database with Native format support.
14
+ #
15
+ # @example Basic usage
16
+ # Clickhouse.configure do |config|
17
+ # config.host = "localhost"
18
+ # config.port = 8123
19
+ # end
20
+ #
21
+ # conn = Clickhouse::Connection.new
22
+ # response = conn.query("SELECT 1")
23
+ #
24
+ # @example Using connection pool
25
+ # pool = Clickhouse::Pool.new
26
+ # response = pool.query("SELECT * FROM users")
27
module Clickhouse
  # Base error class for all Clickhouse errors
  Error = Class.new(StandardError)

  # Raised when encountering an unsupported ClickHouse data type
  UnsupportedTypeError = Class.new(Error)

  class << self
    # Lazily builds and memoizes the global configuration.
    #
    # @return [Config] the configuration instance
    def config
      @config ||= Config.new
    end

    # Hands the global configuration to the given block, if any.
    #
    # @yield [Config] the configuration instance
    # @return [void]
    def configure
      return unless block_given?

      yield(config)
    end
  end
end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clickhouse-rb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Karol Bąk
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: bigdecimal
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '3.1'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '3.1'
26
+ - !ruby/object:Gem::Dependency
27
+ name: connection_pool
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '3.0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '3.0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: http
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '5.0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '5.0'
54
+ description: Fast Ruby client for ClickHouse database using the Native binary format
55
+ for efficient data transfer
56
+ email:
57
+ - kukicola@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".rspec"
63
+ - ".standard.yml"
64
+ - CHANGELOG.md
65
+ - README.md
66
+ - lib/clickhouse.rb
67
+ - lib/clickhouse/buffered_reader.rb
68
+ - lib/clickhouse/config.rb
69
+ - lib/clickhouse/connection.rb
70
+ - lib/clickhouse/http_transport.rb
71
+ - lib/clickhouse/native_format_parser.rb
72
+ - lib/clickhouse/pool.rb
73
+ - lib/clickhouse/response.rb
74
+ - lib/clickhouse/transport_result.rb
75
+ - lib/clickhouse/version.rb
76
+ homepage: https://github.com/kukicola/clickhouse-rb
77
+ licenses:
78
+ - MIT
79
+ metadata:
80
+ changelog_uri: https://github.com/kukicola/clickhouse-rb/blob/main/CHANGELOG.md
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: 3.1.0
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubygems_version: 3.6.7
96
+ specification_version: 4
97
+ summary: Ruby client for ClickHouse with Native format support
98
+ test_files: []