ch_connect 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e4176a0227a2a6a7e94c379e75d37325d7cf51428c01131bb88ca9903a9a62e3
4
+ data.tar.gz: fa438af73ded93b0f5eae03cd77543414d596ba6fc19e23f2e6bc536bb28be21
5
+ SHA512:
6
+ metadata.gz: a5718c7457b517d5586e210b597500400bea09145e24a89088ff797b200be4d99522759e7dd1381e3dd79f65bee3c83fbf7a611a7c2abcf39bb3c1f6613a5fbd
7
+ data.tar.gz: be54de4f246642315f5a815afc2b78498dc30890172cb7318e2cff82a3f801be9f92b86e35c4977f0a30fc3074f1b19a36d7fcfe8f8e80766eaf589833164646
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.standard.yml ADDED
@@ -0,0 +1,3 @@
1
+ # For available configuration options, see:
2
+ # https://github.com/standardrb/standard
3
+ ruby_version: 3.4
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## Unreleased
2
+
3
+ ## [0.1.0] - 2026-01-31
4
+
5
+ - Initial release
data/README.md ADDED
@@ -0,0 +1,207 @@
1
+ # ch_connect
2
+
3
+ > **Note:** This gem was previously published as `clickhouse-rb` and has been renamed to `ch_connect` due to name conflicts.
4
+
5
+ Fast Ruby client for ClickHouse database using the Native binary format for efficient data transfer.
6
+
7
+ ## Features
8
+
9
+ - Native binary format parsing (faster than JSON/TSV)
10
+ - Persistent HTTP connections with built-in connection pooling
11
+ - Thread-safe concurrent access
12
+ - Supports all common ClickHouse data types
13
+
14
+ ## Installation
15
+
16
+ Add to your Gemfile:
17
+
18
+ ```ruby
19
+ gem "ch_connect"
20
+ ```
21
+
22
+ Then run:
23
+
24
+ ```bash
25
+ bundle install
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ ### Configuration
31
+
32
+ ```ruby
33
+ require "ch_connect"
34
+
35
+ ChConnect.configure do |config|
36
+ config.host = "localhost"
37
+ config.port = 8123
38
+ config.database = "default"
39
+ config.username = "default"
40
+ config.password = ""
41
+ end
42
+ ```
43
+
44
+ Or configure via URL:
45
+
46
+ ```ruby
47
+ ChConnect.configure do |config|
48
+ config.url = "http://user:pass@localhost:8123/mydb"
49
+ end
50
+ ```
51
+
52
+ ### Single Connection
53
+
54
+ ```ruby
55
+ conn = ChConnect::Connection.new
56
+ response = conn.query("SELECT id, name FROM users WHERE active = true")
57
+
58
+ response.each do |row|
59
+ puts "#{row[:id]}: #{row[:name]}"
60
+ end
61
+ ```
62
+
63
+ ### Thread-Safe Usage
64
+
65
+ Connections use httpx's built-in connection pooling, making them safe for concurrent use:
66
+
67
+ ```ruby
68
+ conn = ChConnect::Connection.new
69
+
70
+ threads = 10.times.map do
71
+ Thread.new { conn.query("SELECT 1") }
72
+ end
73
+ threads.each(&:join)
74
+ ```
75
+
76
+ Pool settings are configured globally:
77
+
78
+ ```ruby
79
+ ChConnect.configure do |config|
80
+ config.pool_size = 10
81
+ config.pool_timeout = 5
82
+ end
83
+ ```
84
+
85
+ ### Working with Results
86
+
87
+ Response objects implement `Enumerable`, allowing direct iteration:
88
+
89
+ ```ruby
90
+ response = conn.query("SELECT id, name, created_at FROM users")
91
+
92
+ # Iterate over rows as hashes with symbol keys
93
+ response.each { |row| puts row[:name] }
94
+
95
+ # Use any Enumerable method
96
+ response.map { |row| row[:id] }
97
+ response.select { |row| row[:id] > 10 }
98
+ response.first # => {id: 1, name: "Alice", created_at: 2024-01-01 00:00:00 UTC}
99
+
100
+ # Access raw rows (arrays)
101
+ response.rows # => [[1, "Alice", 2024-01-01 00:00:00 UTC], ...]
102
+ response.columns # => [:id, :name, :created_at]
103
+ response.types # => [:UInt64, :String, :DateTime]
104
+
105
+ # Convert to array of hashes
106
+ response.to_a # => [{id: 1, name: "Alice", ...}, ...]
107
+
108
+ # Query summary from ClickHouse (symbol keys)
109
+ response.summary # => {read_rows: "1", read_bytes: "42", ...}
110
+ ```
111
+
112
+ ### Query Parameters
113
+
114
+ ```ruby
115
+ response = conn.query(
116
+ "SELECT * FROM users WHERE id = {id:UInt64}",
117
+ params: { param_id: 123 }
118
+ )
119
+ ```
120
+
121
+ ## Supported Data Types
122
+
123
+ | ClickHouse Type | Ruby Type |
124
+ |-----------------|-----------|
125
+ | UInt8/16/32/64 | Integer |
126
+ | UInt128/256 | Integer |
127
+ | Int8/16/32/64 | Integer |
128
+ | Int128/256 | Integer |
129
+ | Float32/64 | Float |
130
+ | Decimal | BigDecimal |
131
+ | Bool | TrueClass/FalseClass |
132
+ | String, FixedString | String |
133
+ | Date, Date32 | Date |
134
+ | DateTime, DateTime64 | Time |
135
+ | UUID | String |
136
+ | IPv4, IPv6 | IPAddr |
137
+ | Enum8, Enum16 | Integer |
138
+ | Array | Array |
139
+ | Tuple | Array |
140
+ | Map | Hash |
141
+ | Nullable | nil or inner type |
142
+ | LowCardinality | inner type |
143
+
144
+ ## Configuration Options
145
+
146
+ | Option | Default | Description |
147
+ |--------|---------|-------------|
148
+ | `scheme` | `"http"` | URL scheme (http/https) |
149
+ | `host` | `"localhost"` | ClickHouse server host |
150
+ | `port` | `8123` | ClickHouse HTTP port |
151
+ | `database` | `"default"` | Database name |
152
+ | `username` | `""` | Authentication username |
153
+ | `password` | `""` | Authentication password |
154
+ | `connection_timeout` | `5` | Connection timeout in seconds |
155
+ | `read_timeout` | `60` | Read timeout in seconds |
156
+ | `write_timeout` | `60` | Write timeout in seconds |
157
+ | `pool_size` | `100` | Connection pool size |
158
+ | `pool_timeout` | `5` | Pool checkout timeout in seconds |
159
+ | `instrumenter` | `NullInstrumenter` | Instrumenter for query instrumentation |
160
+
161
+ ## Instrumentation
162
+
163
+ You can instrument queries by providing an instrumenter that responds to `#instrument`:
164
+
165
+ ```ruby
166
+ ChConnect.configure do |config|
167
+ config.instrumenter = ActiveSupport::Notifications
168
+ end
169
+
170
+ # Subscribe to events
171
+ ActiveSupport::Notifications.subscribe("query.clickhouse") do |name, start, finish, id, payload|
172
+ puts "Query: #{payload[:sql]} took #{finish - start}s"
173
+ end
174
+ ```
175
+
176
+ The instrumenter receives event name `"query.clickhouse"` and payload `{sql: "..."}`.
177
+
178
+ ## Error Handling
179
+
180
+ ```ruby
181
+ begin
182
+ conn.query("INVALID SQL")
183
+ rescue ChConnect::QueryError => e
184
+ puts "Query failed: #{e.message}"
185
+ end
186
+
187
+ # Unsupported types raise an exception
188
+ begin
189
+ conn.query("SELECT '{}'::JSON")
190
+ rescue ChConnect::UnsupportedTypeError => e
191
+ puts "Unsupported type: #{e.message}"
192
+ end
193
+ ```
194
+
195
+ ## Development
196
+
197
+ ```bash
198
+ # Run tests (requires ClickHouse)
199
+ CLICKHOUSE_URL=http://default:password@localhost:8123/default bundle exec rspec
200
+
201
+ # Run linter
202
+ bundle exec standardrb
203
+ ```
204
+
205
+ ## License
206
+
207
+ MIT License
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module ChConnect
  # Wrapper for HTTP response body providing position tracking and EOF detection.
  # @api private
  class BodyReader
    # Creates a new body reader.
    #
    # @param body [#read, #bytesize, #close] HTTP response body
    def initialize(body)
      @body = body
      @pos = 0
      # The total size is captured once; eof? compares the read position against it.
      @size = body.bytesize
    end

    # Closes the underlying body.
    #
    # @return [void]
    def close
      @body.close
    end

    # Returns true if at end of stream.
    #
    # @return [Boolean]
    def eof?
      @pos >= @size
    end

    # Reads up to n bytes from the body and advances the position by the
    # number of bytes actually delivered.
    #
    # @param n [Integer] number of bytes to read
    # @return [String, nil] whatever the underlying body returned; this may be
    #   shorter than n bytes, or nil/empty, once the body is exhausted
    def read(n)
      result = @body.read(n)
      delivered = result ? result.bytesize : 0

      if delivered.zero? && n.positive?
        # The body had nothing left to give: pin the position to the end so
        # that eof? becomes true and callers looping on it always terminate.
        @pos = @size
      else
        @pos += delivered
      end

      result
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+
5
+ module ChConnect
6
+ # Configuration for ClickHouse connection.
7
+ #
8
+ # @example
9
+ # config = ChConnect::Config.new(host: "db.example.com", port: 8123)
10
+ #
11
+ # @example Using URL
12
+ # config = ChConnect::Config.new
13
+ # config.url = "http://user:pass@localhost:8123/mydb"
14
+ class Config
15
+ DEFAULTS = {
16
+ scheme: "http",
17
+ host: "localhost",
18
+ port: 8123,
19
+ database: "default",
20
+ username: "",
21
+ password: "",
22
+ connection_timeout: 5,
23
+ read_timeout: 60,
24
+ write_timeout: 60,
25
+ pool_size: 100,
26
+ pool_timeout: 5,
27
+ instrumenter: NullInstrumenter.new
28
+ }.freeze
29
+
30
+ # @return [String] URL scheme (http or https)
31
+ # @return [String] ClickHouse server hostname
32
+ # @return [Integer] ClickHouse server port
33
+ # @return [String] Database name
34
+ # @return [String] Username for authentication
35
+ # @return [String] Password for authentication
36
+ # @return [Integer] Connection timeout in seconds
37
+ # @return [Integer] Read timeout in seconds
38
+ # @return [Integer] Write timeout in seconds
39
+ # @return [Integer] Connection pool size
40
+ # @return [Integer] Pool checkout timeout in seconds
41
+ # @return [#instrument] Instrumenter for query instrumentation
42
+ attr_accessor :scheme, :host, :port, :database, :username, :password, :connection_timeout, :read_timeout, :write_timeout, :pool_size, :pool_timeout, :instrumenter
43
+
44
+ # Creates a new configuration instance.
45
+ #
46
+ # @param params [Hash] configuration options
47
+ # @option params [String] :scheme URL scheme (default: "http")
48
+ # @option params [String] :host server hostname (default: "localhost")
49
+ # @option params [Integer] :port server port (default: 8123)
50
+ # @option params [String] :database database name (default: "default")
51
+ # @option params [String] :username authentication username (default: "")
52
+ # @option params [String] :password authentication password (default: "")
53
+ # @option params [Integer] :connection_timeout connection timeout in seconds (default: 5)
54
+ # @option params [Integer] :read_timeout read timeout in seconds (default: 60)
55
+ # @option params [Integer] :write_timeout write timeout in seconds (default: 60)
56
+ # @option params [Integer] :pool_size connection pool size (default: 100)
57
+ # @option params [Integer] :pool_timeout pool checkout timeout (default: 5)
58
+ def initialize(params = {})
59
+ DEFAULTS.merge(params).each do |key, value|
60
+ send("#{key}=", value)
61
+ end
62
+ end
63
+
64
# Sets configuration from a URL string.
#
# Only the components present in the URL are applied; anything the URL
# omits (for example the path/database or the credentials) keeps its
# current value instead of being overwritten with nil or an empty string.
# Percent-encoded credentials are decoded before being stored.
#
# @param url [String] ClickHouse connection URL,
#   e.g. "http://user:pass@localhost:8123/mydb"
# @return [void]
# @raise [URI::InvalidURIError] if the URL cannot be parsed
def url=(url)
  uri = URI(url)
  # "+" is a legal literal in userinfo, but decode_www_form_component would
  # turn it into a space, so it is protected before decoding.
  decode = ->(component) { URI.decode_www_form_component(component.gsub("+", "%2B")) }

  @scheme = uri.scheme if uri.scheme
  @host = uri.host if uri.host
  @port = uri.port if uri.port

  database = uri.path.to_s.delete_prefix("/")
  @database = database unless database.empty?

  @username = decode.call(uri.user) if uri.user
  @password = decode.call(uri.password) if uri.password
end
77
+ end
78
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChConnect
4
+ # A single connection to ClickHouse server.
5
+ #
6
+ # @example
7
+ # conn = ChConnect::Connection.new
8
+ # response = conn.query("SELECT * FROM users WHERE id = 1")
9
+ class Connection
10
+ # @return [Config] the configuration used by this connection
11
+ attr_reader :config
12
+
13
+ # Creates a new connection.
14
+ #
15
+ # @param config [Config] configuration instance (defaults to global config)
16
+ def initialize(config = ChConnect.config)
17
+ @config = config
18
+ @transport = HttpTransport.new(config)
19
+ end
20
+
21
+ # Executes a SQL query and returns the response.
22
+ #
23
+ # @param sql [String] SQL query to execute
24
+ # @param options [Hash] query options
25
+ # @option options [Hash] :params query parameters
26
+ # @return [Response] query response with rows, columns, and metadata
27
+ # @raise [QueryError] if the query fails
28
+ def query(sql, options = {})
29
+ @config.instrumenter.instrument("query.clickhouse", {sql: sql}) do
30
+ result = @transport.execute(sql, options)
31
+ NativeFormatParser.new(result.body).parse.with(summary: result.summary)
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "httpx"
4
+ require "json"
5
+
6
+ module ChConnect
7
+ # HTTP transport layer for ClickHouse communication.
8
+ # @api private
9
+ class HttpTransport
10
+ # Creates a new HTTP transport.
11
+ #
12
+ # @param config [Config] configuration instance
13
+ def initialize(config)
14
+ @config = config
15
+ @base_url = "#{config.scheme}://#{config.host}:#{config.port}"
16
+ @http_client = HTTPX.plugin(:persistent, close_on_fork: true)
17
+ .with(
18
+ timeout: {
19
+ connect_timeout: config.connection_timeout,
20
+ read_timeout: config.read_timeout,
21
+ write_timeout: config.write_timeout
22
+ },
23
+ pool_options: {
24
+ max_connections_per_origin: config.pool_size,
25
+ pool_timeout: config.pool_timeout
26
+ }
27
+ )
28
+
29
+ @default_headers = {
30
+ "Accept-Encoding" => "gzip",
31
+ "X-ClickHouse-User" => config.username,
32
+ "X-ClickHouse-Key" => config.password,
33
+ "X-ClickHouse-Format" => "Native"
34
+ }
35
+ end
36
+
37
# Executes a SQL query via HTTP.
#
# @param sql [String] SQL query to execute
# @param options [Hash] query options
# @option options [Hash] :params query parameters, merged into the request's
#   URL parameters next to the configured database
# @return [TransportResult] result containing the response body and the
#   parsed X-ClickHouse-Summary header (nil when the header is absent)
# @raise [QueryError] if the request errors or the status is not 200
def execute(sql, options = {})
  query_params = {database: @config.database}.merge(options[:params] || {})
  response = @http_client.post(@base_url, params: query_params, body: sql, headers: @default_headers)

  raise QueryError, response.error.message if response.error
  # Validate the status before looking at headers, so a failed request is
  # reported as QueryError rather than as a header-parsing error.
  raise QueryError, response.body.to_s unless response.status == 200

  # The summary header is optional; JSON.parse(nil) would raise TypeError.
  raw_summary = response.headers["x-clickhouse-summary"]
  summary = raw_summary && JSON.parse(raw_summary, symbolize_names: true)

  TransportResult.new(body: response.body, summary: summary)
end
56
+ end
57
+ end
@@ -0,0 +1,406 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bigdecimal"
4
+ require "date"
+ require "ipaddr"
5
+
6
+ module ChConnect
7
+ # Parser for ClickHouse Native binary format.
8
+ # @api private
9
+ class NativeFormatParser
10
+ DATE_EPOCH = Date.new(1970, 1, 1)
11
+
12
+ # Creates a new parser.
13
+ #
14
+ # @param body [#read, #bytesize, #close] response body to parse
15
+ def initialize(body)
16
+ @reader = BodyReader.new(body)
17
+ @columns = []
18
+ @types = []
19
+ @rows = []
20
+ end
21
+
22
+ # Parses the response body and returns a Response.
23
+ #
24
+ # @return [Response] parsed response with columns, types, and rows
25
+ # @raise [UnsupportedTypeError] if an unsupported data type is encountered
26
+ def parse
27
+ parse_block until @reader.eof?
28
+ Response.new(columns: @columns, types: @types, rows: @rows)
29
+ ensure
30
+ @reader.close
31
+ end
32
+
33
+ private
34
+
35
+ def parse_block
36
+ num_columns = read_varint
37
+ num_rows = read_varint
38
+
39
+ return if num_columns == 0 && num_rows == 0
40
+
41
+ columns_data = []
42
+
43
+ num_columns.times do
44
+ col_name = read_string
45
+ col_type = read_string
46
+
47
+ if @columns.length < num_columns
48
+ @columns << col_name.to_sym
49
+ @types << col_type.to_sym
50
+ end
51
+
52
+ columns_data << read_column(col_type, num_rows)
53
+ end
54
+
55
+ num_rows.times do |row_idx|
56
+ @rows << columns_data.map { |col| col[row_idx] }
57
+ end
58
+ end
59
+
60
+ def read_column(type, num_rows)
61
+ case type
62
+ # Integers
63
+ when "UInt8" then read_uint8_column(num_rows)
64
+ when "UInt16" then read_uint16_column(num_rows)
65
+ when "UInt32" then read_uint32_column(num_rows)
66
+ when "UInt64" then read_uint64_column(num_rows)
67
+ when "UInt128" then read_uint128_column(num_rows)
68
+ when "UInt256" then read_uint256_column(num_rows)
69
+ when "Int8" then read_int8_column(num_rows)
70
+ when "Int16" then read_int16_column(num_rows)
71
+ when "Int32" then read_int32_column(num_rows)
72
+ when "Int64" then read_int64_column(num_rows)
73
+ when "Int128" then read_int128_column(num_rows)
74
+ when "Int256" then read_int256_column(num_rows)
75
+
76
+ # Floats
77
+ when "Float32" then read_float32_column(num_rows)
78
+ when "Float64" then read_float64_column(num_rows)
79
+
80
+ # Boolean
81
+ when "Bool" then read_bool_column(num_rows)
82
+
83
+ # Strings
84
+ when "String" then read_string_column(num_rows)
85
+ when /^FixedString\((\d+)\)$/ then read_fixed_string_column($1.to_i, num_rows)
86
+
87
+ # Dates and Times
88
+ when "Date" then read_date_column(num_rows)
89
+ when "Date32" then read_date32_column(num_rows)
90
+ when "DateTime", /^DateTime\(.+\)$/ then read_datetime_column(num_rows)
91
+ when /^DateTime64\((\d+)(?:,.*)?\)$/ then read_datetime64_column($1.to_i, num_rows)
92
+
93
+ # UUID
94
+ when "UUID" then read_uuid_column(num_rows)
95
+
96
+ # IP addresses
97
+ when "IPv4" then read_ipv4_column(num_rows)
98
+ when "IPv6" then read_ipv6_column(num_rows)
99
+
100
+ # Decimals - ClickHouse always returns Decimal(precision, scale)
101
+ when /^Decimal\((\d+),\s*(\d+)\)$/ then read_decimal_column($1.to_i, $2.to_i, num_rows)
102
+
103
+ # Enums (stored as signed integers)
104
+ when /^Enum8\(.+\)$/ then read_int8_column(num_rows)
105
+ when /^Enum16\(.+\)$/ then read_int16_column(num_rows)
106
+
107
+ # Nullable
108
+ when /^Nullable\((.+)\)$/ then read_nullable_column($1, num_rows)
109
+
110
+ # LowCardinality
111
+ when /^LowCardinality\((.+)\)$/ then read_low_cardinality_column($1, num_rows)
112
+
113
+ # Arrays
114
+ when /^Array\((.+)\)$/ then read_array_column($1, num_rows)
115
+
116
+ # Tuples
117
+ when /^Tuple\((.*)\)$/ then read_tuple_column(parse_tuple_types($1), num_rows)
118
+
119
+ # Maps
120
+ when /^Map\((.+)\)$/
121
+ types = parse_tuple_types($1)
122
+ read_map_column(types[0], types[1], num_rows)
123
+
124
+ else
125
+ raise UnsupportedTypeError, "Unsupported column type: #{type}"
126
+ end
127
+ end
128
+
129
+ # --- Bulk Column Readers ---
130
+
131
+ def read_uint8_column(num_rows)
132
+ @reader.read(num_rows).bytes
133
+ end
134
+
135
+ def read_uint16_column(num_rows)
136
+ @reader.read(num_rows * 2).unpack("v*")
137
+ end
138
+
139
+ def read_uint32_column(num_rows)
140
+ @reader.read(num_rows * 4).unpack("V*")
141
+ end
142
+
143
+ def read_uint64_column(num_rows)
144
+ @reader.read(num_rows * 8).unpack("Q<*")
145
+ end
146
+
147
+ def read_uint128_column(num_rows)
148
+ Array.new(num_rows) { read_le_bytes(16) }
149
+ end
150
+
151
+ def read_uint256_column(num_rows)
152
+ Array.new(num_rows) { read_le_bytes(32) }
153
+ end
154
+
155
+ def read_int8_column(num_rows)
156
+ @reader.read(num_rows).unpack("c*")
157
+ end
158
+
159
+ def read_int16_column(num_rows)
160
+ @reader.read(num_rows * 2).unpack("s<*")
161
+ end
162
+
163
+ def read_int32_column(num_rows)
164
+ @reader.read(num_rows * 4).unpack("l<*")
165
+ end
166
+
167
+ def read_int64_column(num_rows)
168
+ @reader.read(num_rows * 8).unpack("q<*")
169
+ end
170
+
171
+ def read_int128_column(num_rows)
172
+ Array.new(num_rows) { read_signed_le_bytes(16) }
173
+ end
174
+
175
+ def read_int256_column(num_rows)
176
+ Array.new(num_rows) { read_signed_le_bytes(32) }
177
+ end
178
+
179
+ def read_float32_column(num_rows)
180
+ @reader.read(num_rows * 4).unpack("e*")
181
+ end
182
+
183
+ def read_float64_column(num_rows)
184
+ @reader.read(num_rows * 8).unpack("E*")
185
+ end
186
+
187
+ def read_bool_column(num_rows)
188
+ @reader.read(num_rows).bytes.map { |b| b == 1 }
189
+ end
190
+
191
+ def read_string_column(num_rows)
192
+ Array.new(num_rows) { read_string }
193
+ end
194
+
195
+ def read_fixed_string_column(length, num_rows)
196
+ Array.new(num_rows) { @reader.read(length).force_encoding(Encoding::UTF_8) }
197
+ end
198
+
199
+ def read_date_column(num_rows)
200
+ @reader.read(num_rows * 2).unpack("v*").map { |days| DATE_EPOCH + days }
201
+ end
202
+
203
+ def read_date32_column(num_rows)
204
+ @reader.read(num_rows * 4).unpack("l<*").map { |days| DATE_EPOCH + days }
205
+ end
206
+
207
+ def read_datetime_column(num_rows)
208
+ @reader.read(num_rows * 4).unpack("V*").map { |ts| Time.at(ts).utc }
209
+ end
210
+
211
+ def read_datetime64_column(precision, num_rows)
212
+ scale = 10**(9 - precision)
213
+ @reader.read(num_rows * 8).unpack("q<*").map do |ticks|
214
+ nsec = ticks * scale
215
+ Time.at(nsec / 1_000_000_000, nsec % 1_000_000_000, :nanosecond).utc
216
+ end
217
+ end
218
+
219
+ def read_uuid_column(num_rows)
220
+ Array.new(num_rows) { read_uuid }
221
+ end
222
+
223
+ def read_ipv4_column(num_rows)
224
+ Array.new(num_rows) { read_ipv4 }
225
+ end
226
+
227
+ def read_ipv6_column(num_rows)
228
+ Array.new(num_rows) { read_ipv6 }
229
+ end
230
+
231
+ def read_decimal_column(precision, scale, num_rows)
232
+ divisor = 10**scale
233
+ if precision <= 9
234
+ @reader.read(num_rows * 4).unpack("l<*").map { |v| BigDecimal(v) / divisor }
235
+ elsif precision <= 18
236
+ @reader.read(num_rows * 8).unpack("q<*").map { |v| BigDecimal(v) / divisor }
237
+ elsif precision <= 38
238
+ Array.new(num_rows) { BigDecimal(read_signed_le_bytes(16)) / divisor }
239
+ else
240
+ Array.new(num_rows) { BigDecimal(read_signed_le_bytes(32)) / divisor }
241
+ end
242
+ end
243
+
244
+ # --- Single Value Readers ---
245
+
246
+ def read_varint
247
+ result = 0
248
+ shift = 0
249
+ loop do
250
+ byte_str = @reader.read(1)
251
+ return result if byte_str.nil? || byte_str.empty?
252
+ byte = byte_str.ord
253
+ result |= (byte & 0x7F) << shift
254
+ break if (byte & 0x80) == 0
255
+ shift += 7
256
+ end
257
+ result
258
+ end
259
+
260
+ def read_string
261
+ @reader.read(read_varint).force_encoding(Encoding::UTF_8)
262
+ end
263
+
264
+ def read_uint64 = @reader.read(8).unpack1("Q<")
265
+
266
+ def read_uuid
267
+ first_half = @reader.read(8).bytes.reverse
268
+ second_half = @reader.read(8).bytes.reverse
269
+ hex = (first_half + second_half).pack("C*").unpack1("H*")
270
+ "#{hex[0, 8]}-#{hex[8, 4]}-#{hex[12, 4]}-#{hex[16, 4]}-#{hex[20, 12]}"
271
+ end
272
+
273
+ def read_ipv4
274
+ bytes = @reader.read(4).unpack("C4").reverse
275
+ IPAddr.new(bytes.join("."))
276
+ end
277
+
278
+ def read_ipv6
279
+ bytes = @reader.read(16)
280
+ IPAddr.new(bytes.unpack1("H*").scan(/.{4}/).join(":"), Socket::AF_INET6)
281
+ end
282
+
283
+ # --- Container Type Readers ---
284
+
285
+ # Nullable: nulls mask (uint8 per row, 1=null), then all values
286
+ def read_nullable_column(inner_type, num_rows)
287
+ nulls = @reader.read(num_rows).bytes
288
+ values = read_column(inner_type, num_rows)
289
+ values.each_with_index.map { |v, i| (nulls[i] == 1) ? nil : v }
290
+ end
291
+
292
+ # Array: cumulative offsets (uint64 per row), then all elements
293
+ def read_array_column(inner_type, num_rows)
294
+ offsets = read_uint64_column(num_rows)
295
+ total_elements = offsets.last || 0
296
+
297
+ return Array.new(num_rows) { [] } if total_elements == 0
298
+
299
+ elements = read_column(inner_type, total_elements)
300
+
301
+ arrays = []
302
+ prev_offset = 0
303
+ offsets.each do |offset|
304
+ arrays << elements[prev_offset...offset]
305
+ prev_offset = offset
306
+ end
307
+ arrays
308
+ end
309
+
310
+ # Map: cumulative offsets (uint64 per row), then all keys, then all values
311
+ def read_map_column(key_type, value_type, num_rows)
312
+ offsets = read_uint64_column(num_rows)
313
+ total_pairs = offsets.last || 0
314
+
315
+ return Array.new(num_rows) { {} } if total_pairs == 0
316
+
317
+ keys = read_column(key_type, total_pairs)
318
+ values = read_column(value_type, total_pairs)
319
+
320
+ maps = []
321
+ prev_offset = 0
322
+ offsets.each do |offset|
323
+ maps << keys[prev_offset...offset].zip(values[prev_offset...offset]).to_h
324
+ prev_offset = offset
325
+ end
326
+ maps
327
+ end
328
+
329
+ # Tuple: all values of element 0, then element 1, etc. (column-major)
330
+ # Empty tuples send 1 byte per row.
331
+ def read_tuple_column(element_types, num_rows)
332
+ if element_types.empty?
333
+ @reader.read(num_rows)
334
+ return Array.new(num_rows) { [] }
335
+ end
336
+
337
+ element_columns = element_types.map { |type| read_column(type, num_rows) }
338
+
339
+ Array.new(num_rows) { |i| element_columns.map { |col| col[i] } }
340
+ end
341
+
342
+ # LowCardinality: version, meta, dictionary, keys
343
+ def read_low_cardinality_column(inner_type, num_rows)
344
+ _version = read_uint64
345
+ meta = read_uint64
346
+ key_type = meta & 0xFF
347
+
348
+ dict_size = read_uint64
349
+ dictionary = read_column(inner_type, dict_size)
350
+
351
+ _num_keys = read_uint64
352
+ keys = case key_type
353
+ when 0 then read_uint8_column(num_rows)
354
+ when 1 then read_uint16_column(num_rows)
355
+ when 2 then read_uint32_column(num_rows)
356
+ else read_uint64_column(num_rows)
357
+ end
358
+
359
+ keys.map { |k| dictionary[k] }
360
+ end
361
+
362
+ # --- Helpers ---
363
+
364
+ def read_le_bytes(num_bytes)
365
+ bytes = @reader.read(num_bytes).bytes
366
+ result = 0
367
+ bytes.each_with_index { |b, i| result |= b << (8 * i) }
368
+ result
369
+ end
370
+
371
+ def read_signed_le_bytes(num_bytes)
372
+ value = read_le_bytes(num_bytes)
373
+ max_positive = 1 << (num_bytes * 8 - 1)
374
+ (value >= max_positive) ? value - (1 << (num_bytes * 8)) : value
375
+ end
376
+
377
# Splits the argument list of a composite type (the text between the outer
# parentheses of Tuple(...) or Map(...)) into its element type strings.
#
# Commas nested inside parentheses do not split, so "Decimal(9, 2)" or
# "Array(Tuple(UInt8, String))" stay intact. Element names of named tuples
# ("a UInt8, b String") are dropped so only the type is returned.
#
# @param types_str [String] comma-separated element types
# @return [Array<String>] element type strings in declaration order
def parse_tuple_types(types_str)
  # Optional element name: a bare or backtick-quoted identifier followed by
  # whitespace. Type names are followed by "(" or end-of-string, never by
  # whitespace, so a plain type is left untouched.
  name_prefix = /\A(?:`[^`]+`|[A-Za-z_]\w*)\s+(?=\S)/
  types = []
  depth = 0
  current = +""

  types_str.each_char do |c|
    if c == "," && depth.zero?
      types << current.strip.sub(name_prefix, "")
      current = +""
      next
    end

    depth += 1 if c == "("
    depth -= 1 if c == ")"
    current << c
  end

  types << current.strip.sub(name_prefix, "") unless current.empty?
  types
end
405
+ end
406
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChConnect
4
+ # Null instrumenter that does nothing.
5
+ # This is the default instrumenter used when no custom instrumenter is configured.
6
+ class NullInstrumenter
7
+ # Executes block without any instrumentation.
8
+ #
9
+ # @param _name [String] event name (ignored)
10
+ # @param _payload [Hash] event payload (ignored)
11
+ # @yield block to execute
12
+ # @return [Object] result of the block
13
+ def instrument(_name, _payload = {})
14
+ yield
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChConnect
4
+ # Immutable response object containing query results.
5
+ #
6
+ # @example
7
+ # response = conn.query("SELECT id, name FROM users")
8
+ # response.columns # => [:id, :name]
9
+ # response.rows # => [[1, "Alice"], [2, "Bob"]]
10
+ # response.each { |row| puts row[:name] }
11
+ Response = Data.define(:columns, :types, :rows, :summary) do
12
+ include Enumerable
13
+
14
+ # @param columns [Array<Symbol>] column names
15
+ # @param types [Array<Symbol>] column types
16
+ # @param rows [Array<Array>] row data
17
+ # @param summary [Hash, nil] ClickHouse query summary with symbol keys
18
+ def initialize(columns: [], types: [], rows: [], summary: nil)
19
+ super
20
+ end
21
+
22
+ # Iterates over rows as hashes with symbol keys.
23
+ # @yield [Hash] each row as a hash
24
+ # @return [Enumerator] if no block given
25
+ def each
26
+ return to_enum(:each) unless block_given?
27
+
28
+ rows.each { |row| yield columns.zip(row).to_h }
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChConnect
4
+ # Immutable result from transport layer.
5
+ # @api private
6
+ #
7
+ # @!attribute [r] body
8
+ # @return [HTTPX::Response::Body] response body
9
+ # @!attribute [r] summary
10
+ # @return [Hash] ClickHouse query summary
11
+ TransportResult = Data.define(:body, :summary)
12
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ChConnect
4
+ VERSION = "0.1.0"
5
+ end
data/lib/ch_connect.rb ADDED
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "ch_connect/version"
4
+ require_relative "ch_connect/null_instrumenter"
5
+ require_relative "ch_connect/config"
6
+ require_relative "ch_connect/transport_result"
7
+ require_relative "ch_connect/http_transport"
8
+ require_relative "ch_connect/connection"
9
+ require_relative "ch_connect/response"
10
+ require_relative "ch_connect/body_reader"
11
+ require_relative "ch_connect/native_format_parser"
12
+
13
+ # Ruby client for ClickHouse database with Native format support.
14
+ #
15
+ # @example Basic usage
16
+ # ChConnect.configure do |config|
17
+ # config.host = "localhost"
18
+ # config.port = 8123
19
+ # end
20
+ #
21
+ # conn = ChConnect::Connection.new
22
+ # response = conn.query("SELECT 1")
23
+ #
24
+ # @example Sharing one connection between threads (pooling is built in)
25
+ # conn = ChConnect::Connection.new
26
+ # threads = 4.times.map { Thread.new { conn.query("SELECT * FROM users") } }
27
+ module ChConnect
28
+ # Base error class for all ChConnect errors
29
+ class Error < StandardError; end
30
+
31
+ # Raised when a query fails (syntax error, unknown table, etc.)
32
+ class QueryError < Error; end
33
+
34
+ # Raised when encountering an unsupported ClickHouse data type
35
+ class UnsupportedTypeError < Error; end
36
+
37
+ # Returns the global configuration instance.
38
+ #
39
+ # @return [Config] the configuration instance
40
+ def self.config
41
+ @config ||= Config.new
42
+ end
43
+
44
+ # Yields the global configuration for modification.
45
+ #
46
+ # @yield [Config] the configuration instance
47
+ # @return [void]
48
+ def self.configure
49
+ yield(config) if block_given?
50
+ end
51
+ end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ch_connect
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Karol Bąk
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: bigdecimal
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '3.1'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '3.1'
26
+ - !ruby/object:Gem::Dependency
27
+ name: httpx
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '1.0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.0'
40
+ description: Fast Ruby client for ClickHouse database using the Native binary format
41
+ for efficient data transfer
42
+ email:
43
+ - kukicola@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".rspec"
49
+ - ".standard.yml"
50
+ - CHANGELOG.md
51
+ - README.md
52
+ - lib/ch_connect.rb
53
+ - lib/ch_connect/body_reader.rb
54
+ - lib/ch_connect/config.rb
55
+ - lib/ch_connect/connection.rb
56
+ - lib/ch_connect/http_transport.rb
57
+ - lib/ch_connect/native_format_parser.rb
58
+ - lib/ch_connect/null_instrumenter.rb
59
+ - lib/ch_connect/response.rb
60
+ - lib/ch_connect/transport_result.rb
61
+ - lib/ch_connect/version.rb
62
+ homepage: https://github.com/kukicola/ch_connect
63
+ licenses:
64
+ - MIT
65
+ metadata:
66
+ changelog_uri: https://github.com/kukicola/ch_connect/blob/main/CHANGELOG.md
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 3.1.0
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubygems_version: 3.6.7
82
+ specification_version: 4
83
+ summary: Ruby client for ClickHouse with Native format support
84
+ test_files: []