ch_connect 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +24 -0
- data/lib/ch_connect/body_reader.rb +44 -5
- data/lib/ch_connect/config.rb +4 -1
- data/lib/ch_connect/http_transport.rb +1 -0
- data/lib/ch_connect/native_format_parser.rb +12 -13
- data/lib/ch_connect/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 50be174654df1074c817ad1cf895a206bc165452dda7fc47b4b6209aa622565f
|
|
4
|
+
data.tar.gz: e0b9467ebc013a701653ff16c277651e0e81fac81d225ea6d9cc5f2d64b5bb57
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 27992acbdb31511f7838201941f09f36587f2786e2df1b0bb000acc308405fd93d494955c202d0c732c2eace0bde0b0c5ffb106887f5ac9321c028e09d832384
|
|
7
|
+
data.tar.gz: 6e64f8873f628a856e21f199462c9eef435216ef0c8ea72f7f959a744e10115d85b63cf51f0700babd7919a8f8eb514f13557adc54077a14b01d5e49e1c2a753
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
## Unreleased
|
|
2
2
|
|
|
3
|
+
## [0.2.1] - 2026-02-08
|
|
4
|
+
|
|
5
|
+
- Added automatic retries on connection errors with configurable `max_retries` (default: 3) ([#6](https://github.com/kukicola/ch_connect/pull/6))
|
|
6
|
+
|
|
7
|
+
## [0.2.0] - 2026-01-31
|
|
8
|
+
|
|
9
|
+
- Added benchmark suite comparing against other ClickHouse Ruby gems ([#5](https://github.com/kukicola/ch_connect/pull/5))
|
|
10
|
+
- Optimized BodyReader with chunked buffering for better memory efficiency ([#5](https://github.com/kukicola/ch_connect/pull/5))
|
|
11
|
+
- Optimized NativeFormatParser with transpose-based row building ([#5](https://github.com/kukicola/ch_connect/pull/5))
|
|
12
|
+
|
|
3
13
|
## [0.1.0] - 2026-01-31
|
|
4
14
|
|
|
5
15
|
- Initial release
|
data/README.md
CHANGED
|
@@ -8,9 +8,32 @@ Fast Ruby client for ClickHouse database using the Native binary format for effi
|
|
|
8
8
|
|
|
9
9
|
- Native binary format parsing (faster than JSON/TSV)
|
|
10
10
|
- Persistent HTTP connections with built-in connection pooling
|
|
11
|
+
- Automatic retries on connection errors
|
|
11
12
|
- Thread-safe concurrent access
|
|
12
13
|
- Supports all common ClickHouse data types
|
|
13
14
|
|
|
15
|
+
## Benchmarks
|
|
16
|
+
|
|
17
|
+
Compared against other Ruby ClickHouse gems ([click_house](https://github.com/shlima/click_house), [clickhouse](https://github.com/archan937/clickhouse), [click_house-client](https://gitlab.com/gitlab-org/ruby/gems/clickhouse-client)) on Ruby 3.4.3:
|
|
18
|
+
|
|
19
|
+
**Speed (iterations/second, higher is better):**
|
|
20
|
+
|
|
21
|
+
| Scenario | ch_connect | click_house | clickhouse | click_house-client |
|
|
22
|
+
|----------|------------|-------------|------------|--------------------|
|
|
23
|
+
| Small queries (10 rows) | **680 i/s** | 342 i/s (2.0x slower) | 293 i/s (2.3x slower) | 346 i/s (2.0x slower) |
|
|
24
|
+
| Large queries (100K rows) | **3.5 i/s** | 1.1 i/s (3.3x slower) | 0.5 i/s (6.9x slower) | 1.6 i/s (2.2x slower) |
|
|
25
|
+
|
|
26
|
+
**Memory (large query, lower is better):**
|
|
27
|
+
|
|
28
|
+
| Gem | Allocated |
|
|
29
|
+
|-----|-----------|
|
|
30
|
+
| ch_connect | **130 MB** |
|
|
31
|
+
| click_house | 205 MB (1.6x more) |
|
|
32
|
+
| clickhouse | 483 MB (3.7x more) |
|
|
33
|
+
| click_house-client | 210 MB (1.6x more) |
|
|
34
|
+
|
|
35
|
+
See `benchmark/` directory for full benchmark suite and methodology.
|
|
36
|
+
|
|
14
37
|
## Installation
|
|
15
38
|
|
|
16
39
|
Add to your Gemfile:
|
|
@@ -156,6 +179,7 @@ response = conn.query(
|
|
|
156
179
|
| `write_timeout` | `60` | Write timeout in seconds |
|
|
157
180
|
| `pool_size` | `100` | Connection pool size |
|
|
158
181
|
| `pool_timeout` | `5` | Pool checkout timeout in seconds |
|
|
182
|
+
| `max_retries` | `3` | Max retry attempts on connection errors (0 to disable) |
|
|
159
183
|
| `instrumenter` | `NullInstrumenter` | Instrumenter for query instrumentation |
|
|
160
184
|
|
|
161
185
|
## Instrumentation
|
|
@@ -1,16 +1,21 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module ChConnect
|
|
4
|
-
# Wrapper for HTTP response body providing
|
|
4
|
+
# Wrapper for HTTP response body providing buffered reads.
|
|
5
|
+
# Reads data in chunks for efficient small reads.
|
|
5
6
|
# @api private
|
|
6
7
|
class BodyReader
|
|
8
|
+
CHUNK_SIZE = 64 * 1024 # 64KB chunks
|
|
9
|
+
|
|
7
10
|
# Creates a new body reader.
|
|
8
11
|
#
|
|
9
12
|
# @param body [#read, #bytesize, #close] HTTP response body
|
|
10
13
|
def initialize(body)
|
|
11
14
|
@body = body
|
|
12
|
-
@pos = 0
|
|
13
15
|
@size = body.bytesize
|
|
16
|
+
@buffer = "".b
|
|
17
|
+
@buffer_pos = 0
|
|
18
|
+
@eof = false
|
|
14
19
|
end
|
|
15
20
|
|
|
16
21
|
# Closes the underlying body.
|
|
@@ -24,7 +29,8 @@ module ChConnect
|
|
|
24
29
|
#
|
|
25
30
|
# @return [Boolean]
|
|
26
31
|
def eof?
|
|
27
|
-
@
|
|
32
|
+
fill_buffer(1) if @buffer_pos >= @buffer.bytesize && !@eof
|
|
33
|
+
@eof && @buffer_pos >= @buffer.bytesize
|
|
28
34
|
end
|
|
29
35
|
|
|
30
36
|
# Reads exactly n bytes from the body.
|
|
@@ -32,9 +38,42 @@ module ChConnect
|
|
|
32
38
|
# @param n [Integer] number of bytes to read
|
|
33
39
|
# @return [String] binary string of n bytes
|
|
34
40
|
def read(n)
|
|
35
|
-
|
|
36
|
-
@
|
|
41
|
+
fill_buffer(n)
|
|
42
|
+
result = @buffer.byteslice(@buffer_pos, n)
|
|
43
|
+
@buffer_pos += n
|
|
44
|
+
compact_buffer if @buffer_pos > CHUNK_SIZE
|
|
37
45
|
result
|
|
38
46
|
end
|
|
47
|
+
|
|
48
|
+
# Reads a single byte as integer, returns nil at EOF.
|
|
49
|
+
#
|
|
50
|
+
# @return [Integer, nil] byte value or nil at EOF
|
|
51
|
+
def getbyte
|
|
52
|
+
fill_buffer(1)
|
|
53
|
+
return nil if @buffer_pos >= @buffer.bytesize
|
|
54
|
+
|
|
55
|
+
byte = @buffer.getbyte(@buffer_pos)
|
|
56
|
+
@buffer_pos += 1
|
|
57
|
+
compact_buffer if @buffer_pos > CHUNK_SIZE
|
|
58
|
+
byte
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
private
|
|
62
|
+
|
|
63
|
+
def fill_buffer(needed)
|
|
64
|
+
while !@eof && (@buffer.bytesize - @buffer_pos) < needed
|
|
65
|
+
chunk = @body.read(CHUNK_SIZE)
|
|
66
|
+
if chunk.nil? || chunk.empty?
|
|
67
|
+
@eof = true
|
|
68
|
+
else
|
|
69
|
+
@buffer << chunk
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def compact_buffer
|
|
75
|
+
@buffer = @buffer.byteslice(@buffer_pos..-1) || "".b
|
|
76
|
+
@buffer_pos = 0
|
|
77
|
+
end
|
|
39
78
|
end
|
|
40
79
|
end
|
data/lib/ch_connect/config.rb
CHANGED
|
@@ -24,6 +24,7 @@ module ChConnect
|
|
|
24
24
|
write_timeout: 60,
|
|
25
25
|
pool_size: 100,
|
|
26
26
|
pool_timeout: 5,
|
|
27
|
+
max_retries: 3,
|
|
27
28
|
instrumenter: NullInstrumenter.new
|
|
28
29
|
}.freeze
|
|
29
30
|
|
|
@@ -38,8 +39,9 @@ module ChConnect
|
|
|
38
39
|
# @return [Integer] Write timeout in seconds
|
|
39
40
|
# @return [Integer] Connection pool size
|
|
40
41
|
# @return [Integer] Pool checkout timeout in seconds
|
|
42
|
+
# @return [Integer] Max retry attempts on connection errors
|
|
41
43
|
# @return [#instrument] Instrumenter for query instrumentation
|
|
42
|
-
attr_accessor :scheme, :host, :port, :database, :username, :password, :connection_timeout, :read_timeout, :write_timeout, :pool_size, :pool_timeout, :instrumenter
|
|
44
|
+
attr_accessor :scheme, :host, :port, :database, :username, :password, :connection_timeout, :read_timeout, :write_timeout, :pool_size, :pool_timeout, :max_retries, :instrumenter
|
|
43
45
|
|
|
44
46
|
# Creates a new configuration instance.
|
|
45
47
|
#
|
|
@@ -55,6 +57,7 @@ module ChConnect
|
|
|
55
57
|
# @option params [Integer] :write_timeout write timeout in seconds (default: 60)
|
|
56
58
|
# @option params [Integer] :pool_size connection pool size (default: 100)
|
|
57
59
|
# @option params [Integer] :pool_timeout pool checkout timeout (default: 5)
|
|
60
|
+
# @option params [Integer] :max_retries max retry attempts on connection errors (default: 3)
|
|
58
61
|
def initialize(params = {})
|
|
59
62
|
DEFAULTS.merge(params).each do |key, value|
|
|
60
63
|
send("#{key}=", value)
|
|
@@ -14,6 +14,7 @@ module ChConnect
|
|
|
14
14
|
@config = config
|
|
15
15
|
@base_url = "#{config.scheme}://#{config.host}:#{config.port}"
|
|
16
16
|
@http_client = HTTPX.plugin(:persistent, close_on_fork: true)
|
|
17
|
+
.plugin(:retries, max_retries: config.max_retries, retry_change_requests: true)
|
|
17
18
|
.with(
|
|
18
19
|
timeout: {
|
|
19
20
|
connect_timeout: config.connection_timeout,
|
|
@@ -52,9 +52,7 @@ module ChConnect
|
|
|
52
52
|
columns_data << read_column(col_type, num_rows)
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
@rows << columns_data.map { |col| col[row_idx] }
|
|
57
|
-
end
|
|
55
|
+
@rows.concat(columns_data.transpose) if num_rows > 0
|
|
58
56
|
end
|
|
59
57
|
|
|
60
58
|
def read_column(type, num_rows)
|
|
@@ -247,9 +245,8 @@ module ChConnect
|
|
|
247
245
|
result = 0
|
|
248
246
|
shift = 0
|
|
249
247
|
loop do
|
|
250
|
-
|
|
251
|
-
return result if
|
|
252
|
-
byte = byte_str.ord
|
|
248
|
+
byte = @reader.getbyte
|
|
249
|
+
return result if byte.nil?
|
|
253
250
|
result |= (byte & 0x7F) << shift
|
|
254
251
|
break if (byte & 0x80) == 0
|
|
255
252
|
shift += 7
|
|
@@ -286,7 +283,8 @@ module ChConnect
|
|
|
286
283
|
def read_nullable_column(inner_type, num_rows)
|
|
287
284
|
nulls = @reader.read(num_rows).bytes
|
|
288
285
|
values = read_column(inner_type, num_rows)
|
|
289
|
-
|
|
286
|
+
num_rows.times { |i| values[i] = nil if nulls[i] == 1 }
|
|
287
|
+
values
|
|
290
288
|
end
|
|
291
289
|
|
|
292
290
|
# Array: cumulative offsets (uint64 per row), then all elements
|
|
@@ -298,10 +296,10 @@ module ChConnect
|
|
|
298
296
|
|
|
299
297
|
elements = read_column(inner_type, total_elements)
|
|
300
298
|
|
|
301
|
-
arrays =
|
|
299
|
+
arrays = Array.new(num_rows)
|
|
302
300
|
prev_offset = 0
|
|
303
|
-
offsets.
|
|
304
|
-
arrays
|
|
301
|
+
offsets.each_with_index do |offset, i|
|
|
302
|
+
arrays[i] = elements.slice(prev_offset, offset - prev_offset)
|
|
305
303
|
prev_offset = offset
|
|
306
304
|
end
|
|
307
305
|
arrays
|
|
@@ -317,10 +315,11 @@ module ChConnect
|
|
|
317
315
|
keys = read_column(key_type, total_pairs)
|
|
318
316
|
values = read_column(value_type, total_pairs)
|
|
319
317
|
|
|
320
|
-
maps =
|
|
318
|
+
maps = Array.new(num_rows)
|
|
321
319
|
prev_offset = 0
|
|
322
|
-
offsets.
|
|
323
|
-
|
|
320
|
+
offsets.each_with_index do |offset, i|
|
|
321
|
+
len = offset - prev_offset
|
|
322
|
+
maps[i] = keys.slice(prev_offset, len).zip(values.slice(prev_offset, len)).to_h
|
|
324
323
|
prev_offset = offset
|
|
325
324
|
end
|
|
326
325
|
maps
|
data/lib/ch_connect/version.rb
CHANGED