ch_connect 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +22 -0
- data/lib/ch_connect/body_reader.rb +44 -5
- data/lib/ch_connect/native_format_parser.rb +12 -13
- data/lib/ch_connect/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 200d184420e49176978c3e733563dee1bce43abd3ae68ec00c8d8dedba2a3d93
|
|
4
|
+
data.tar.gz: 701d1c607774c4751b78e7abe8deb8f0927ae671887159e2aba4f5386568a63c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 544a541108591c31bbff5381d73b3659f278013f29bd380b25b056fe9a2066402691a70d4fd705dd35513fa425b4267e35d3e8be06885a5e10ec7d5ed9a81701
|
|
7
|
+
data.tar.gz: 6d1ca3c49203d5c7eea71886aba4dfa70447fa1adbad832fd6b9736a6ffad4a02a58f425c56921a1e1b109a1a879de4178889fa1c33c04beeb7cb01830fcd57f
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
## Unreleased
|
|
2
2
|
|
|
3
|
+
## [0.2.0] - 2026-01-31
|
|
4
|
+
|
|
5
|
+
- Added benchmark suite comparing against other ClickHouse Ruby gems ([#5](https://github.com/kukicola/ch_connect/pull/5))
|
|
6
|
+
- Optimized BodyReader with chunked buffering for better memory efficiency ([#5](https://github.com/kukicola/ch_connect/pull/5))
|
|
7
|
+
- Optimized NativeFormatParser with transpose-based row building ([#5](https://github.com/kukicola/ch_connect/pull/5))
|
|
8
|
+
|
|
3
9
|
## [0.1.0] - 2026-01-31
|
|
4
10
|
|
|
5
11
|
- Initial release
|
data/README.md
CHANGED
|
@@ -11,6 +11,28 @@ Fast Ruby client for ClickHouse database using the Native binary format for effi
|
|
|
11
11
|
- Thread-safe concurrent access
|
|
12
12
|
- Supports all common ClickHouse data types
|
|
13
13
|
|
|
14
|
+
## Benchmarks
|
|
15
|
+
|
|
16
|
+
Compared against other Ruby ClickHouse gems ([click_house](https://github.com/shlima/click_house), [clickhouse](https://github.com/archan937/clickhouse), [click_house-client](https://gitlab.com/gitlab-org/ruby/gems/clickhouse-client)) on Ruby 3.4.3:
|
|
17
|
+
|
|
18
|
+
**Speed (iterations/second, higher is better):**
|
|
19
|
+
|
|
20
|
+
| Scenario | ch_connect | click_house | clickhouse | click_house-client |
|
|
21
|
+
|----------|------------|-------------|------------|--------------------|
|
|
22
|
+
| Small queries (10 rows) | **680 i/s** | 342 i/s (2.0x slower) | 293 i/s (2.3x slower) | 346 i/s (2.0x slower) |
|
|
23
|
+
| Large queries (100K rows) | **3.5 i/s** | 1.1 i/s (3.3x slower) | 0.5 i/s (6.9x slower) | 1.6 i/s (2.2x slower) |
|
|
24
|
+
|
|
25
|
+
**Memory (large query, lower is better):**
|
|
26
|
+
|
|
27
|
+
| Gem | Allocated |
|
|
28
|
+
|-----|-----------|
|
|
29
|
+
| ch_connect | **130 MB** |
|
|
30
|
+
| click_house | 205 MB (1.6x more) |
|
|
31
|
+
| clickhouse | 483 MB (3.7x more) |
|
|
32
|
+
| click_house-client | 210 MB (1.6x more) |
|
|
33
|
+
|
|
34
|
+
See `benchmark/` directory for full benchmark suite and methodology.
|
|
35
|
+
|
|
14
36
|
## Installation
|
|
15
37
|
|
|
16
38
|
Add to your Gemfile:
|
|
data/lib/ch_connect/body_reader.rb
CHANGED
|
@@ -1,16 +1,21 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module ChConnect
|
|
4
|
-
# Wrapper for HTTP response body providing
|
|
4
|
+
# Wrapper for HTTP response body providing buffered reads.
|
|
5
|
+
# Reads data in chunks for efficient small reads.
|
|
5
6
|
# @api private
|
|
6
7
|
class BodyReader
|
|
8
|
+
CHUNK_SIZE = 64 * 1024 # 64KB chunks
|
|
9
|
+
|
|
7
10
|
# Creates a new body reader.
|
|
8
11
|
#
|
|
9
12
|
# @param body [#read, #bytesize, #close] HTTP response body
|
|
10
13
|
def initialize(body)
|
|
11
14
|
@body = body
|
|
12
|
-
@pos = 0
|
|
13
15
|
@size = body.bytesize
|
|
16
|
+
@buffer = "".b
|
|
17
|
+
@buffer_pos = 0
|
|
18
|
+
@eof = false
|
|
14
19
|
end
|
|
15
20
|
|
|
16
21
|
# Closes the underlying body.
|
|
@@ -24,7 +29,8 @@ module ChConnect
|
|
|
24
29
|
#
|
|
25
30
|
# @return [Boolean]
|
|
26
31
|
def eof?
|
|
27
|
-
@
|
|
32
|
+
fill_buffer(1) if @buffer_pos >= @buffer.bytesize && !@eof
|
|
33
|
+
@eof && @buffer_pos >= @buffer.bytesize
|
|
28
34
|
end
|
|
29
35
|
|
|
30
36
|
# Reads exactly n bytes from the body.
|
|
@@ -32,9 +38,42 @@ module ChConnect
|
|
|
32
38
|
# @param n [Integer] number of bytes to read
|
|
33
39
|
# @return [String] binary string of n bytes
|
|
34
40
|
def read(n)
|
|
35
|
-
|
|
36
|
-
@
|
|
41
|
+
fill_buffer(n)
|
|
42
|
+
result = @buffer.byteslice(@buffer_pos, n)
|
|
43
|
+
@buffer_pos += n
|
|
44
|
+
compact_buffer if @buffer_pos > CHUNK_SIZE
|
|
37
45
|
result
|
|
38
46
|
end
|
|
47
|
+
|
|
48
|
+
# Reads a single byte as integer, returns nil at EOF.
|
|
49
|
+
#
|
|
50
|
+
# @return [Integer, nil] byte value or nil at EOF
|
|
51
|
+
def getbyte
|
|
52
|
+
fill_buffer(1)
|
|
53
|
+
return nil if @buffer_pos >= @buffer.bytesize
|
|
54
|
+
|
|
55
|
+
byte = @buffer.getbyte(@buffer_pos)
|
|
56
|
+
@buffer_pos += 1
|
|
57
|
+
compact_buffer if @buffer_pos > CHUNK_SIZE
|
|
58
|
+
byte
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
private
|
|
62
|
+
|
|
63
|
+
def fill_buffer(needed)
|
|
64
|
+
while !@eof && (@buffer.bytesize - @buffer_pos) < needed
|
|
65
|
+
chunk = @body.read(CHUNK_SIZE)
|
|
66
|
+
if chunk.nil? || chunk.empty?
|
|
67
|
+
@eof = true
|
|
68
|
+
else
|
|
69
|
+
@buffer << chunk
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def compact_buffer
|
|
75
|
+
@buffer = @buffer.byteslice(@buffer_pos..-1) || "".b
|
|
76
|
+
@buffer_pos = 0
|
|
77
|
+
end
|
|
39
78
|
end
|
|
40
79
|
end
|
|
data/lib/ch_connect/native_format_parser.rb
CHANGED
|
@@ -52,9 +52,7 @@ module ChConnect
|
|
|
52
52
|
columns_data << read_column(col_type, num_rows)
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
@rows << columns_data.map { |col| col[row_idx] }
|
|
57
|
-
end
|
|
55
|
+
@rows.concat(columns_data.transpose) if num_rows > 0
|
|
58
56
|
end
|
|
59
57
|
|
|
60
58
|
def read_column(type, num_rows)
|
|
@@ -247,9 +245,8 @@ module ChConnect
|
|
|
247
245
|
result = 0
|
|
248
246
|
shift = 0
|
|
249
247
|
loop do
|
|
250
|
-
|
|
251
|
-
return result if
|
|
252
|
-
byte = byte_str.ord
|
|
248
|
+
byte = @reader.getbyte
|
|
249
|
+
return result if byte.nil?
|
|
253
250
|
result |= (byte & 0x7F) << shift
|
|
254
251
|
break if (byte & 0x80) == 0
|
|
255
252
|
shift += 7
|
|
@@ -286,7 +283,8 @@ module ChConnect
|
|
|
286
283
|
def read_nullable_column(inner_type, num_rows)
|
|
287
284
|
nulls = @reader.read(num_rows).bytes
|
|
288
285
|
values = read_column(inner_type, num_rows)
|
|
289
|
-
|
|
286
|
+
num_rows.times { |i| values[i] = nil if nulls[i] == 1 }
|
|
287
|
+
values
|
|
290
288
|
end
|
|
291
289
|
|
|
292
290
|
# Array: cumulative offsets (uint64 per row), then all elements
|
|
@@ -298,10 +296,10 @@ module ChConnect
|
|
|
298
296
|
|
|
299
297
|
elements = read_column(inner_type, total_elements)
|
|
300
298
|
|
|
301
|
-
arrays =
|
|
299
|
+
arrays = Array.new(num_rows)
|
|
302
300
|
prev_offset = 0
|
|
303
|
-
offsets.
|
|
304
|
-
arrays
|
|
301
|
+
offsets.each_with_index do |offset, i|
|
|
302
|
+
arrays[i] = elements.slice(prev_offset, offset - prev_offset)
|
|
305
303
|
prev_offset = offset
|
|
306
304
|
end
|
|
307
305
|
arrays
|
|
@@ -317,10 +315,11 @@ module ChConnect
|
|
|
317
315
|
keys = read_column(key_type, total_pairs)
|
|
318
316
|
values = read_column(value_type, total_pairs)
|
|
319
317
|
|
|
320
|
-
maps =
|
|
318
|
+
maps = Array.new(num_rows)
|
|
321
319
|
prev_offset = 0
|
|
322
|
-
offsets.
|
|
323
|
-
|
|
320
|
+
offsets.each_with_index do |offset, i|
|
|
321
|
+
len = offset - prev_offset
|
|
322
|
+
maps[i] = keys.slice(prev_offset, len).zip(values.slice(prev_offset, len)).to_h
|
|
324
323
|
prev_offset = offset
|
|
325
324
|
end
|
|
326
325
|
maps
|
data/lib/ch_connect/version.rb
CHANGED