ch_connect 0.1.0 → 0.2.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e4176a0227a2a6a7e94c379e75d37325d7cf51428c01131bb88ca9903a9a62e3
4
- data.tar.gz: fa438af73ded93b0f5eae03cd77543414d596ba6fc19e23f2e6bc536bb28be21
3
+ metadata.gz: 200d184420e49176978c3e733563dee1bce43abd3ae68ec00c8d8dedba2a3d93
4
+ data.tar.gz: 701d1c607774c4751b78e7abe8deb8f0927ae671887159e2aba4f5386568a63c
5
5
  SHA512:
6
- metadata.gz: a5718c7457b517d5586e210b597500400bea09145e24a89088ff797b200be4d99522759e7dd1381e3dd79f65bee3c83fbf7a611a7c2abcf39bb3c1f6613a5fbd
7
- data.tar.gz: be54de4f246642315f5a815afc2b78498dc30890172cb7318e2cff82a3f801be9f92b86e35c4977f0a30fc3074f1b19a36d7fcfe8f8e80766eaf589833164646
6
+ metadata.gz: 544a541108591c31bbff5381d73b3659f278013f29bd380b25b056fe9a2066402691a70d4fd705dd35513fa425b4267e35d3e8be06885a5e10ec7d5ed9a81701
7
+ data.tar.gz: 6d1ca3c49203d5c7eea71886aba4dfa70447fa1adbad832fd6b9736a6ffad4a02a58f425c56921a1e1b109a1a879de4178889fa1c33c04beeb7cb01830fcd57f
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## Unreleased
2
2
 
3
+ ## [0.2.0] - 2026-01-31
4
+
5
+ - Added benchmark suite comparing against other ClickHouse Ruby gems ([#5](https://github.com/kukicola/ch_connect/pull/5))
6
+ - Optimized BodyReader with chunked buffering for better memory efficiency ([#5](https://github.com/kukicola/ch_connect/pull/5))
7
+ - Optimized NativeFormatParser with transpose-based row building ([#5](https://github.com/kukicola/ch_connect/pull/5))
8
+
3
9
  ## [0.1.0] - 2026-01-31
4
10
 
5
11
  - Initial release
data/README.md CHANGED
@@ -11,6 +11,28 @@ Fast Ruby client for ClickHouse database using the Native binary format for effi
11
11
  - Thread-safe concurrent access
12
12
  - Supports all common ClickHouse data types
13
13
 
14
+ ## Benchmarks
15
+
16
+ Compared against other Ruby ClickHouse gems ([click_house](https://github.com/shlima/click_house), [clickhouse](https://github.com/archan937/clickhouse), [click_house-client](https://gitlab.com/gitlab-org/ruby/gems/clickhouse-client)) on Ruby 3.4.3:
17
+
18
+ **Speed (iterations/second, higher is better):**
19
+
20
+ | Scenario | ch_connect | click_house | clickhouse | click_house-client |
21
+ |----------|------------|-------------|------------|--------------------|
22
+ | Small queries (10 rows) | **680 i/s** | 342 i/s (2.0x slower) | 293 i/s (2.3x slower) | 346 i/s (2.0x slower) |
23
+ | Large queries (100K rows) | **3.5 i/s** | 1.1 i/s (3.3x slower) | 0.5 i/s (6.9x slower) | 1.6 i/s (2.2x slower) |
24
+
25
+ **Memory (large query, lower is better):**
26
+
27
+ | Gem | Allocated |
28
+ |-----|-----------|
29
+ | ch_connect | **130 MB** |
30
+ | click_house | 205 MB (1.6x more) |
31
+ | clickhouse | 483 MB (3.7x more) |
32
+ | click_house-client | 210 MB (1.6x more) |
33
+
34
+ See the `benchmark/` directory for the full benchmark suite and methodology.
35
+
14
36
  ## Installation
15
37
 
16
38
  Add to your Gemfile:
@@ -1,16 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ChConnect
4
- # Wrapper for HTTP response body providing position tracking and EOF detection.
4
+ # Wrapper for HTTP response body providing buffered reads.
5
+ # Pulls data from the underlying body in fixed-size chunks so that many small reads are served from an in-memory buffer.
5
6
  # @api private
6
7
  class BodyReader
8
+ CHUNK_SIZE = 64 * 1024 # 64KB chunks
9
+
7
10
  # Creates a new body reader.
8
11
  #
9
12
  # @param body [#read, #bytesize, #close] HTTP response body
10
13
  def initialize(body)
11
14
  @body = body
12
- @pos = 0
13
15
  @size = body.bytesize
16
+ @buffer = "".b
17
+ @buffer_pos = 0
18
+ @eof = false
14
19
  end
15
20
 
16
21
  # Closes the underlying body.
@@ -24,7 +29,8 @@ module ChConnect
24
29
  #
25
30
  # @return [Boolean]
26
31
  def eof?
27
- @pos >= @size
32
+ fill_buffer(1) if @buffer_pos >= @buffer.bytesize && !@eof
33
+ @eof && @buffer_pos >= @buffer.bytesize
28
34
  end
29
35
 
30
36
  # Reads exactly n bytes from the body.
@@ -32,9 +38,42 @@ module ChConnect
32
38
  # @param n [Integer] number of bytes to read
33
39
  # @return [String] binary string of n bytes
34
40
  def read(n)
35
- result = @body.read(n)
36
- @pos += n
41
+ fill_buffer(n)
42
+ result = @buffer.byteslice(@buffer_pos, n)
43
+ @buffer_pos += n
44
+ compact_buffer if @buffer_pos > CHUNK_SIZE
37
45
  result
38
46
  end
47
+
48
+ # Reads a single byte as integer, returns nil at EOF.
49
+ #
50
+ # @return [Integer, nil] byte value or nil at EOF
51
+ def getbyte
52
+ fill_buffer(1)
53
+ return nil if @buffer_pos >= @buffer.bytesize
54
+
55
+ byte = @buffer.getbyte(@buffer_pos)
56
+ @buffer_pos += 1
57
+ compact_buffer if @buffer_pos > CHUNK_SIZE
58
+ byte
59
+ end
60
+
61
+ private
62
+
63
+ def fill_buffer(needed)
64
+ while !@eof && (@buffer.bytesize - @buffer_pos) < needed
65
+ chunk = @body.read(CHUNK_SIZE)
66
+ if chunk.nil? || chunk.empty?
67
+ @eof = true
68
+ else
69
+ @buffer << chunk
70
+ end
71
+ end
72
+ end
73
+
74
+ def compact_buffer
75
+ @buffer = @buffer.byteslice(@buffer_pos..-1) || "".b
76
+ @buffer_pos = 0
77
+ end
39
78
  end
40
79
  end
@@ -52,9 +52,7 @@ module ChConnect
52
52
  columns_data << read_column(col_type, num_rows)
53
53
  end
54
54
 
55
- num_rows.times do |row_idx|
56
- @rows << columns_data.map { |col| col[row_idx] }
57
- end
55
+ @rows.concat(columns_data.transpose) if num_rows > 0
58
56
  end
59
57
 
60
58
  def read_column(type, num_rows)
@@ -247,9 +245,8 @@ module ChConnect
247
245
  result = 0
248
246
  shift = 0
249
247
  loop do
250
- byte_str = @reader.read(1)
251
- return result if byte_str.nil? || byte_str.empty?
252
- byte = byte_str.ord
248
+ byte = @reader.getbyte
249
+ return result if byte.nil?
253
250
  result |= (byte & 0x7F) << shift
254
251
  break if (byte & 0x80) == 0
255
252
  shift += 7
@@ -286,7 +283,8 @@ module ChConnect
286
283
  def read_nullable_column(inner_type, num_rows)
287
284
  nulls = @reader.read(num_rows).bytes
288
285
  values = read_column(inner_type, num_rows)
289
- values.each_with_index.map { |v, i| (nulls[i] == 1) ? nil : v }
286
+ num_rows.times { |i| values[i] = nil if nulls[i] == 1 }
287
+ values
290
288
  end
291
289
 
292
290
  # Array: cumulative offsets (uint64 per row), then all elements
@@ -298,10 +296,10 @@ module ChConnect
298
296
 
299
297
  elements = read_column(inner_type, total_elements)
300
298
 
301
- arrays = []
299
+ arrays = Array.new(num_rows)
302
300
  prev_offset = 0
303
- offsets.each do |offset|
304
- arrays << elements[prev_offset...offset]
301
+ offsets.each_with_index do |offset, i|
302
+ arrays[i] = elements.slice(prev_offset, offset - prev_offset)
305
303
  prev_offset = offset
306
304
  end
307
305
  arrays
@@ -317,10 +315,11 @@ module ChConnect
317
315
  keys = read_column(key_type, total_pairs)
318
316
  values = read_column(value_type, total_pairs)
319
317
 
320
- maps = []
318
+ maps = Array.new(num_rows)
321
319
  prev_offset = 0
322
- offsets.each do |offset|
323
- maps << keys[prev_offset...offset].zip(values[prev_offset...offset]).to_h
320
+ offsets.each_with_index do |offset, i|
321
+ len = offset - prev_offset
322
+ maps[i] = keys.slice(prev_offset, len).zip(values.slice(prev_offset, len)).to_h
324
323
  prev_offset = offset
325
324
  end
326
325
  maps
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ChConnect
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ch_connect
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Karol Bąk