@clickhouse/client 1.23.0-head.fae5998.1 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/CHANGELOG.md +1342 -0
  2. package/README.md +18 -6
  3. package/dist/common/index.d.ts +2 -2
  4. package/dist/common/index.js +2 -2
  5. package/dist/common/index.js.map +1 -1
  6. package/dist/common/parse/column_types.d.ts +30 -2
  7. package/dist/common/parse/column_types.js +8 -0
  8. package/dist/common/parse/column_types.js.map +1 -1
  9. package/dist/index.d.ts +2 -0
  10. package/dist/index.js +2 -0
  11. package/dist/index.js.map +1 -1
  12. package/dist/version.d.ts +1 -1
  13. package/dist/version.js +1 -1
  14. package/dist/version.js.map +1 -1
  15. package/package.json +7 -6
  16. package/skills/AGENTS.md +8 -0
  17. package/skills/clickhouse-js-node-rowbinary/AGENTS.md +44 -0
  18. package/skills/clickhouse-js-node-rowbinary/CHANGELOG.md +49 -0
  19. package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/README.md +85 -14
  20. package/skills/clickhouse-js-node-rowbinary/SKILL.md +111 -0
  21. package/skills/{clickhouse-js-node-rowbinary-parser/SKILL.md → clickhouse-js-node-rowbinary/reader.md} +59 -123
  22. package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/carts.ts +9 -5
  23. package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/events.ts +5 -5
  24. package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/iot.ts +4 -4
  25. package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/ledger.ts +3 -3
  26. package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/logs.ts +4 -4
  27. package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/observability.ts +9 -10
  28. package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/orders.ts +10 -9
  29. package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/profiles.ts +5 -5
  30. package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/telemetry.ts +6 -6
  31. package/skills/clickhouse-js-node-rowbinary/src/readers/compile.ts +328 -0
  32. package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/dynamic.ts +12 -8
  33. package/skills/clickhouse-js-node-rowbinary/src/readers/enums.ts +40 -0
  34. package/skills/clickhouse-js-node-rowbinary/src/readers/header.ts +29 -0
  35. package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/reader.ts +17 -0
  36. package/skills/clickhouse-js-node-rowbinary/src/readers/rowBinaryWithNamesAndTypes.ts +155 -0
  37. package/skills/clickhouse-js-node-rowbinary/src/writers/aggregateFunction.ts +18 -0
  38. package/skills/clickhouse-js-node-rowbinary/src/writers/bool.ts +10 -0
  39. package/skills/clickhouse-js-node-rowbinary/src/writers/composite.ts +140 -0
  40. package/skills/clickhouse-js-node-rowbinary/src/writers/core.ts +92 -0
  41. package/skills/clickhouse-js-node-rowbinary/src/writers/datetime.ts +123 -0
  42. package/skills/clickhouse-js-node-rowbinary/src/writers/decimals.ts +51 -0
  43. package/skills/clickhouse-js-node-rowbinary/src/writers/enums.ts +18 -0
  44. package/skills/clickhouse-js-node-rowbinary/src/writers/floats.ts +40 -0
  45. package/skills/clickhouse-js-node-rowbinary/src/writers/geo.ts +125 -0
  46. package/skills/clickhouse-js-node-rowbinary/src/writers/integers.ts +90 -0
  47. package/skills/clickhouse-js-node-rowbinary/src/writers/interval.ts +11 -0
  48. package/skills/clickhouse-js-node-rowbinary/src/writers/ip.ts +121 -0
  49. package/skills/clickhouse-js-node-rowbinary/src/writers/lowCardinality.ts +12 -0
  50. package/skills/clickhouse-js-node-rowbinary/src/writers/nested.ts +17 -0
  51. package/skills/clickhouse-js-node-rowbinary/src/writers/nothing.ts +21 -0
  52. package/skills/clickhouse-js-node-rowbinary/src/writers/rows.ts +144 -0
  53. package/skills/clickhouse-js-node-rowbinary/src/writers/simpleAggregateFunction.ts +12 -0
  54. package/skills/clickhouse-js-node-rowbinary/src/writers/strings.ts +77 -0
  55. package/skills/clickhouse-js-node-rowbinary/src/writers/time.ts +54 -0
  56. package/skills/clickhouse-js-node-rowbinary/src/writers/uuid.ts +60 -0
  57. package/skills/clickhouse-js-node-rowbinary/src/writers/varint.ts +64 -0
  58. package/skills/clickhouse-js-node-rowbinary/src/writers/writer.ts +101 -0
  59. package/skills/clickhouse-js-node-rowbinary/writer.md +96 -0
  60. package/skills/clickhouse-js-node-rowbinary-parser/src/enums.ts +0 -28
  61. /package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/EXAMPLES.md +0 -0
  62. /package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/case-studies/iot-rowbinary-vs-json.md +0 -0
  63. /package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/case-studies/ledger-rowbinary-vs-json.md +0 -0
  64. /package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/case-studies/logs-json-wins.md +0 -0
  65. /package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/case-studies/wasm-vs-js.md +0 -0
  66. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/aggregateFunction.ts +0 -0
  67. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/bool.ts +0 -0
  68. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/columnar.ts +0 -0
  69. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/composite.ts +0 -0
  70. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/core.ts +0 -0
  71. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/datetime.ts +0 -0
  72. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/decimals.ts +0 -0
  73. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/floats.ts +0 -0
  74. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/geo.ts +0 -0
  75. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/integers.ts +0 -0
  76. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/interval.ts +0 -0
  77. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/ip.ts +0 -0
  78. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/json.ts +0 -0
  79. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/lowCardinality.ts +0 -0
  80. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/nested.ts +0 -0
  81. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/nothing.ts +0 -0
  82. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/rows.ts +0 -0
  83. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/simpleAggregateFunction.ts +0 -0
  84. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/stream.ts +0 -0
  85. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/strings.ts +0 -0
  86. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/time.ts +0 -0
  87. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/uuid.ts +0 -0
  88. /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/varint.ts +0 -0
@@ -1,10 +1,10 @@
1
- # ClickHouse Node.js RowBinary Parser Generator
1
+ # ClickHouse Node.js RowBinary Codec Generator
2
2
 
3
- **If JS had a -O3 compiler flag, this skill would be it.** (for RowBinary parsing)
3
+ **If JS had a -O3 compiler flag, this skill would be it.** (for RowBinary read & write)
4
4
 
5
- A skill and a library that lets a coding agent generate bespoke RowBinary parsers on the first pass from the column type definitions of a ClickHouse response. The [spirit](#the-spirit) behind the approach.
5
+ A skill and a library that lets a coding agent generate bespoke RowBinary codecs on the first pass from the column type definitions of a ClickHouse response. The [spirit](#the-spirit) behind the approach.
6
6
 
7
- **Reader only** for now. Today this covers reading (decoding) RowBinary streams. A matching RowBinary writer (encoding) is planned.
7
+ **Reads and writes.** Both directions are covered: readers (decode bytes → values) and writers (encode values → bytes), split under `src/readers/` and `src/writers/`. The reader path is the more mature one — the writers mirror it type-for-type, with a few decode-only paths (`Dynamic`, `JSON`, the runtime header/compile path, and the columnar typed-array path) not yet mirrored.
8
8
 
9
9
  ## Status
10
10
 
@@ -12,7 +12,7 @@ A skill and a library that lets a coding agent generate bespoke RowBinary parser
12
12
  - ✅ Opus 4.8: 71% -> 94.7% pass rate
13
13
  - ✅ Haiku 4.5: 52% -> 86.0% pass rate
14
14
  - ✅ Composer 2.5 Fast: 3x parser performance
15
- - ✅ 469/469 tests
15
+ - ✅ 724/724 tests (readers + writers)
16
16
  - ✅ type-checked
17
17
  - ✅ benchmarked
18
18
 
@@ -35,7 +35,7 @@ const readOrderRow: Reader<OrderRow> = (s) => ({
35
35
  id: readUInt8(s),
36
36
  uid: formatUUID(readUUID(s)),
37
37
  price: readDecimal64(2)(s),
38
- status: readEnum8(s),
38
+ status: readInt8(s), // raw enum int; `readEnum8(map)` resolves it to the name
39
39
  });
40
40
  ```
41
41
 
@@ -70,14 +70,74 @@ npx skills-npm setup
70
70
  As a skill only:
71
71
 
72
72
  ```bash
73
- npx skills add ClickHouse/clickhouse-js/skills/clickhouse-js-node-rowbinary-parser
73
+ npx skills add ClickHouse/clickhouse-js/skills/clickhouse-js-node-rowbinary
74
74
  ```
75
75
 
76
76
  ```console
77
- > Hey, Claude, tell me what the rowbinary parser skill can do for me.
78
- > A lot! It generates custom, high-performance RowBinary parsers
79
- > Super, generate a parser for the queries in app/src/model.ts.
80
- < Reading skill clickhouse-js-node-rowbinary-parser
77
+ > Hey, Claude, tell me what the rowbinary skill can do for me.
78
+ < A lot! It generates custom, high-performance RowBinary readers and writers
79
+ > Super, generate a reader for the queries in app/src/model.ts.
80
+ < Reading skill clickhouse-js-node-rowbinary…
81
+ ```
82
+
83
+ ## Using it with the ClickHouse JS client
84
+
85
+ This library only **decodes** the bytes — it doesn't open connections. Pair it
86
+ with the official client to fetch a `RowBinary` response and feed the byte chunks
87
+ into `streamRowBatches(chunks, readRow)`.
88
+
89
+ `RowBinary` isn't one of the formats the client decodes itself, so don't use
90
+ `client.query({ format: ... })` for it. Instead use `client.exec({ query })` with
91
+ the `FORMAT RowBinary` clause written into the SQL yourself — `exec` hands back the
92
+ **raw, undecoded byte stream** of the response, which is exactly what this library
93
+ consumes. (Use plain `RowBinary`, not `RowBinaryWithNamesAndTypes`, unless your
94
+ reader also skips the leading names/types header.)
95
+
96
+ The row reader below is the `orders` example from [EXAMPLES.md](EXAMPLES.md); swap
97
+ in the reader the skill generates for your own columns.
98
+
99
+ ```ts
100
+ import {
101
+ type Reader,
102
+ readUInt8,
103
+ readInt8,
104
+ readUUID,
105
+ formatUUID,
106
+ readDecimal64,
107
+ type DecimalValue,
108
+ streamRowBatches,
109
+ } from "@clickhouse/rowbinary";
110
+ import { createClient } from "@clickhouse/client";
111
+
112
+ type OrderRow = {
113
+ id: number;
114
+ uid: string;
115
+ price: DecimalValue;
116
+ status: number;
117
+ };
118
+
119
+ const readOrderRow: Reader<OrderRow> = (s) => ({
120
+ id: readUInt8(s),
121
+ uid: formatUUID(readUUID(s)),
122
+ price: readDecimal64(2)(s),
123
+ status: readInt8(s), // raw enum int; `readEnum8(map)` resolves it to the name
124
+ });
125
+
126
+ // `exec` resolves to a Node `Stream.Readable`. It is already an
127
+ // `AsyncIterable<Uint8Array>` (chunks are `Buffer`/`Uint8Array`, which
128
+ // `streamRowBatches` normalizes), so pass `stream` straight in:
129
+
130
+ const client = createClient();
131
+
132
+ const { stream } = await client.exec({
133
+ query: "SELECT id, uid, price, status FROM orders FORMAT RowBinary",
134
+ });
135
+
136
+ for await (const rows of streamRowBatches(stream, readOrderRow)) {
137
+ for (const row of rows) console.log(row); // { id, uid, price: [unscaled, scale], status }
138
+ }
139
+
140
+ await client.close();
81
141
  ```
82
142
 
83
143
  ## Why it's worth it
@@ -129,6 +189,13 @@ than shipping a generic, runtime-driven decoder, it emits straight-line code
129
189
  that reads each column in order, so the parser only contains the logic the
130
190
  specific result shape needs.
131
191
 
192
+ **Schema only known at runtime?** `compileRowBinaryWithNamesAndTypes(cursor)`
193
+ reads the `RowBinaryWithNamesAndTypes` header and folds each column type into a
194
+ reader on the fly (type strings parsed by `@clickhouse/datatype-parser`),
195
+ returning a `readRows` driver for the rest of the stream — a generic, no-codegen
196
+ path for dynamic schemas. The specialized codegen above stays the fast path when
197
+ the types are fixed.
198
+
132
199
  ## Correctness on the gotcha-heavy types
133
200
 
134
201
  For a plain `UInt64, String, DateTime` result a strong model already writes fast,
@@ -201,18 +268,22 @@ Measure, don't assume.
201
268
 
202
269
  ## Scope
203
270
 
204
- - **In scope:** `RowBinary`, `RowBinaryWithNames`, and
271
+ - **In scope (reading):** `RowBinary`, `RowBinaryWithNames`, and
205
272
  `RowBinaryWithNamesAndTypes` decoding for Node.js — full-buffer and streaming
206
273
  (chunked) via `advance()`/`NeedMoreData`, `readRows()`, and the async
207
274
  `streamRowBatches()` (with a built-in small-chunk warning and the optional
208
275
  `coalesceChunks()` debounce filter).
209
- - **Planned:** RowBinary **writing / encoding** (the inverse of everything above)
276
+ - **In scope (writing):** the inverse encode path — a `writeX` mirroring every
277
+ `readX`, appending bytes to a `Sink`, plus `writeRows()`. Imported from
278
+ `@clickhouse/rowbinary/writer`. A handful of decode-only paths are not yet
279
+ mirrored: `Dynamic`, `JSON`, the runtime header/compile path, and the columnar
280
+ typed-array path.
210
281
  - **Out of scope (for now):** browsers and Edge runtimes, non-RowBinary formats
211
282
  (JSON / CSV / TSV / Parquet), and big-endian hosts.
212
283
 
213
284
  ## The spirit
214
285
 
215
- A RowBinary parser generator is a narrow thing. But it's built as an instance of
286
+ A RowBinary codec generator is a narrow thing. But it's built as an instance of
216
287
  a broader bet about what libraries become once a capable LLM is part of the
217
288
  toolchain. Three shifts, each already visible in this repo:
218
289
 
@@ -0,0 +1,111 @@
1
+ ---
2
+ name: clickhouse-js-node-rowbinary
3
+ description: >
4
+ Generate TypeScript/JavaScript code that reads/decodes AND writes/encodes
5
+ ClickHouse RowBinary streams for the ClickHouse HTTP server.
6
+ Use this skill whenever a user wants to parse or produce `RowBinary`,
7
+ `RowBinaryWithNames`, or `RowBinaryWithNamesAndTypes`.
8
+ Node.js only, doesn't cover browsers.
9
+ ---
10
+
11
+ # ClickHouse JS RowBinary Codec Generator for Node.js
12
+
13
+ This skill generates both directions of the wire format: **readers** (decode
14
+ bytes → values) and **writers** (encode values → bytes, the mirror). A given
15
+ task normally needs only one side. This file is the shared entry point — the
16
+ format gate plus the principles common to both directions; the per-direction
17
+ decisions, guidance, and the per-type reference tables live in two sibling files.
18
+
19
+ **Pick your side — read only the one you need:**
20
+
21
+ - **Decoding a `RowBinary*` response** from ClickHouse into JS values →
22
+ **[reader.md](reader.md)**. Streaming vs whole-buffer, row-objects vs columnar,
23
+ fixed vs runtime schema, and the per-type reader reference.
24
+ - **Encoding JS values into a `RowBinary` payload** to send to ClickHouse →
25
+ **[writer.md](writer.md)**. The `Sink`/`writeX` building blocks, `writeRows`
26
+ streaming, and the per-type writer reference.
27
+
28
+ The per-type code is real, split by direction under `src/readers/` and
29
+ `src/writers/`.
30
+
31
+ ## First: is RowBinary even the right format?
32
+
33
+ RowBinary exists for throughput, but it is **not automatically the fastest
34
+ path** — match the format to the shape of the data before committing to a
35
+ bespoke parser.
36
+
37
+ **Prefer a `JSON*` format (e.g. `JSONEachRow`) when** the result is mostly
38
+ strings / JSON-like values that you consume wholesale — randomly accessing
39
+ essentially every field, running string/regexp methods on them, treating values
40
+ as text. V8's native `JSON.parse` is heavily optimized C++ and builds JS strings
41
+ and objects faster than a JS-level RowBinary decoder can; pair it with HTTP
42
+ response compression (`gzip` / `zstd`, which crushes JSON's repetitive keys) and
43
+ the wire cost shrinks too.
44
+
45
+ **RowBinary clearly wins when** the result is dominated by:
46
+
47
+ - **Wide numerics** — `Int128`/`Int256`/`UInt128`/`UInt256`,
48
+ `Decimal128`/`Decimal256`.
49
+ - **Binary / fixed-width blobs** — `IPv4`, `IPv6`, `UUID`, `FixedString`.
50
+ - **High-volume fixed-width numeric columns** generally, where each value is a
51
+ single `DataView` read.
52
+
53
+ **Prefer the `Native` format when** columnar load and client-side analytics are
54
+ the main goal (fold/scan/filter columns, feed typed arrays to a Worker or WASM).
55
+ `Native` is column-major, so it loads straight into one typed array per column
56
+ with no transpose.
57
+
58
+ For help choosing and consuming a `JSON*` format (or CSV / TSV) instead, use the
59
+ **`clickhouse-js-node-coding`** skill.
60
+
61
+ ## Core guidance (both directions)
62
+
63
+ These principles apply whether you are generating a reader or a writer; the
64
+ side-specific operational guidance is in [reader.md](reader.md) /
65
+ [writer.md](writer.md).
66
+
67
+ - **Little-endian only.** RowBinary is little-endian; target x86/ARM. Read and
68
+ write every multi-byte number with `DataView` accessors passing a **literal**
69
+ `true` for the `littleEndian` flag.
70
+
71
+ - **Correct first, then optimize.** First emit a correct codec built from the
72
+ plain per-type API. Only after it's correct (and tested) specialize it. Don't
73
+ bake performance assumptions in before correctness.
74
+
75
+ - **Monomorphize generic/composite types.** Emit specialized, inlined code per
76
+ type combination instead of passing functions as arguments where the type is
77
+ known ahead of time.
78
+
79
+ - **Inline the leaf ops.** The per-type `readX`/`writeX` functions are the
80
+ correct, composable reference; the generated codec should INLINE their bodies,
81
+ not call them, so the row loop is straight-line with no per-field indirection
82
+ (and so the fixed-width coalescing can fold the offset arithmetic together).
83
+
84
+ - **Annotate the type per column.** Inlining erases the type structure, so put a
85
+ short comment above each column's encode/decode block naming the ClickHouse
86
+ type it handles.
87
+
88
+ - **Shared scratch is not reentrant.** Some hot methods reuse a module-level
89
+ scratch buffer as a write-then-read pair — correct only because the access is
90
+ fully synchronous. An `async`/`yield` boundary between populating and reading
91
+ it corrupts the value.
92
+
93
+ - **TypeScript by default.** Generate TypeScript code and helpers unless the user
94
+ explicitly asks for plain JavaScript.
95
+
96
+ ## Worked examples
97
+
98
+ Six end-to-end examples with real speedup are catalogued in [EXAMPLES.md](EXAMPLES.md).
99
+
100
+ ## Out of scope
101
+
102
+ - **JSON / CSV / TSV / Parquet parsing** → use `clickhouse-js-node-coding`.
103
+ - **Connection errors, hangs, type mismatches** → use
104
+ `clickhouse-js-node-troubleshooting`.
105
+ - **Browser / Web Worker / Edge** → `@clickhouse/client-web`.
106
+
107
+ ## Still Stuck?
108
+
109
+ - [ClickHouse RowBinary format](https://clickhouse.com/docs/interfaces/formats#rowbinary)
110
+ - [ClickHouse data types](https://clickhouse.com/docs/sql-reference/data-types)
111
+ - [ClickHouse JS client docs](https://clickhouse.com/docs/integrations/javascript)
@@ -1,46 +1,12 @@
1
- ---
2
- name: clickhouse-js-node-rowbinary-parser
3
- description: >
4
- Generate TypeScript/JavaScript code that reads and decodes ClickHouse
5
- RowBinary streams from the ClickHouse HTTP server.
6
- Use this skill whenever a user wants to parse `RowBinary`,
7
- `RowBinaryWithNames`, or `RowBinaryWithNamesAndTypes`.
8
- Node.js only, doesn't cover browsers.
9
- ---
10
-
11
- # ClickHouse JS RowBinary Parser Generator for Node.js
12
-
13
- ## First: is RowBinary even the right format?
14
-
15
- RowBinary exists for throughput, but it is **not automatically the fastest
16
- path** — match the format to the shape of the data before committing to a
17
- bespoke parser.
18
-
19
- **Prefer a `JSON*` format (e.g. `JSONEachRow`) when** the result is mostly
20
- strings / JSON-like values that you consume wholesale — randomly accessing
21
- essentially every field, running string/regexp methods on them, treating values
22
- as text. V8's native `JSON.parse` is heavily optimized C++ and builds JS strings
23
- and objects faster than a JS-level RowBinary decoder can; pair it with HTTP
24
- response compression (`gzip` / `zstd`, which crushes JSON's repetitive keys) and
25
- the wire cost shrinks too.
26
-
27
- **RowBinary clearly wins when** the result is dominated by:
28
-
29
- - **Wide numerics** — `Int128`/`Int256`/`UInt128`/`UInt256`,
30
- `Decimal128`/`Decimal256`.
31
- - **Binary / fixed-width blobs** — `IPv4`, `IPv6`, `UUID`, `FixedString`.
32
- - **High-volume fixed-width numeric columns** generally, where each value is a
33
- single `DataView` read.
34
-
35
- **Prefer the `Native` format when** columnar load and client-side analytics are
36
- the main goal (fold/scan/filter columns, feed typed arrays to a Worker or WASM).
37
- `Native` is column-major, so it loads straight into one typed array per column
38
- with no transpose.
39
-
40
- For help choosing and consuming a `JSON*` format (or CSV / TSV) instead, use the
41
- **`clickhouse-js-node-coding`** skill.
42
-
43
- ## Second: complete buffer, or incremental stream?
1
+ # RowBinary reader (decode) for Node.js
2
+
3
+ Decoding a `RowBinary` / `RowBinaryWithNames` / `RowBinaryWithNamesAndTypes`
4
+ response from ClickHouse into JS values. Read [SKILL.md](SKILL.md) first for the
5
+ format gate ("is RowBinary even the right format?") and the principles that
6
+ apply to **both** directions; this file covers the decisions and the per-type
7
+ reference specific to **reading**. Writing? See [writer.md](writer.md).
8
+
9
+ ## First: complete buffer, or incremental stream?
44
10
 
45
11
  Decide this before writing the reader — it changes the shape of the code and is
46
12
  a real performance fork.
@@ -56,7 +22,7 @@ a real performance fork.
56
22
 
57
23
  The exposed API is streaming by default and requires an optimisation pass.
58
24
 
59
- ## Third: row objects, or columnar (typed arrays)?
25
+ ## Second: row objects, or columnar (typed arrays)?
60
26
 
61
27
  The default output is one object per row (array-of-structs). For a **numeric,
62
28
  fixed-width result that the consumer reads column-wise**, decode instead into one
@@ -77,43 +43,39 @@ bandwidth). Measured in `tests/iot.columnar.bench.ts`; rationale in
77
43
  complete-row count is `(chunk.length / stride) | 0`, and the leftover bytes
78
44
  carry to the next chunk. Yield one typed-array batch per chunk, each owning a
79
45
  fresh transferable `ArrayBuffer` (see `streamSensorColumns` in
80
- `src/columnar.ts`).
46
+ `src/readers/columnar.ts`).
81
47
  - **Stay row-oriented when** downstream code is row-shaped, the row is
82
48
  string-dominated (columnar's win is numeric — a JS string allocates either
83
49
  way), or the schema is nested/heterogeneous (`Array`/`Map`/`Tuple`).
84
50
  - **Hybrid:** store columnar, expose a lazy `rowAt(i)` accessor that builds an
85
51
  object only for rows actually touched (see `iotRowAt` in `src/examples/iot.ts`).
86
52
 
87
- ## Core guidance
88
-
89
- When generating a parser, follow these:
53
+ ## Third: are the column types known ahead of time?
90
54
 
91
- - **Little-endian only.** RowBinary is little-endian; target x86/ARM. Read every
92
- multi-byte number with `DataView` accessors passing a **literal** `true` for
93
- the `littleEndian` flag.
55
+ - **Known (the default).** Generate a straight-line reader specialized to those
56
+ types — everything below.
57
+ - **Only at runtime** (the schema varies, or you just want to decode an arbitrary
58
+ `RowBinaryWithNamesAndTypes` stream). Call
59
+ `compileRowBinaryWithNamesAndTypes(cursor)` (`src/readers/rowBinaryWithNamesAndTypes.ts`):
60
+ it reads the header, folds each column type's AST into a `Reader`
61
+ (`astToReader`, `src/readers/compile.ts`; type strings parsed by
62
+ `@clickhouse/datatype-parser`), and returns a `readRows` driver for the rest of
63
+ the stream. Generic and unoptimized (no codegen), so prefer the specialized
64
+ path whenever the types are fixed.
94
65
 
95
- - **Correct first, then optimize.** First emit a correct reader built from the
96
- plain per-type API. Only after it's correct (and tested) specialize it. Don't
97
- bake performance assumptions in before correctness.
66
+ ## Reader guidance
98
67
 
99
- - **Monomorphize generic/composite types.** Emit specialized, inlined code per
100
- type combination instead of passing functions as arguments where the type
101
- is known ahead of time.
68
+ On top of the shared principles in [SKILL.md](SKILL.md), the read path has its own:
102
69
 
103
70
  - **Streaming: throw + restart, not generators.** To signal "need more bytes",
104
71
  a synchronous reader that throws a sentinel (`NeedMoreData`) and restarts the
105
- row beats generators for realistic chunk sizes;
72
+ row beats generators for realistic chunk sizes.
106
73
 
107
74
  - **Keep an eye on chunk sizes.** Partial trailing rows and small chunks are a silent
108
75
  throughput killer: `streamRowBatches` warns once when
109
76
  rows-per-chunk falls too low, and `coalesceChunks(source, { minSize, timeoutMs })`
110
77
  merges small chunks in front of it when the source size isn't yours to raise.
111
78
 
112
- - **Shared scratch is not reentrant.** Some hot methods reuse a module-level
113
- scratch buffer as a write-then-read pair — correct only because reads are fully
114
- synchronous. An `async`/`yield` boundary between populating and reading it
115
- corrupts the value.
116
-
117
79
  - **Hoist the cursor into locals.** Prefer the working buffer and view declared
118
80
  once at the top of the generated reader, and keep the read offset in a **local variable**,
119
81
  operating on it directly instead of re-reading from an object.
@@ -122,69 +84,43 @@ When generating a parser, follow these:
122
84
  neighbouring fixed-width columns has a known combined size, so bounds-check it
123
85
  ONCE.
124
86
 
125
- - **Inline the leaf reads.** The per-type `readX` functions are the correct,
126
- composable reference; the generated parser should INLINE their bodies, not call
127
- them, so the row reader is straight-line with no per-field indirection (and so
128
- the two points above can fold the offset arithmetic together).
129
-
130
- - **Annotate the decoded type per column.** Inlining erases the type structure,
131
- so put a short comment above each column's decode block naming the ClickHouse
132
- type it reads.
133
-
134
87
  - **Pre-allocate small result arrays.** RowBinary gives every array/map its
135
88
  element count up front (the LEB128 prefix), so DEFAULT is to `new Array(n)`.
136
89
  NOTE: for **large** arrays the application will iterate or compute over repeatedly,
137
90
  prefer `[]` + `push` (faster to traverse in V8) — or a typed array (`Float64Array`…)
138
91
  for numeric elements.
139
92
 
140
- - **TypeScript by default.** Generate TypeScript parsers and helpers unless the
141
- user explicitly asks for plain JavaScript.
142
-
143
- ## Type family references
144
-
145
- The readers live as real code under `src/`, split by type family.
146
-
147
- | Result contains (trigger) | Open |
148
- | ---------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
149
- | **Always** — cursor state, `advance()`, `NeedMoreData`, `Reader<T>` | `src/core.ts` |
150
- | LEB128 length/count prefixes for `String`/`Array`/`Map` (`readUVarint`) | `src/varint.ts` |
151
- | `Int8`–`Int256`, `UInt8`–`UInt256` | `src/integers.ts` |
152
- | `Bool` | `src/bool.ts` |
153
- | `Enum8`, `Enum16` | `src/enums.ts` |
154
- | `Float32`, `Float64`, `BFloat16` | `src/floats.ts` |
155
- | `Decimal32/64/128/256`, `Decimal(P, S)` | `src/decimals.ts` |
156
- | `String`, `FixedString(N)` | `src/strings.ts` |
157
- | `UUID` | `src/uuid.ts` |
158
- | `IPv4`, `IPv6` | `src/ip.ts` |
159
- | `Date`, `Date32`, `DateTime`, `DateTime(tz)`, `DateTime64(P[, tz])` | `src/datetime.ts` |
160
- | `Time`, `Time64(P)` | `src/time.ts` |
161
- | `IntervalNanosecond` `IntervalYear` | `src/interval.ts` |
162
- | `Array(T)`, `Map(K, V)`, `Tuple(...)`, `Nullable(T)`, `Variant(...)`, `QBit(...)` | `src/composite.ts` |
163
- | `Point`, `Ring`, `LineString`, `MultiLineString`, `Polygon`, `MultiPolygon`, `Geometry` | `src/geo.ts` |
164
- | `Dynamic` (and `Variant`/`Interval`/`Nested`/`Dynamic` nested inside it) | `src/dynamic.ts` |
165
- | `JSON` | `src/json.ts` |
166
- | The whole result loop rows to EOF (`readRows`) | `src/rows.ts` |
167
- | A chunked HTTP response — `streamRowBatches`, `coalesceChunks` | `src/stream.ts` |
168
- | **Numeric/fixed-width result read column-wise** (aggregate/scan/plot, hand to a Worker/WASM) → decode into typed arrays, not row objects (~4x) | `src/columnar.ts` (`streamSensorColumns` — streaming, yields transferable typed-array batches); `decodeIotColumnar` in `src/examples/iot.ts` is the whole-buffer form |
169
- | `LowCardinality(T)` — transparent, decode as `T` | `src/lowCardinality.ts` |
170
- | `SimpleAggregateFunction(f, T)` — transparent, decode as `T` | `src/simpleAggregateFunction.ts` |
171
- | `Nested(...)` — no wire of its own; `Array(Tuple(...))` | `src/nested.ts` |
172
- | `Nothing` — zero-width, never decoded (only wrapped) | `src/nothing.ts` |
173
- | `AggregateFunction(...)` — opaque state; finalize server-side | `src/aggregateFunction.ts` |
174
-
175
- ## Worked examples
176
-
177
- Six end-to-end examples with real speedup are catalogued in [EXAMPLES.md](EXAMPLES.md).
178
-
179
- ## Out of scope
180
-
181
- - **JSON / CSV / TSV / Parquet parsing** → use `clickhouse-js-node-coding`.
182
- - **Connection errors, hangs, type mismatches** → use
183
- `clickhouse-js-node-troubleshooting`.
184
- - **Browser / Web Worker / Edge** → `@clickhouse/client-web`.
185
-
186
- ## Still Stuck?
187
-
188
- - [ClickHouse RowBinary format](https://clickhouse.com/docs/interfaces/formats#rowbinary)
189
- - [ClickHouse data types](https://clickhouse.com/docs/sql-reference/data-types)
190
- - [ClickHouse JS client docs](https://clickhouse.com/docs/integrations/javascript)
93
+ ## Reader type family references
94
+
95
+ The readers live as real code under `src/readers/`, split by type family.
96
+
97
+ | Result contains (trigger) | Open |
98
+ | ---------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
99
+ | **Always** — cursor state, `advance()`, `NeedMoreData`, `Reader<T>` | `src/readers/core.ts` |
100
+ | LEB128 length/count prefixes for `String`/`Array`/`Map` (`readUVarint`) | `src/readers/varint.ts` |
101
+ | `Int8`–`Int256`, `UInt8`–`UInt256` | `src/readers/integers.ts` |
102
+ | `Bool` | `src/readers/bool.ts` |
103
+ | `Enum8`, `Enum16` (resolve to the value's name; `readInt8`/`readInt16` for the raw int) | `src/readers/enums.ts` |
104
+ | `Float32`, `Float64`, `BFloat16` | `src/readers/floats.ts` |
105
+ | `Decimal32/64/128/256`, `Decimal(P, S)` | `src/readers/decimals.ts` |
106
+ | `String`, `FixedString(N)` | `src/readers/strings.ts` |
107
+ | `UUID` | `src/readers/uuid.ts` |
108
+ | `IPv4`, `IPv6` | `src/readers/ip.ts` |
109
+ | `Date`, `Date32`, `DateTime`, `DateTime(tz)`, `DateTime64(P[, tz])` | `src/readers/datetime.ts` |
110
+ | `Time`, `Time64(P)` | `src/readers/time.ts` |
111
+ | `IntervalNanosecond` … `IntervalYear` | `src/readers/interval.ts` |
112
+ | `Array(T)`, `Map(K, V)`, `Tuple(...)`, `Nullable(T)`, `Variant(...)`, `QBit(...)` | `src/readers/composite.ts` |
113
+ | `Point`, `Ring`, `LineString`, `MultiLineString`, `Polygon`, `MultiPolygon`, `Geometry` | `src/readers/geo.ts` |
114
+ | `Dynamic` (and `Variant`/`Interval`/`Nested`/`Dynamic` nested inside it) | `src/readers/dynamic.ts` |
115
+ | `JSON` | `src/readers/json.ts` |
116
+ | The whole result — loop rows to EOF (`readRows`) | `src/readers/rows.ts` |
117
+ | A chunked HTTP response — `streamRowBatches`, `coalesceChunks` | `src/readers/stream.ts` |
118
+ | The `RowBinaryWithNamesAndTypes` header — column names + type strings (`readHeader`) | `src/readers/header.ts` |
119
+ | Fold one parsed type AST into a `Reader` (`astToReader`) — AST in, reader out | `src/readers/compile.ts` |
120
+ | **Types known only at runtime** — compile a whole header into a row reader (`compileRowBinaryWithNamesAndTypes`, `typeStringToReader`) | `src/readers/rowBinaryWithNamesAndTypes.ts` |
121
+ | **Numeric/fixed-width result read column-wise** (aggregate/scan/plot, hand to a Worker/WASM) → decode into typed arrays, not row objects (~4x) | `src/readers/columnar.ts` (`streamSensorColumns` — streaming, yields transferable typed-array batches); `decodeIotColumnar` in `src/examples/iot.ts` is the whole-buffer form |
122
+ | `LowCardinality(T)` — transparent, decode as `T` | `src/readers/lowCardinality.ts` |
123
+ | `SimpleAggregateFunction(f, T)` — transparent, decode as `T` | `src/readers/simpleAggregateFunction.ts` |
124
+ | `Nested(...)` — no wire of its own; `Array(Tuple(...))` | `src/readers/nested.ts` |
125
+ | `Nothing` — zero-width, never decoded (only wrapped) | `src/readers/nothing.ts` |
126
+ | `AggregateFunction(...)` — opaque state; finalize server-side | `src/readers/aggregateFunction.ts` |
@@ -1,8 +1,12 @@
1
- import { readArray, readNullable, readTupleNamed } from "../composite.js";
2
- import { type Reader, advance } from "../core.js";
3
- import { readInt32, readUInt16, readUInt32 } from "../integers.js";
4
- import { readString } from "../strings.js";
5
- import { readUVarint } from "../varint.js";
1
+ import {
2
+ readArray,
3
+ readNullable,
4
+ readTupleNamed,
5
+ } from "../readers/composite.js";
6
+ import { type Reader, advance } from "../readers/core.js";
7
+ import { readInt32, readUInt16, readUInt32 } from "../readers/integers.js";
8
+ import { readString } from "../readers/strings.js";
9
+ import { readUVarint } from "../readers/varint.js";
6
10
 
7
11
  /**
8
12
  * Example: a carts table — nested generics.
@@ -1,8 +1,8 @@
1
- import { type Reader, advance } from "../core.js";
2
- import { readDateTime } from "../datetime.js";
3
- import { readUInt64 } from "../integers.js";
4
- import { readString } from "../strings.js";
5
- import { readUVarint } from "../varint.js";
1
+ import { type Reader, advance } from "../readers/core.js";
2
+ import { readDateTime } from "../readers/datetime.js";
3
+ import { readUInt64 } from "../readers/integers.js";
4
+ import { readString } from "../readers/strings.js";
5
+ import { readUVarint } from "../readers/varint.js";
6
6
 
7
7
  /**
8
8
  * Example: a plain events table — the scalar baseline.
@@ -1,7 +1,7 @@
1
- import { type Reader, advance } from "../core.js";
2
- import { readDateTime64P3 } from "../datetime.js";
3
- import { readFloat32, readFloat64 } from "../floats.js";
4
- import { readUInt8, readUInt32 } from "../integers.js";
1
+ import { type Reader, advance } from "../readers/core.js";
2
+ import { readDateTime64P3 } from "../readers/datetime.js";
3
+ import { readFloat32, readFloat64 } from "../readers/floats.js";
4
+ import { readUInt8, readUInt32 } from "../readers/integers.js";
5
5
 
6
6
  /**
7
7
  * Example: a table of IoT sensor readings — the dense, fixed-width NUMERIC case
@@ -1,10 +1,10 @@
1
- import { type Reader, advance } from "../core.js";
1
+ import { type Reader, advance } from "../readers/core.js";
2
2
  import {
3
3
  type DecimalValue,
4
4
  readDecimal64,
5
5
  readDecimal128,
6
- } from "../decimals.js";
7
- import { readInt64, readUInt128, readUInt256 } from "../integers.js";
6
+ } from "../readers/decimals.js";
7
+ import { readInt64, readUInt128, readUInt256 } from "../readers/integers.js";
8
8
 
9
9
  /**
10
10
  * Example: a financial ledger — the WIDE-NUMERIC case where RowBinary wins on
@@ -1,7 +1,7 @@
1
- import { type Reader, advance } from "../core.js";
2
- import { readDateTime } from "../datetime.js";
3
- import { readString } from "../strings.js";
4
- import { readUVarint } from "../varint.js";
1
+ import { type Reader, advance } from "../readers/core.js";
2
+ import { readDateTime } from "../readers/datetime.js";
3
+ import { readString } from "../readers/strings.js";
4
+ import { readUVarint } from "../readers/varint.js";
5
5
 
6
6
  /**
7
7
  * Example: an application log table — the STRING-HEAVY case where the skill
@@ -4,15 +4,14 @@ import {
4
4
  readNullable,
5
5
  readTupleNamed,
6
6
  readVariant,
7
- } from "../composite.js";
8
- import { type Reader, advance } from "../core.js";
9
- import { readDateTime64P3 } from "../datetime.js";
10
- import { readEnum8 } from "../enums.js";
11
- import { readFloat64 } from "../floats.js";
12
- import { readInt64, readUInt64 } from "../integers.js";
13
- import { readString } from "../strings.js";
14
- import { formatUUID, formatUUIDTable, readUUID } from "../uuid.js";
15
- import { readUVarint } from "../varint.js";
7
+ } from "../readers/composite.js";
8
+ import { type Reader, advance } from "../readers/core.js";
9
+ import { readDateTime64P3 } from "../readers/datetime.js";
10
+ import { readFloat64 } from "../readers/floats.js";
11
+ import { readInt8, readInt64, readUInt64 } from "../readers/integers.js";
12
+ import { readString } from "../readers/strings.js";
13
+ import { formatUUID, formatUUIDTable, readUUID } from "../readers/uuid.js";
14
+ import { readUVarint } from "../readers/varint.js";
16
15
 
17
16
  /**
18
17
  * Example: an observability/events table — the gotcha-heavy one. It packs the
@@ -60,7 +59,7 @@ export type ObsRow = {
60
59
  export const readObsRow: Reader<ObsRow> = (s) => ({
61
60
  id: readUInt64(s),
62
61
  ts: readDateTime64P3(s).toISOString(),
63
- level: readEnum8(s),
62
+ level: readInt8(s),
64
63
  traceId: formatUUID(readUUID(s)),
65
64
  payload: readVariant([readFloat64, readInt64, readString])(s),
66
65
  tags: readMap(readString, readString)(s),