@clickhouse/client 1.22.0 → 1.23.0-head.287977a.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/README.md +2 -1
  2. package/dist/client.d.ts +2 -2
  3. package/dist/client.js +11 -4
  4. package/dist/client.js.map +1 -1
  5. package/dist/common/clickhouse_types.d.ts +98 -0
  6. package/dist/common/clickhouse_types.js +30 -0
  7. package/dist/common/clickhouse_types.js.map +1 -0
  8. package/dist/common/client.d.ts +233 -0
  9. package/dist/common/client.js +414 -0
  10. package/dist/common/client.js.map +1 -0
  11. package/dist/common/config.d.ts +234 -0
  12. package/dist/common/config.js +364 -0
  13. package/dist/common/config.js.map +1 -0
  14. package/dist/common/connection.d.ts +124 -0
  15. package/dist/common/connection.js +3 -0
  16. package/dist/common/connection.js.map +1 -0
  17. package/dist/common/data_formatter/format_query_params.d.ts +11 -0
  18. package/dist/common/data_formatter/format_query_params.js +128 -0
  19. package/dist/common/data_formatter/format_query_params.js.map +1 -0
  20. package/dist/common/data_formatter/format_query_settings.d.ts +2 -0
  21. package/dist/common/data_formatter/format_query_settings.js +20 -0
  22. package/dist/common/data_formatter/format_query_settings.js.map +1 -0
  23. package/dist/common/data_formatter/formatter.d.ts +41 -0
  24. package/dist/common/data_formatter/formatter.js +78 -0
  25. package/dist/common/data_formatter/formatter.js.map +1 -0
  26. package/dist/common/data_formatter/index.d.ts +3 -0
  27. package/dist/common/data_formatter/index.js +24 -0
  28. package/dist/common/data_formatter/index.js.map +1 -0
  29. package/dist/common/error/error.d.ts +20 -0
  30. package/dist/common/error/error.js +73 -0
  31. package/dist/common/error/error.js.map +1 -0
  32. package/dist/common/error/index.d.ts +1 -0
  33. package/dist/common/error/index.js +18 -0
  34. package/dist/common/error/index.js.map +1 -0
  35. package/dist/common/index.d.ts +67 -0
  36. package/dist/common/index.js +97 -0
  37. package/dist/common/index.js.map +1 -0
  38. package/dist/common/logger.d.ts +80 -0
  39. package/dist/common/logger.js +154 -0
  40. package/dist/common/logger.js.map +1 -0
  41. package/dist/common/parse/column_types.d.ts +155 -0
  42. package/dist/common/parse/column_types.js +594 -0
  43. package/dist/common/parse/column_types.js.map +1 -0
  44. package/dist/common/parse/index.d.ts +2 -0
  45. package/dist/common/parse/index.js +19 -0
  46. package/dist/common/parse/index.js.map +1 -0
  47. package/dist/common/parse/json_handling.d.ts +19 -0
  48. package/dist/common/parse/json_handling.js +8 -0
  49. package/dist/common/parse/json_handling.js.map +1 -0
  50. package/dist/common/result.d.ts +90 -0
  51. package/dist/common/result.js +3 -0
  52. package/dist/common/result.js.map +1 -0
  53. package/dist/common/settings.d.ts +2007 -0
  54. package/dist/common/settings.js +19 -0
  55. package/dist/common/settings.js.map +1 -0
  56. package/dist/common/tracing.d.ts +146 -0
  57. package/dist/common/tracing.js +76 -0
  58. package/dist/common/tracing.js.map +1 -0
  59. package/dist/common/ts_utils.d.ts +4 -0
  60. package/dist/common/ts_utils.js +3 -0
  61. package/dist/common/ts_utils.js.map +1 -0
  62. package/dist/common/utils/connection.d.ts +21 -0
  63. package/dist/common/utils/connection.js +43 -0
  64. package/dist/common/utils/connection.js.map +1 -0
  65. package/dist/common/utils/index.d.ts +5 -0
  66. package/dist/common/utils/index.js +22 -0
  67. package/dist/common/utils/index.js.map +1 -0
  68. package/dist/common/utils/multipart.d.ts +34 -0
  69. package/dist/common/utils/multipart.js +81 -0
  70. package/dist/common/utils/multipart.js.map +1 -0
  71. package/dist/common/utils/sleep.d.ts +4 -0
  72. package/dist/common/utils/sleep.js +12 -0
  73. package/dist/common/utils/sleep.js.map +1 -0
  74. package/dist/common/utils/stream.d.ts +15 -0
  75. package/dist/common/utils/stream.js +50 -0
  76. package/dist/common/utils/stream.js.map +1 -0
  77. package/dist/common/utils/url.d.ts +20 -0
  78. package/dist/common/utils/url.js +67 -0
  79. package/dist/common/utils/url.js.map +1 -0
  80. package/dist/common/version.d.ts +2 -0
  81. package/dist/common/version.js +4 -0
  82. package/dist/common/version.js.map +1 -0
  83. package/dist/config.d.ts +22 -2
  84. package/dist/config.js +2 -2
  85. package/dist/config.js.map +1 -1
  86. package/dist/connection/compression.d.ts +2 -2
  87. package/dist/connection/compression.js +4 -4
  88. package/dist/connection/compression.js.map +1 -1
  89. package/dist/connection/create_connection.d.ts +1 -1
  90. package/dist/connection/node_base_connection.d.ts +3 -3
  91. package/dist/connection/node_base_connection.js +22 -22
  92. package/dist/connection/node_base_connection.js.map +1 -1
  93. package/dist/connection/node_custom_agent_connection.js +2 -2
  94. package/dist/connection/node_custom_agent_connection.js.map +1 -1
  95. package/dist/connection/node_http_connection.js +2 -2
  96. package/dist/connection/node_http_connection.js.map +1 -1
  97. package/dist/connection/node_https_connection.d.ts +1 -1
  98. package/dist/connection/node_https_connection.js +3 -3
  99. package/dist/connection/node_https_connection.js.map +1 -1
  100. package/dist/connection/socket_pool.d.ts +1 -1
  101. package/dist/connection/socket_pool.js +30 -30
  102. package/dist/connection/socket_pool.js.map +1 -1
  103. package/dist/connection/stream.d.ts +1 -1
  104. package/dist/connection/stream.js +9 -9
  105. package/dist/connection/stream.js.map +1 -1
  106. package/dist/index.d.ts +9 -7
  107. package/dist/index.js +26 -24
  108. package/dist/index.js.map +1 -1
  109. package/dist/result_set.d.ts +1 -1
  110. package/dist/result_set.js +10 -10
  111. package/dist/result_set.js.map +1 -1
  112. package/dist/utils/encoder.d.ts +1 -1
  113. package/dist/utils/encoder.js +5 -5
  114. package/dist/utils/encoder.js.map +1 -1
  115. package/dist/version.d.ts +1 -1
  116. package/dist/version.js +1 -1
  117. package/dist/version.js.map +1 -1
  118. package/package.json +7 -5
  119. package/skills/clickhouse-js-node-rowbinary-parser/EXAMPLES.md +48 -0
  120. package/skills/clickhouse-js-node-rowbinary-parser/README.md +255 -0
  121. package/skills/clickhouse-js-node-rowbinary-parser/SKILL.md +206 -0
  122. package/skills/clickhouse-js-node-rowbinary-parser/case-studies/iot-rowbinary-vs-json.md +83 -0
  123. package/skills/clickhouse-js-node-rowbinary-parser/case-studies/ledger-rowbinary-vs-json.md +103 -0
  124. package/skills/clickhouse-js-node-rowbinary-parser/case-studies/logs-json-wins.md +86 -0
  125. package/skills/clickhouse-js-node-rowbinary-parser/case-studies/wasm-vs-js.md +172 -0
  126. package/skills/clickhouse-js-node-rowbinary-parser/src/aggregateFunction.ts +34 -0
  127. package/skills/clickhouse-js-node-rowbinary-parser/src/bool.ts +10 -0
  128. package/skills/clickhouse-js-node-rowbinary-parser/src/columnar.ts +125 -0
  129. package/skills/clickhouse-js-node-rowbinary-parser/src/compile.ts +318 -0
  130. package/skills/clickhouse-js-node-rowbinary-parser/src/composite.ts +181 -0
  131. package/skills/clickhouse-js-node-rowbinary-parser/src/core.ts +77 -0
  132. package/skills/clickhouse-js-node-rowbinary-parser/src/datetime.ts +113 -0
  133. package/skills/clickhouse-js-node-rowbinary-parser/src/decimals.ts +57 -0
  134. package/skills/clickhouse-js-node-rowbinary-parser/src/dynamic.ts +328 -0
  135. package/skills/clickhouse-js-node-rowbinary-parser/src/enums.ts +28 -0
  136. package/skills/clickhouse-js-node-rowbinary-parser/src/examples/carts.ts +71 -0
  137. package/skills/clickhouse-js-node-rowbinary-parser/src/examples/events.ts +51 -0
  138. package/skills/clickhouse-js-node-rowbinary-parser/src/examples/iot.ts +158 -0
  139. package/skills/clickhouse-js-node-rowbinary-parser/src/examples/ledger.ts +98 -0
  140. package/skills/clickhouse-js-node-rowbinary-parser/src/examples/logs.ts +73 -0
  141. package/skills/clickhouse-js-node-rowbinary-parser/src/examples/observability.ts +142 -0
  142. package/skills/clickhouse-js-node-rowbinary-parser/src/examples/orders.ts +65 -0
  143. package/skills/clickhouse-js-node-rowbinary-parser/src/examples/profiles.ts +60 -0
  144. package/skills/clickhouse-js-node-rowbinary-parser/src/examples/telemetry.ts +102 -0
  145. package/skills/clickhouse-js-node-rowbinary-parser/src/floats.ts +32 -0
  146. package/skills/clickhouse-js-node-rowbinary-parser/src/geo.ts +109 -0
  147. package/skills/clickhouse-js-node-rowbinary-parser/src/header.ts +29 -0
  148. package/skills/clickhouse-js-node-rowbinary-parser/src/integers.ts +95 -0
  149. package/skills/clickhouse-js-node-rowbinary-parser/src/interval.ts +54 -0
  150. package/skills/clickhouse-js-node-rowbinary-parser/src/ip.ts +93 -0
  151. package/skills/clickhouse-js-node-rowbinary-parser/src/json.ts +33 -0
  152. package/skills/clickhouse-js-node-rowbinary-parser/src/lowCardinality.ts +18 -0
  153. package/skills/clickhouse-js-node-rowbinary-parser/src/nested.ts +23 -0
  154. package/skills/clickhouse-js-node-rowbinary-parser/src/nothing.ts +29 -0
  155. package/skills/clickhouse-js-node-rowbinary-parser/src/reader.ts +68 -0
  156. package/skills/clickhouse-js-node-rowbinary-parser/src/rowBinaryWithNamesAndTypes.ts +155 -0
  157. package/skills/clickhouse-js-node-rowbinary-parser/src/rows.ts +58 -0
  158. package/skills/clickhouse-js-node-rowbinary-parser/src/simpleAggregateFunction.ts +20 -0
  159. package/skills/clickhouse-js-node-rowbinary-parser/src/stream.ts +276 -0
  160. package/skills/clickhouse-js-node-rowbinary-parser/src/strings.ts +55 -0
  161. package/skills/clickhouse-js-node-rowbinary-parser/src/time.ts +61 -0
  162. package/skills/clickhouse-js-node-rowbinary-parser/src/uuid.ts +153 -0
  163. package/skills/clickhouse-js-node-rowbinary-parser/src/varint.ts +70 -0
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Barrel re-export of the RowBinary reader, split by type family into the
3
+ * sibling modules. Import from here for everything in one place, or from a
4
+ * specific module (e.g. `./integers.js`, `./strings.js`) to pull in only the
5
+ * sub-parsers a given result actually needs — the latter is what a generated
6
+ * parser should do, copying just the modules its column types require.
7
+ *
8
+ * - core — Cursor, Reader<T>, advance, NeedMoreData
9
+ * - varint — readUVarint
10
+ * - integers — readUInt8..readUInt256, readInt8..readInt256
11
+ * - bool / enums / floats
12
+ * - decimals — DecimalValue, formatDecimal, readDecimal32..256
13
+ * - strings — readString, readFixedString, readFixedStringBytes
14
+ * - uuid — readUUID(+BigInt/HiLo), formatUUID(+Table)
15
+ * - ip — readIPv4/6, formatIPv4/6
16
+ * - datetime / time / interval
17
+ * - composite — readArray/Map/Tuple/TupleNamed/Nullable/Variant/QBit
18
+ * - rows — readRows
19
+ * - geo — Point, readPoint/Ring/LineString/Polygon/MultiLineString/MultiPolygon/Geometry
20
+ * - dynamic — readDynamic, readDynamicType
21
+ * - json — readJSON
22
+ * - stream — streamRowBatches, coalesceChunks
23
+ * - transparent / special wrappers (mostly documentation; see each file):
24
+ * lowCardinality (readLowCardinality), simpleAggregateFunction
25
+ * (readSimpleAggregateFunction), nested (readNested), nothing (readNothing),
26
+ * aggregateFunction (readAggregateFunction)
27
+ *
28
+ * Runtime schema path — compile a reader from the type STRINGS rather than
29
+ * hand-/code-generating one. Use this when the column types are not known until
30
+ * the response arrives (or you just want a generic decoder); for a fixed,
31
+ * known schema the specialized straight-line reader is faster.
32
+ * - header — readHeader: the RowBinaryWithNamesAndTypes preamble (column
33
+ * names + type strings) off the cursor
34
+ * - compile — astToReader: fold one parsed type AST (from
35
+ * `@clickhouse/datatype-parser`) into a value Reader. AST in,
36
+ * reader out — the type-to-combinator mapping, nothing else
37
+ * - rowBinaryWithNamesAndTypes — typeStringToReader (parse a type string +
38
+ * fold) and compileRowBinaryWithNamesAndTypes (read the header,
39
+ * compile every column, return a `readRows` driver for the rest of
40
+ * the stream): the end-to-end runtime entry point
41
+ */
42
+ export * from "./core.js";
43
+ export * from "./varint.js";
44
+ export * from "./integers.js";
45
+ export * from "./bool.js";
46
+ export * from "./enums.js";
47
+ export * from "./floats.js";
48
+ export * from "./decimals.js";
49
+ export * from "./strings.js";
50
+ export * from "./uuid.js";
51
+ export * from "./ip.js";
52
+ export * from "./datetime.js";
53
+ export * from "./time.js";
54
+ export * from "./interval.js";
55
+ export * from "./composite.js";
56
+ export * from "./rows.js";
57
+ export * from "./geo.js";
58
+ export * from "./dynamic.js";
59
+ export * from "./json.js";
60
+ export * from "./stream.js";
61
+ export * from "./lowCardinality.js";
62
+ export * from "./simpleAggregateFunction.js";
63
+ export * from "./nested.js";
64
+ export * from "./nothing.js";
65
+ export * from "./aggregateFunction.js";
66
+ export * from "./header.js";
67
+ export * from "./compile.js";
68
+ export * from "./rowBinaryWithNamesAndTypes.js";
@@ -0,0 +1,155 @@
1
+ /**
2
+ * The `RowBinaryWithNamesAndTypes` entry point: read the header off a cursor,
3
+ * compile each column's type string into a reader, and hand back a driver that
4
+ * decodes the rest of the stream.
5
+ *
6
+ * This ties together the pieces: {@link readHeader} (wire), the parser
7
+ * (`@clickhouse/datatype-parser`), and {@link astToReader} (the AST → reader
8
+ * fold in `compile.ts`), then assembles a named-tuple row reader over the
9
+ * columns and a {@link readRows} driver for the row data.
10
+ */
11
+
12
+ import { parseDataType } from "@clickhouse/datatype-parser";
13
+
14
+ import type { Reader, Cursor } from "./core.js";
15
+ import { readHeader } from "./header.js";
16
+ import { readRows } from "./rows.js";
17
+ import { astToReader, RowBinaryTypeError } from "./compile.js";
18
+
19
/** A single decoded row: an object mapping each column name to its decoded value. */
export type Row = Record<string, unknown>;

/**
 * Everything produced by compiling a `RowBinaryWithNamesAndTypes` header: the
 * column metadata taken from the header, one reader per column, and two
 * ready-made drivers — `readRow` for a single row and `readRows` for all the
 * rows that follow the header.
 */
export interface CompiledStream {
  /** Names of the columns, in the order the header lists them. */
  names: Array<string>;
  /** ClickHouse type strings of the columns, in the same order as `names`. */
  types: Array<string>;
  /** The reader resolved for each column; index `i` decodes column `i`. */
  columnReaders: Array<Reader<unknown>>;
  /** Decodes exactly one row and returns it as a `{ [name]: value }` object. */
  readRow: Reader<Row>;
  /**
   * Decodes every row after the header into an array. Built on
   * {@link readRows}, so it is streaming-aware: when the buffer ends in the
   * middle of a row, the cursor is put back at the start of that row and the
   * rows completed so far are returned.
   */
  readRows: Reader<Array<Row>>;
}
43
+
44
/**
 * Turn a single ClickHouse type string into a {@link Reader}: parse it with
 * `parseDataType`, then fold the resulting AST with {@link astToReader}.
 *
 * @param typeStr - The ClickHouse type string, as it appears in the header.
 * @returns The reader folded from the parsed AST.
 * @throws {RowBinaryTypeError} When the parser reports failure. The error
 *   message quotes the offending string and the parser's message, and the
 *   error carries `typeString` and the parse `position`.
 */
export function typeStringToReader(typeStr: string): Reader<unknown> {
  const parsed = parseDataType(typeStr);

  // Happy path first: a successful parse folds straight into a reader.
  if (parsed.ok()) {
    return astToReader(parsed.ast!);
  }

  const failure = parsed.error!;
  const quotedType = JSON.stringify(typeStr);
  throw new RowBinaryTypeError(
    `cannot compile type ${quotedType}: ${failure.message}`,
    { typeString: typeStr, position: failure.position },
  );
}
61
+
62
/**
 * Maps a ClickHouse type string to the reader that decodes it. Satisfied by
 * `typeStringToReader` itself or by a cache wrapped around it.
 */
export type TypeReaderResolver = (typeStr: string) => Reader<unknown>;

/**
 * Create a {@link TypeReaderResolver} that memoizes {@link typeStringToReader}
 * behind a bounded least-recently-used cache keyed by the full type string.
 *
 * Intended use: keep one cache alive and pass it to
 * {@link compileRowBinaryWithNamesAndTypes} for every response, so a type that
 * recurs across responses is parsed and folded once instead of once per
 * response. Sharing a cached reader between columns and streams relies on
 * readers holding no state of their own.
 *
 * The LRU is a plain `Map`, which iterates in insertion order. Every access
 * puts the key at the tail (a hit is removed and re-inserted; a miss is
 * appended), so the head is always the least recently used entry and is the
 * one dropped when the size exceeds `maxSize`.
 *
 * Failures are never cached: if {@link typeStringToReader} throws, nothing is
 * stored and the error propagates, so a later call retries the compile.
 *
 * @param maxSize - Maximum number of cached readers. Defaults to `256`.
 * @returns A resolver backed by the cache.
 */
export function createTypeReaderCache(maxSize = 256): TypeReaderResolver {
  const readersByType = new Map<string, Reader<unknown>>();

  const resolve: TypeReaderResolver = (typeStr) => {
    const hit = readersByType.get(typeStr);

    if (hit === undefined) {
      // Miss: compile first. If this throws, the cache is left untouched.
      const compiled = typeStringToReader(typeStr);
      readersByType.set(typeStr, compiled);

      if (readersByType.size > maxSize) {
        // Over capacity: the Map's first key is the least recently used.
        const oldest = readersByType.keys().next().value;
        if (oldest !== undefined) readersByType.delete(oldest);
      }
      return compiled;
    }

    // Hit: remove and re-insert so the key moves to the most-recent end.
    readersByType.delete(typeStr);
    readersByType.set(typeStr, hit);
    return hit;
  };

  return resolve;
}
111
+
112
/**
 * The headline entry point. Reads the `RowBinaryWithNamesAndTypes` header off
 * `state`, resolves each column's type string to a reader, and returns the
 * column metadata plus the readers — including `readRows`, the reader for the
 * REST of the stream. The header is read through `state`, so the same cursor
 * is then handed to `readRows` to decode the rows that follow:
 *
 *     const s = new Cursor(buf);
 *     const { names, readRows } = compileRowBinaryWithNamesAndTypes(s);
 *     const rows = readRows(s); // decode every remaining row
 *
 * @param state - Cursor positioned at the start of the header.
 * @param resolveType - Maps a type string to its reader. Defaults to
 *   {@link typeStringToReader} (compile every column afresh); pass a shared
 *   {@link createTypeReaderCache} to reuse readers across calls.
 * @returns The header's names and types, the per-column readers, a one-row
 *   reader, and a reader for all remaining rows.
 * @throws Whatever `resolveType` throws for a type it cannot compile (a
 *   `RowBinaryTypeError` with the default resolver).
 */
export function compileRowBinaryWithNamesAndTypes(
  state: Cursor,
  resolveType: TypeReaderResolver = typeStringToReader,
): CompiledStream {
  const { names, types } = readHeader(state);
  const columnReaders = types.map((t) => resolveType(t));

  // A column may legally be named `__proto__`. Plain assignment to that key on
  // an object literal goes through the inherited accessor instead of creating
  // a property: a primitive value is silently dropped and an object/array value
  // becomes the row's PROTOTYPE. Detect the name once here so the per-row loop
  // only pays for the special case when the header actually contains it.
  const hasProtoColumn = names.includes("__proto__");

  // Build the row reader POSITIONALLY — by column index, NOT by keying the
  // readers on column name. The header is an ordered list and RowBinary has no
  // row delimiter, so every row MUST run exactly these readers, in exactly this
  // order. Keying readers by name first would corrupt the stream on
  // legal-but-awkward headers:
  //   - duplicate column names (e.g. `SELECT 1 AS x, 2 AS x`) collapse to a
  //     single entry in a `Record`, so fewer readers run than there are columns;
  //   - integer-like names (`0`, `1`, …) are reordered ahead of string keys by
  //     `Object.keys()`, so the readers would run out of header order.
  // Either desyncs the cursor and misreads every subsequent row. The row OBJECT
  // is still keyed by name; on a duplicate name the last column with that name
  // wins in the object, but every column is still consumed off the wire.
  const readRow: Reader<Row> = (s) => {
    const row: Row = {};
    for (let i = 0; i < columnReaders.length; i++) {
      const name = names[i]!;
      const value = columnReaders[i]!(s);
      if (hasProtoColumn && name === "__proto__") {
        // Define an ordinary own, enumerable data property. `configurable`
        // keeps the "last duplicate wins" behavior if the name repeats.
        Object.defineProperty(row, name, {
          value,
          writable: true,
          enumerable: true,
          configurable: true,
        });
      } else {
        row[name] = value;
      }
    }
    return row;
  };

  return { names, types, columnReaders, readRow, readRows: readRows(readRow) };
}
@@ -0,0 +1,58 @@
1
+ import { NeedMoreData, type Reader } from "./core.js";
2
+
3
/**
 * Drive `readRow` over every row of a plain `RowBinary` result into an array.
 * Curried: `readRows(readRow)` returns a `Reader<T[]>`. Rows are concatenated on
 * the wire with no count, length prefix, or delimiter, so the result is
 * exhausted only when the cursor reaches the end of the buffer. An empty buffer
 * yields `[]`.
 *
 * `readRow` must consume EXACTLY one row's bytes — a byte short or long
 * compounds across rows and the cursor overshoots or never lands on
 * `buf.length`. When generating code, inline the per-column reads into the loop:
 *
 *     function readRowsUser(s) {
 *       const out = [];
 *       while (s.pos < s.buf.length) {
 *         out.push({ id: readUInt64(s), name: readString(s) });
 *       }
 *       return out;
 *     }
 *
 * STREAMING (partial trailing row): a chunk of a still-arriving response may
 * end mid-row. The commit point advances only AFTER a row reads cleanly, so
 * when `readRow` throws {@link NeedMoreData}, this catches it, rewinds `pos` to
 * the last complete row boundary, and returns the rows so far — never a
 * half-built row. The driver carries that position forward:
 *
 *     const drive = readRows(readRow);
 *     let committed = 0;
 *     for (const chunk of chunks) {   // chunk = growing prefix
 *       const s = new Cursor(chunk);
 *       s.pos = committed;
 *       emit(drive(s));               // complete rows in this chunk
 *       committed = s.pos;            // start of the straddling row
 *     }
 *
 * NO-PROGRESS GUARD: if `readRow` returns without moving `pos` forward while
 * bytes remain, the loop could never finish and the output array would grow
 * without bound. That is treated as a decode fault and reported with an error
 * instead of hanging.
 *
 * @param readRow - Reader that decodes exactly one row.
 * @returns A reader that decodes all complete rows from the cursor position on.
 * @throws {Error} When `readRow` consumes no bytes on a non-empty remainder.
 * @throws Any error from `readRow` other than {@link NeedMoreData}.
 */
export function readRows<T>(readRow: Reader<T>): Reader<T[]> {
  return (state) => {
    const out: T[] = [];
    let committed = state.pos;
    let stalled = false;
    try {
      while (state.pos < state.buf.length) {
        const row = readRow(state);
        if (state.pos <= committed) {
          // The row "read" without advancing: looping again would repeat the
          // same read forever. Leave the loop and report it below.
          stalled = true;
          break;
        }
        committed = state.pos; // row read cleanly — advance the commit point
        out.push(row);
      }
    } catch (e) {
      if (e !== NeedMoreData) throw e;
      state.pos = committed; // drop the partial trailing row; resume next chunk
    }
    if (stalled) {
      throw new Error(
        `readRows: readRow consumed no bytes at offset ${committed} with ` +
          `${state.buf.length - committed} byte(s) remaining; the row reader must advance the cursor`,
      );
    }
    return out;
  };
}
@@ -0,0 +1,20 @@
1
+ import { type Reader } from "./core.js";
2
+
3
/**
 * Reader combinator for `SimpleAggregateFunction(func, T)`.
 *
 * Such a column is TRANSPARENT in RowBinary: the stored partial aggregate of a
 * "simple" function (sum / min / max / groupArrayArray / …) is itself just a
 * value of the underlying type `T`, encoded byte-for-byte like `T`. Decoding it
 * is therefore decoding `T`, and this combinator hands back the inner reader
 * unchanged — it exists for documentation and symmetry:
 *
 *     readSimpleAggregateFunction(readUInt64) === readUInt64
 *
 * Not to be confused with `AggregateFunction(func, T)`, whose value is an
 * opaque serialized aggregation STATE with a function-specific binary layout —
 * see `./aggregateFunction.js`.
 *
 * @param readValue - Reader for the underlying type `T`.
 * @returns The very same reader instance.
 */
export const readSimpleAggregateFunction = <T>(
  readValue: Reader<T>,
): Reader<T> => {
  return readValue;
};
@@ -0,0 +1,276 @@
1
+ import { type Reader, Cursor } from "./core.js";
2
+ import { readRows } from "./rows.js";
3
+
4
+ /** Empty buffer reused as the "no carry" sentinel between chunks. */
5
+ const EMPTY_CHUNK = Buffer.alloc(0);
6
+
7
/**
 * Snapshot of the running counters handed to the `warn` callback at the moment
 * the small-chunk warning fires.
 */
export interface SmallChunkStats {
  /** Number of chunks consumed up to and including the one that tripped the warning. */
  chunks: number;
  /** Number of rows decoded across those chunks. */
  rows: number;
  /** The average `rows / chunks` that fell below the configured threshold. */
  rowsPerChunk: number;
}
16
+
17
/**
 * Configuration of the small-chunk warning emitted by {@link streamRowBatches}.
 *
 * - `false` turns the warning off.
 * - `true` (or leaving the option out) enables it with the defaults.
 * - An object enables it and overrides any of the fields below.
 */
export type WarnOnSmallChunks =
  | {
      /**
       * Threshold for the running `rows / chunks` average; the warning fires
       * when the average drops below it. Default `2`. Throw + restart
       * re-decodes the partial trailing row on EVERY chunk, so once a chunk
       * barely covers a row or two the re-scan dominates — the regime where
       * `streamingRow.bench.ts` shows throw+restart losing to a lean generator.
       * Keep it low so the warning only fires on genuinely undersized chunks,
       * never on a healthy hundreds-of-rows-per-chunk stream.
       */
      minRowsPerChunk?: number;
      /**
       * Number of chunks that must have been seen before the average is
       * evaluated at all. Default `16`. Gives the average time to settle and
       * keeps small results quiet, where the cost is irrelevant (it only
       * matters at megabytes / millions of rows). A stream shorter than this
       * never warns.
       */
      warmupChunks?: number;
      /** Sink for the warning. Default: `console.warn` with the message only. */
      warn?: (message: string, stats: SmallChunkStats) => void;
    }
  | boolean;
43
+
44
/** Optional settings accepted by {@link streamRowBatches}. */
export interface StreamRowBatchesOptions {
  /**
   * Controls a diagnostic for a silent throughput killer: chunks so small that
   * the throw+restart streaming strategy spends most of its time re-decoding
   * the partial trailing row rather than advancing. The warning is enabled
   * unless this is `false`, and it fires AT MOST ONCE per stream. See
   * {@link WarnOnSmallChunks} for the tunables.
   *
   * The remedy is usually upstream: raise the HTTP response's read size (Node
   * sets the socket/stream `highWaterMark`; a fetch `Response.body` reader
   * delivers larger chunks than a hand-rolled tiny read) into the
   * tens–hundreds of KB range. When chunk size is out of your hands, put
   * {@link coalesceChunks} in front of the stream to merge small chunks first.
   */
  warnOnSmallChunks?: WarnOnSmallChunks;
}
60
+
61
/**
 * Async front door over {@link readRows}: turn the byte chunks of a chunked
 * `RowBinary` response into batches of decoded rows. Feed it anything
 * async-iterable that yields `Uint8Array` chunks (a Node `Readable`,
 * `response.body`, …) together with a per-row `Reader`, and `for await` the
 * batches.
 *
 * Batching: each incoming chunk produces at most one batch, holding exactly
 * the rows that completed within it. A chunk that completes no row yields
 * nothing — its bytes are carried over to the next chunk — so an empty batch
 * is never yielded.
 *
 * Mechanics (carry-buffer driver):
 *   - The bytes left over from the previous chunk are joined in front of the
 *     new chunk (when there are none, the chunk is used as-is), a `Cursor` is
 *     built over the result, and `readRows` decodes the whole rows in it,
 *     leaving the cursor at the start of any partial trailing row.
 *   - The undecoded tail is kept as a `subarray` VIEW, not a copy; the next
 *     chunk's `Buffer.concat` copies those bytes into a fresh buffer.
 *   - If bytes are still pending when the input ends, the response stopped
 *     mid-row, so an error is thrown rather than dropping them silently.
 *
 * Build `readRow` ONCE and reuse it, e.g.
 * `(s) => ({ id: readUInt64(s), name: readString(s) })`.
 *
 * ZERO-COPY NOTE: a reader that returns a view into the cursor's buffer
 * (rather than a copy) returns data backed by the working buffer of the chunk
 * it was decoded from. Working buffers differ from batch to batch, so copy
 * inside `readRow` if such values must be retained long-term.
 *
 * BACKPRESSURE: this is a pull stream — the next chunk is requested only when
 * the consumer asks for the next batch.
 *
 * The small-chunk warning bookkeeping runs once per CHUNK, not per row; see
 * {@link StreamRowBatchesOptions} for what it reports and how to tune it.
 *
 * @param chunks - Async-iterable source of response bytes.
 * @param readRow - Reader that decodes exactly one row.
 * @param options - Optional tuning; see {@link StreamRowBatchesOptions}.
 * @throws {Error} When the input ends with undecoded bytes pending (mid-row).
 */
export async function* streamRowBatches<T>(
  chunks: AsyncIterable<Uint8Array>,
  readRow: Reader<T>,
  options?: StreamRowBatchesOptions,
): AsyncGenerator<T[], void, undefined> {
  const decodeCompleteRows = readRows(readRow);
  let pending: Buffer<ArrayBufferLike> = EMPTY_CHUNK;

  // Warning configuration is resolved a single time, before any chunk is read.
  const setting = options?.warnOnSmallChunks;
  const tuning = typeof setting === "object" ? setting : undefined;
  const threshold = tuning?.minRowsPerChunk ?? 2;
  const warmup = tuning?.warmupChunks ?? 16;
  const report = tuning?.warn ?? ((message: string) => console.warn(message));
  // True while the warning is enabled and has not fired yet.
  let monitoring = setting !== false;
  let seenChunks = 0;
  let seenRows = 0;

  for await (const chunk of chunks) {
    // View the chunk as a Buffer without copying (shares the ArrayBuffer).
    const incoming = Buffer.isBuffer(chunk)
      ? chunk
      : Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
    const work =
      pending.length === 0 ? incoming : Buffer.concat([pending, incoming]);

    const cursor = new Cursor(work);
    const batch = decodeCompleteRows(cursor);
    if (batch.length > 0) {
      yield batch;
    }

    // Whatever was not decoded (a partial trailing row, if any) waits for the
    // next chunk. Kept as a view; the next concat copies it out.
    if (cursor.pos < work.length) {
      pending = work.subarray(cursor.pos);
    } else {
      pending = EMPTY_CHUNK;
    }

    if (!monitoring) {
      continue;
    }
    seenChunks += 1;
    seenRows += batch.length;
    const rowsPerChunk = seenRows / seenChunks;
    if (seenChunks >= warmup && rowsPerChunk < threshold) {
      monitoring = false; // fire at most once per stream
      const message =
        `RowBinary stream: chunks look too small — ${rowsPerChunk.toFixed(2)} rows/chunk over ${seenChunks} chunks. ` +
        `Streaming throws + restarts the partial trailing row on every chunk, so tiny chunks spend most of their ` +
        `time re-decoding instead of advancing. Increase the upstream read/highWaterMark to tens–hundreds of KB, ` +
        `or compose coalesceChunks() in front of this stream to merge small chunks first.`;
      report(message, { chunks: seenChunks, rows: seenRows, rowsPerChunk });
    }
  }

  if (pending.length > 0) {
    throw new Error(
      `RowBinary stream ended mid-row: ${pending.length} trailing byte(s) left undecoded`,
    );
  }
}
162
+
163
/** A timeout result distinct from any `IteratorResult`. */
const TIMED_OUT = Symbol("coalesceChunks.timeout");

/**
 * Coalesce (debounce) a chunk stream so each emitted chunk is at least `minSize`
 * bytes — a filter you compose IN FRONT of {@link streamRowBatches} when the
 * source delivers chunks too small to stream efficiently and you can't enlarge
 * them upstream:
 *
 *   streamRowBatches(coalesceChunks(httpChunks, { minSize: 64 * 1024, timeoutMs: 50 }), readRow)
 *
 * WHY: the throw+restart streaming strategy re-decodes the partial trailing row
 * on every chunk boundary, so the smaller the chunks the more time is wasted
 * re-scanning (see `streamingRow.bench.ts`). Merging small chunks up front cuts
 * the number of boundaries — and the backtracking with it.
 *
 * THE TRADE-OFF (latency vs. reallocation vs. backtracking): merging holds bytes
 * back until enough accumulate, so it ADDS up to `timeoutMs` of latency to data
 * that arrives in a trickle, and it COPIES via `Buffer.concat` to join the parts
 * (one extra allocation per emitted chunk). In return the downstream parser
 * backtracks far less. Tune `minSize` to the downstream sweet spot (tens–hundreds
 * of KB) and `timeoutMs` to the latency you can spare.
 *
 * SEMANTICS:
 *  - Accumulates incoming chunks until their total reaches `minSize`, then emits
 *    the join immediately.
 *  - A batch below `minSize` is flushed early when `timeoutMs` elapses from the
 *    moment its FIRST byte arrived (the deadline is anchored, not reset per
 *    chunk — a steady trickle of tiny chunks can't defer the flush forever).
 *  - While nothing is buffered it blocks indefinitely for the next chunk: an idle
 *    or finished stream is never charged the timeout.
 *  - End of stream flushes whatever remains (possibly below `minSize`); a single
 *    already-large-enough chunk passes straight through with no copy.
 *  - Zero-length chunks carry no bytes and are ignored, so they neither start a
 *    batch nor force a copy of an otherwise pass-through chunk.
 *  - `timeoutMs` may exceed the largest delay a timer accepts (including
 *    `Infinity`, meaning "never flush on time"): the timer is clamped and simply
 *    re-armed until the real deadline is reached.
 *
 * It keeps exactly ONE outstanding pull on the source at a time (never calls
 * `next()` while a prior result is still in flight), reads one chunk ahead so it
 * can race arrival against the timer, and releases the source via `return()` if
 * the consumer abandons it early.
 *
 * @param source - The chunk stream to coalesce.
 * @param options - `minSize`: target minimum size, in bytes, of each emitted
 *   chunk. `timeoutMs`: maximum time, in milliseconds, a partial batch is held
 *   back, measured from the arrival of its first byte.
 * @returns An async generator of `Buffer`s carrying the same bytes, in order.
 * @throws Whatever the source iterator rejects with while it is being consumed.
 */
export async function* coalesceChunks(
  source: AsyncIterable<Uint8Array>,
  { minSize, timeoutMs }: { minSize: number; timeoutMs: number },
): AsyncGenerator<Buffer, void, undefined> {
  // Timers reject delays above this (a 32-bit signed int) and fall back to
  // ~1 ms, which would turn a huge/infinite timeout into an immediate flush.
  const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;

  const it = source[Symbol.asyncIterator]();
  // The single in-flight pull. Read one ahead so we always have a promise to
  // race the timer against; never start a second next() before this resolves.
  let pull = it.next();
  let parts: Buffer[] = [];
  let buffered = 0;
  let deadline = 0; // ms timestamp; armed when the first byte enters an empty batch

  // Normalize to a Buffer view over the same memory (no copy).
  const asBuffer = (u8: Uint8Array): Buffer =>
    Buffer.isBuffer(u8)
      ? u8
      : Buffer.from(u8.buffer, u8.byteOffset, u8.byteLength);

  const flush = (): Buffer => {
    // One part: hand it back as-is (no concat, no copy). Many: join them.
    const out = parts.length === 1 ? parts[0]! : Buffer.concat(parts, buffered);
    parts = [];
    buffered = 0;
    return out;
  };

  const take = (u8: Uint8Array): void => {
    // An empty chunk adds nothing; keeping it would only defeat the
    // single-part pass-through in flush().
    if (u8.byteLength === 0) return;
    const b = asBuffer(u8);
    parts.push(b);
    buffered += b.length;
  };

  try {
    while (true) {
      if (buffered === 0) {
        // Nothing buffered: block for the next chunk with no timeout.
        const r = await pull;
        if (r.done) return;
        take(r.value);
        deadline = Date.now() + timeoutMs;
        pull = it.next();
        if (buffered >= minSize) yield flush();
        continue;
      }

      // Below minSize with bytes in hand: race the next chunk against the time
      // left on this batch's anchored deadline.
      const remaining = deadline - Date.now();
      if (remaining <= 0) {
        yield flush();
        continue;
      }
      // If the deadline is further away than a timer can express, wait the
      // maximum and re-evaluate instead of flushing when that timer fires.
      const clamped = remaining > MAX_TIMER_DELAY_MS;
      let timer: ReturnType<typeof setTimeout> | undefined;
      const timeout = new Promise<typeof TIMED_OUT>((resolve) => {
        timer = setTimeout(
          () => resolve(TIMED_OUT),
          clamped ? MAX_TIMER_DELAY_MS : remaining,
        );
      });
      let r: IteratorResult<Uint8Array> | typeof TIMED_OUT;
      try {
        r = await Promise.race([pull, timeout]);
      } finally {
        // Always disarm, including when the pull rejected; otherwise the pending
        // timer would keep the event loop alive after the error.
        clearTimeout(timer);
      }
      if (r === TIMED_OUT) {
        // pull is STILL outstanding — keep it.
        if (clamped) continue; // real deadline not reached yet; re-arm
        yield flush(); // flush what we have so far
        continue;
      }
      if (r.done) {
        yield flush(); // emit the tail; stream is over
        return;
      }
      take(r.value);
      pull = it.next();
      if (buffered >= minSize) yield flush();
    }
  } finally {
    // The read-ahead pull may be left with nobody to await it (the consumer
    // broke out at a yield). Mark it handled so a late source failure does not
    // surface as an unhandled rejection; errors we did await still propagate.
    void pull.catch(() => {
      /* intentionally ignored: the consumer has gone away */
    });
    // Consumer broke out early (break/throw): let the source clean up.
    if (typeof it.return === "function") await it.return();
  }
}
@@ -0,0 +1,55 @@
1
+ import { type Reader, Cursor, advance } from "./core.js";
2
+ import { readUVarint } from "./varint.js";
3
+
4
+ /**
5
+ * Read a `String`: a varint byte-length prefix followed by that many bytes,
6
+ * decoded as UTF-8.
7
+ *
8
+ * NOTE: ClickHouse `String` is arbitrary bytes, not guaranteed UTF-8. For binary
9
+ * columns, read `state.buf.subarray(start, start + len)` and skip the decode to
10
+ * keep the raw bytes.
11
+ */
12
+ export function readString(state: Cursor): string {
13
+ const len = readUVarint(state);
14
+ const start = advance(state, len);
15
+ return state.buf.toString("utf8", start, start + len);
16
+ }
17
+
18
+ /**
19
+ * Read a `FixedString(N)`: exactly `size` raw bytes, decoded as UTF-8. Curried:
20
+ * `readFixedString(N)` returns the reader.
21
+ *
22
+ * The value is right-padded with NUL bytes to `size`; those trailing `\x00` are
23
+ * part of the stored value and are preserved here. Trim them
24
+ * (`.replace(/\x00+$/, "")`) only if your column holds NUL-terminated text.
25
+ *
26
+ * ClickHouse server returns `FixedString`s in JSON with the trailing NULs,
27
+ * therefore this reader preserves them as well.
28
+ */
29
+ export function readFixedString(size: number): Reader<string> {
30
+ return (state) => {
31
+ const start = advance(state, size);
32
+ return state.buf.toString("utf8", start, start + size);
33
+ };
34
+ }
35
+
36
+ /**
37
+ * Read a `FixedString(N)` as raw bytes (no UTF-8 decode) — for binary columns.
38
+ * Curried: `readFixedStringBytes(N)` returns the reader. Returns a zero-copy
39
+ * view: no allocation, but the slice shares memory with the response, so
40
+ * retaining any one slice pins the entire chunk buffer in memory.
41
+ *
42
+ * SAFE TO TOGGLE — if the bytes outlive the row/response, return an independent
43
+ * copy instead so the chunk can be freed:
44
+ *
45
+ * // return Buffer.from(state.buf.subarray(start, start + size));
46
+ *
47
+ * Make an educated tradeoff: view (default) when consumed immediately, a copy
48
+ * when retained.
49
+ */
50
+ export function readFixedStringBytes(size: number): Reader<Buffer> {
51
+ return (state) => {
52
+ const start = advance(state, size);
53
+ return state.buf.subarray(start, start + size);
54
+ };
55
+ }