@clickhouse/client 1.23.0-head.dbc2960.1 → 1.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1342 -0
- package/README.md +18 -6
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/version.js.map +1 -1
- package/package.json +7 -6
- package/skills/AGENTS.md +8 -0
- package/skills/clickhouse-js-node-rowbinary/AGENTS.md +44 -0
- package/skills/clickhouse-js-node-rowbinary/CHANGELOG.md +49 -0
- package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/README.md +78 -14
- package/skills/clickhouse-js-node-rowbinary/SKILL.md +111 -0
- package/skills/{clickhouse-js-node-rowbinary-parser/SKILL.md → clickhouse-js-node-rowbinary/reader.md} +51 -131
- package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/carts.ts +9 -5
- package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/events.ts +5 -5
- package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/iot.ts +4 -4
- package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/ledger.ts +3 -3
- package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/logs.ts +4 -4
- package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/observability.ts +9 -10
- package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/orders.ts +10 -9
- package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/profiles.ts +5 -5
- package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/src/examples/telemetry.ts +6 -6
- package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/compile.ts +18 -8
- package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/dynamic.ts +12 -8
- package/skills/clickhouse-js-node-rowbinary/src/readers/enums.ts +40 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/aggregateFunction.ts +18 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/bool.ts +10 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/composite.ts +140 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/core.ts +92 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/datetime.ts +123 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/decimals.ts +51 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/enums.ts +18 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/floats.ts +40 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/geo.ts +125 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/integers.ts +90 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/interval.ts +11 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/ip.ts +121 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/lowCardinality.ts +12 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/nested.ts +17 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/nothing.ts +21 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/rows.ts +144 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/simpleAggregateFunction.ts +12 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/strings.ts +77 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/time.ts +54 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/uuid.ts +60 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/varint.ts +64 -0
- package/skills/clickhouse-js-node-rowbinary/src/writers/writer.ts +101 -0
- package/skills/clickhouse-js-node-rowbinary/writer.md +96 -0
- package/skills/clickhouse-js-node-rowbinary-parser/src/enums.ts +0 -28
- /package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/EXAMPLES.md +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/case-studies/iot-rowbinary-vs-json.md +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/case-studies/ledger-rowbinary-vs-json.md +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/case-studies/logs-json-wins.md +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser → clickhouse-js-node-rowbinary}/case-studies/wasm-vs-js.md +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/aggregateFunction.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/bool.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/columnar.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/composite.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/core.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/datetime.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/decimals.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/floats.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/geo.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/header.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/integers.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/interval.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/ip.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/json.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/lowCardinality.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/nested.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/nothing.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/reader.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/rowBinaryWithNamesAndTypes.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/rows.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/simpleAggregateFunction.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/stream.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/strings.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/time.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/uuid.ts +0 -0
- /package/skills/{clickhouse-js-node-rowbinary-parser/src → clickhouse-js-node-rowbinary/src/readers}/varint.ts +0 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { writeInt64 } from "./integers.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Writer for every `Interval*` column (`IntervalNanosecond` through
 * `IntervalYear`). On the wire an interval is only a signed `Int64` count of
 * its unit; the unit is carried by the column type rather than the bytes,
 * which is why all 11 interval types share this single writer. The inverse of
 * `readInterval`.
 *
 * This is `writeInt64` itself — a direct alias, not a wrapper — so encoding an
 * interval adds no extra call frame on the write path.
 */
export const writeInterval: typeof writeInt64 = writeInt64;
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { Sink, reserve } from "./core.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Encode an `IPv4` value: the raw 32-bit number (the shape `readIPv4`
 * produces) stored as a 4-byte little-endian `UInt32`. The inverse of
 * `readIPv4`. To start from a dotted-quad string, convert it with
 * {@link parseIPv4} first.
 */
export function writeIPv4(sink: Sink, value: number): void {
  const view = sink.view;
  const offset = reserve(sink, 4);
  view.setUint32(offset, value, /* littleEndian */ true);
}
|
|
11
|
+
|
|
12
|
+
/**
 * Encode an `IPv6` value by copying its 16 raw bytes (network order, the shape
 * `readIPv6` produces) into the sink unchanged. The inverse of `readIPv6`. To
 * start from a textual address, convert it with {@link parseIPv6} first.
 *
 * @throws RangeError when `value` is not exactly 16 bytes long.
 */
export function writeIPv6(sink: Sink, value: Uint8Array): void {
  const byteCount = value.length;
  if (byteCount !== 16) {
    throw new RangeError(`RowBinary: IPv6 must be 16 bytes, got ${byteCount}`);
  }
  const start = reserve(sink, 16);
  sink.buf.set(value, start);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Parse one dotted-quad field into a 0..255 octet, throwing on anything else.
 *
 * Only 1–3 plain decimal digits are accepted. `Number(part)` on its own is too
 * permissive for this job: it maps `""` and `" "` to 0 and also accepts
 * `"0x10"`, `"1e2"`, `"+4"` and `"-0"`, none of which is a dotted-quad field,
 * so the text is checked before it is converted.
 *
 * @throws RangeError when `part` is not a decimal integer in 0..255.
 */
function parseOctet(part: string): number {
  if (!/^[0-9]{1,3}$/.test(part) || Number(part) > 255) {
    throw new RangeError(
      `RowBinary: invalid IPv4 octet ${JSON.stringify(part)}`,
    );
  }
  return Number(part);
}

/**
 * Parse a dotted-quad IPv4 string into the raw 32-bit value — the inverse of
 * `formatIPv4`. `"1.2.3.4"` -> `0x01020304`. Pair with {@link writeIPv4}.
 *
 * The four octets are read explicitly rather than in a loop (only ever four);
 * `>>> 0` coerces the assembled value back to an unsigned 32-bit number, since
 * `a << 24` is negative for `a >= 128`.
 *
 * @throws RangeError when `text` does not have exactly four fields, or any
 *   field is not a decimal integer in 0..255.
 */
export function parseIPv4(text: string): number {
  const parts = text.split(".");
  if (parts.length !== 4) {
    throw new RangeError(
      `RowBinary: invalid IPv4 string ${JSON.stringify(text)}`,
    );
  }
  const a = parseOctet(parts[0]!);
  const b = parseOctet(parts[1]!);
  const c = parseOctet(parts[2]!);
  const d = parseOctet(parts[3]!);
  return ((a << 24) | (b << 16) | (c << 8) | d) >>> 0;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Parse an IPv6 string into its raw 16 bytes (network order) — the inverse of
 * `formatIPv6`, accepting the canonical forms it emits (`::` zero-run compression
 * and the `::ffff:a.b.c.d` IPv4-mapped form) as well as the fully expanded form.
 * Pair with {@link writeIPv6}.
 *
 * Rejects malformed input (throws): a parse-time helper validates because it
 * must produce exactly 16 well-defined bytes and there is no hot loop or server
 * to fall back on — see the "No defensive validation" exceptions in AGENTS.md.
 * In particular:
 *
 * - more than one `::`, or a group that is not 1–4 hex digits;
 * - a `::` that would stand for zero groups (it must replace at least one),
 *   including the case where the explicit groups already exceed eight;
 * - an embedded dotted-quad anywhere but the very end of the address.
 *
 * @throws RangeError on any malformed address.
 */
export function parseIPv6(text: string): Buffer {
  const invalid = (): RangeError =>
    new RangeError(`RowBinary: invalid IPv6 string ${JSON.stringify(text)}`);

  const halves = text.split("::");
  if (halves.length > 2) {
    throw invalid();
  }
  const compressed = halves.length === 2;

  // Expand a colon-separated side into 16-bit groups, splitting a trailing
  // embedded IPv4 (a.b.c.d) into its two groups. `mayEndInIPv4` is true only
  // for the side that ends the whole address; a dotted part is legal only as
  // the last part of that side.
  const toGroups = (side: string, mayEndInIPv4: boolean): number[] => {
    if (side === "") return [];
    const parts = side.split(":");
    const groups: number[] = [];
    for (let i = 0; i < parts.length; i++) {
      const part = parts[i]!;
      if (part.includes(".")) {
        if (!mayEndInIPv4 || i !== parts.length - 1) {
          throw invalid();
        }
        const v4 = parseIPv4(part);
        groups.push((v4 >>> 16) & 0xffff, v4 & 0xffff);
      } else {
        // 1–4 hex digits only. Parsing strictly rather than `parseInt(...) &
        // 0xffff` rejects malformed groups instead of silently turning them
        // into 0 (`NaN & 0xffff`) or wrapping negatives like "-1" to 0xffff.
        if (!/^[0-9a-fA-F]{1,4}$/.test(part)) {
          throw new RangeError(
            `RowBinary: invalid IPv6 group ${JSON.stringify(part)}`,
          );
        }
        groups.push(parseInt(part, 16));
      }
    }
    return groups;
  };

  const head = toGroups(halves[0]!, !compressed);
  const tail = compressed ? toGroups(halves[1]!, true) : [];

  let groups: number[];
  if (compressed) {
    // `::` replaces one or more all-zero groups. Checking the count up front
    // also keeps `new Array(n)` from being handed a negative length, which
    // would surface as an unrelated "Invalid array length" error.
    const zeroCount = 8 - head.length - tail.length;
    if (zeroCount < 1) {
      throw invalid();
    }
    groups = [...head, ...new Array<number>(zeroCount).fill(0), ...tail];
  } else {
    groups = head;
  }
  if (groups.length !== 8) {
    throw invalid();
  }

  // SAFE: allocUnsafe — the loop below writes all 16 bytes (out[0..15] for the
  // 8 groups), and `out` is only allocated here, past every throw, so an
  // uninitialized buffer is never returned.
  const out = Buffer.allocUnsafe(16);
  for (let i = 0; i < 8; i++) {
    out[2 * i] = (groups[i]! >>> 8) & 0xff;
    out[2 * i + 1] = groups[i]! & 0xff;
  }
  return out;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { type Writer } from "./core.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Writer combinator for `LowCardinality(T)`.
 *
 * RowBinary carries no dictionary layer for `LowCardinality`, so the wrapper is
 * TRANSPARENT on the wire and `T`'s own writer already produces the right
 * bytes. This combinator exists only to mirror `readLowCardinality`; it hands
 * back the inner writer untouched:
 *
 *     writeLowCardinality(writeString) === writeString
 */
export function writeLowCardinality<T>(writeValue: Writer<T>): Writer<T> {
  return writeValue;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { type Writer } from "./core.js";
|
|
2
|
+
import { writeArray, writeTupleNamed } from "./composite.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Writer combinator for `Nested(...)` — the inverse of `readNested`.
 *
 * `Nested` has no wire format of its own: in the `flatten_nested = 0` shape it
 * is exactly `Array(Tuple(a T1, b T2, …))`. This is therefore a thin alias that
 * composes the array writer with the named-tuple writer, just as the reader
 * does:
 *
 *     writeNested({ a: writeUInt8, b: writeString })
 *       === writeArray(writeTupleNamed({ a: writeUInt8, b: writeString }))
 *
 * When generating code, prefer inlining (monomorphize the array + tuple) over
 * this generic composition.
 */
export function writeNested<T extends Record<string, unknown>>(writers: {
  [K in keyof T]: Writer<T[K]>;
}): Writer<readonly T[]> {
  return writeArray(writeTupleNamed(writers));
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { type Writer } from "./core.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Placeholder writer for `Nothing` — the inverse of `readNothing`.
 *
 * A `Nothing` value is never put on the wire. The type only occurs inside a
 * wrapper, and the wrapper finishes before it would reach the inner writer:
 *
 *     writeArray(writeNothing)     // only the empty array [] (length 0x00)
 *     writeNullable(writeNothing)  // only null (lone flag byte 0x01)
 *
 * Neither case invokes this function, so it throws unconditionally: being
 * called at all means a `Nothing` writer was wired in where a real
 * element/inner type was expected.
 */
export const writeNothing: Writer<never> = () => {
  const message = [
    "RowBinary: Nothing is zero-width and is never encoded — it only appears as ",
    "an empty Array(Nothing) or a NULL Nullable(Nothing), where the inner writer ",
    "is not called. Reaching here means a Nothing writer was wired where a real ",
    "element/inner type was expected.",
  ].join("");
  throw new Error(message);
};
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { channel } from "node:diagnostics_channel";
|
|
2
|
+
import { BufferFull, Sink, type Writer } from "./core.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Sink size (in bytes) `writeRows` falls back to when the caller passes no
 * `bufferSize`: 64 KiB, a typical flush chunk.
 */
const DEFAULT_BUFFER_SIZE = 64 * 1024;
|
|
6
|
+
|
|
7
|
+
/**
 * Message published on {@link FLUSH_CHANNEL_NAME}, once for every buffer that
 * `writeRows` flushes. It is the hook for buffer-capacity-utilization metrics —
 * for example an OTEL `used_bytes` / `capacity_bytes` counter pair that the
 * backend divides to get a byte-weighted average fill.
 *
 * Identity (table, `query_id`, …) is deliberately NOT part of the payload.
 * Carry it in `AsyncLocalStorage` and read it inside the subscriber: the
 * subscriber runs synchronously on the publisher's call stack, so the
 * publisher's async context is still live there.
 */
export interface WriteRowsFlush {
  /** Number of bytes actually written into the flushed buffer (`<= capacityBytes`). */
  usedBytes: number;
  /**
   * Capacity of the flushed buffer. Starts at `bufferSize` and doubles whenever
   * an oversized row has to be fitted.
   */
  capacityBytes: number;
  /**
   * The initial buffer size configured for this run. `capacityBytes > bufferSize`
   * means the buffer had to grow to fit an oversized row, and `usedBytes /
   * bufferSize` is the overflow magnitude — the signal that `bufferSize` is too
   * small. Growth is sticky: once grown, every later flush in the run reports
   * the larger `capacityBytes`, so compare against `bufferSize`, not against a
   * previous flush's capacity.
   */
  bufferSize: number;
  /**
   * What triggered the flush: `"full"` when the next row overflowed the buffer
   * mid-stream, `"end"` when the rows ran out.
   */
  reason: "full" | "end";
}
|
|
31
|
+
|
|
32
|
+
/**
 * Name of the `node:diagnostics_channel` channel on which {@link writeRows}
 * publishes one {@link WriteRowsFlush} per flushed buffer. Subscribe to it to
 * observe buffer-capacity utilization.
 *
 * Costs nothing when unused: without a subscriber `writeRows` skips the publish
 * altogether, paying only a single `hasSubscribers` check per buffer — never
 * per row.
 */
export const FLUSH_CHANNEL_NAME = "@clickhouse/rowbinary:writeRows.flush";

/**
 * The channel object itself, looked up once at module load. `channel()` is
 * idempotent — the same name always yields the same object.
 */
const flushChannel = channel(FLUSH_CHANNEL_NAME);
|
|
42
|
+
|
|
43
|
+
/**
 * Encode an iterable of rows into plain `RowBinary`, one `writeRow` call per
 * row — the encode mirror of `readRows`. Rows are concatenated with NO count,
 * length prefix, or delimiter (exactly what the reader expects), so `writeRow`
 * must emit EXACTLY one row's bytes. Curried: `writeRows(writeRow)` returns the
 * driver.
 *
 * WHY A GENERATOR and not a `Writer<readonly T[]>`: a `Sink` wraps a
 * FIXED-length buffer, so a large (or unbounded) input cannot fit in one pass.
 * When a row would overflow, `writeRow` throws {@link BufferFull} from
 * `reserve`; a plain `(sink, rows) => void` would let that escape mid-row and
 * leave the caller unable to tell how many rows made it in. The driver owns the
 * sink instead: it catches `BufferFull`, rewinds to the last COMPLETE row
 * boundary (never a half-written row), `yield`s that batch, and continues with
 * a FRESH `Sink` for the row that didn't fit. Because every flush gets its own
 * buffer, each yielded `Buffer` stays valid after the generator resumes — safe
 * to retain or hand to an async sink without copying. The caller supplies a
 * `bufferSize`, not a sink, and `rows` is any `Iterable<T>`, so the same driver
 * also serves an infinite/streaming row source.
 *
 * USAGE: a plain `for...of`. The final batch is yielded like every other one,
 * so there is nothing left to flush afterwards:
 *
 *     const drive = writeRows(writeRow);
 *     for (const chunk of drive(rows, 64 * 1024)) send(chunk);
 *
 * OVERSIZED ROWS: when a single row won't fit even an empty buffer, the buffer
 * is GROWN (doubled) and the row retried — never dropped, never thrown. The
 * first time this happens `writeRows` `console.warn`s ONCE (the buffer may keep
 * doubling after that), so an under-sized `bufferSize` or a pathologically
 * large row doesn't pass unnoticed.
 *
 * METRICS: every flushed buffer is published as a {@link WriteRowsFlush} on the
 * {@link FLUSH_CHANNEL_NAME} diagnostics channel — wire it to a utilization
 * metric. No subscriber means no publish (one `hasSubscribers` check per
 * buffer).
 *
 * When generating code, inline the per-column writes into the loop body,
 * mirroring the reader.
 *
 * @throws RangeError (on first iteration) when `bufferSize` is not a positive
 *   safe integer; any non-`BufferFull` error from `writeRow` is rethrown as is.
 */
export function writeRows<T>(
  writeRow: Writer<T>,
): (rows: Iterable<T>, bufferSize?: number) => Generator<Buffer, void, void> {
  return function* (rows, bufferSize = DEFAULT_BUFFER_SIZE) {
    // Guard the growth loop: a 0 / NaN / negative size makes the first row
    // overflow forever (doubling never escapes 0/NaN), so fail fast instead.
    const sizeIsUsable = Number.isSafeInteger(bufferSize) && bufferSize > 0;
    if (!sizeIsUsable) {
      throw new RangeError(
        `RowBinary writeRows: bufferSize must be a positive integer, got ${bufferSize}`,
      );
    }

    let capacity = bufferSize;
    let growthWarned = false;
    let sink = new Sink(Buffer.allocUnsafe(capacity));

    // Publish one flush record, but only when somebody is listening.
    const publishFlush = (
      usedBytes: number,
      reason: WriteRowsFlush["reason"],
    ): void => {
      if (!flushChannel.hasSubscribers) return;
      flushChannel.publish({
        usedBytes,
        capacityBytes: capacity,
        bufferSize,
        reason,
      } satisfies WriteRowsFlush);
    };

    for (const row of rows) {
      // Keep retrying the SAME row until it lands in a buffer that can hold it.
      for (;;) {
        const rowStart = sink.pos; // the last clean row boundary
        try {
          writeRow(sink, row);
          break; // row fully written — move on to the next one
        } catch (error) {
          if (error !== BufferFull) throw error;

          if (rowStart !== 0) {
            // Earlier rows are in the buffer: discard the partial row, flush
            // the complete ones, then retry this row in a fresh buffer.
            sink.pos = rowStart;
            publishFlush(rowStart, "full");
            yield sink.bytes();
            sink = new Sink(Buffer.allocUnsafe(capacity));
          } else {
            // Even an empty buffer couldn't hold this one row: double the
            // capacity and retry — never drop the row. Nothing had been
            // written, so the discarded sink holds no bytes to flush.
            capacity *= 2;
            if (!growthWarned) {
              growthWarned = true;
              console.warn(
                `RowBinary writeRows: a row didn't fit bufferSize=${bufferSize}; ` +
                  `growing the buffer beyond it (possibly more than once). Raise bufferSize.`,
              );
            }
            sink = new Sink(Buffer.allocUnsafe(capacity));
          }
        }
      }
    }

    // Whatever is left once the rows run out is the final batch.
    if (sink.pos > 0) {
      publishFlush(sink.pos, "end");
      yield sink.bytes();
    }
  };
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { type Writer } from "./core.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Writer combinator for `SimpleAggregateFunction(func, T)`.
 *
 * The wrapper is TRANSPARENT in RowBinary — the column holds a finished value
 * of `T` — so the inner `T` writer already emits the right bytes. This identity
 * combinator mirrors `readSimpleAggregateFunction` and returns its argument
 * unchanged:
 *
 *     writeSimpleAggregateFunction(writeUInt64) === writeUInt64
 */
export function writeSimpleAggregateFunction<T>(
  writeValue: Writer<T>,
): Writer<T> {
  return writeValue;
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { type Writer, Sink, reserve } from "./core.js";
|
|
2
|
+
import { writeUVarint } from "./varint.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Encode a `String` column value from a JS string: a varint prefix holding the
 * UTF-8 byte length, then the UTF-8 bytes themselves. The inverse of
 * `readString`.
 *
 * Deliberately separate from {@link writeStringBytes} instead of one function
 * that branches on the argument type: each stays monomorphic, so V8 keeps a
 * single shape per call site rather than going megamorphic on a
 * `string | Uint8Array` parameter.
 */
export function writeString(sink: Sink, value: string): void {
  const byteLength = Buffer.byteLength(value, "utf8");
  writeUVarint(sink, byteLength);
  const start = reserve(sink, byteLength);
  sink.buf.write(value, start, byteLength, "utf8");
}
|
|
18
|
+
|
|
19
|
+
/**
 * Encode a `String` column value from raw bytes: a varint prefix holding the
 * byte length, then the bytes copied verbatim. Use it for ClickHouse `String`
 * columns that hold arbitrary (non-UTF-8) data — `String` is not guaranteed to
 * be UTF-8, as the reader notes too. This is the bytes counterpart of
 * {@link writeString}; the two are kept as separate monomorphic functions for
 * the reason explained there.
 */
export function writeStringBytes(sink: Sink, value: Uint8Array): void {
  const byteLength = value.length;
  writeUVarint(sink, byteLength);
  const start = reserve(sink, byteLength);
  sink.buf.set(value, start);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Build a writer for `FixedString(N)` that takes a JS string. The value is
 * UTF-8 encoded into exactly `size` bytes, right-padded with NUL bytes
 * (`\x00`) when shorter. Curried: `writeFixedString(N)` returns the writer.
 * The inverse of `readFixedString`, which preserves the trailing NULs — so
 * re-encoding a value it produced is byte-exact.
 *
 * @throws RangeError (from the returned writer) when the UTF-8 encoding is
 *   longer than `size` bytes and therefore would not fit the column.
 */
export function writeFixedString(size: number): Writer<string> {
  const write: Writer<string> = (sink, value) => {
    const byteLength = Buffer.byteLength(value, "utf8");
    if (byteLength > size) {
      throw new RangeError(
        `RowBinary: FixedString value is ${byteLength} bytes, exceeds FixedString(${size})`,
      );
    }
    const start = reserve(sink, size);
    const end = start + size;
    sink.buf.write(value, start, byteLength, "utf8");
    // The sink's buffer may be uninitialized (allocUnsafe), so the padding is
    // zeroed explicitly.
    // POTENTIAL OPTIMIZATION: skip this fill when the buffer is known to be
    // zero-initialized. It stays on by default — trusting a zeroed buffer is a
    // footgun (a pooled/reused sink would leak stale bytes into the column).
    sink.buf.fill(0, start + byteLength, end);
  };
  return write;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Build a writer for `FixedString(N)` that takes raw bytes. The value is
 * copied verbatim into exactly `size` bytes and right-padded with NUL bytes
 * when shorter. Curried: `writeFixedStringBytes(N)` returns the writer. The
 * inverse of `readFixedStringBytes` (binary columns).
 *
 * @throws RangeError (from the returned writer) when the value is longer than
 *   `size`.
 */
export function writeFixedStringBytes(size: number): Writer<Uint8Array> {
  const write: Writer<Uint8Array> = (sink, value) => {
    if (value.length > size) {
      throw new RangeError(
        `RowBinary: FixedString value is ${value.length} bytes, exceeds FixedString(${size})`,
      );
    }
    const start = reserve(sink, size);
    const end = start + size;
    sink.buf.set(value, start);
    // Zero the remainder: the underlying buffer may hold stale bytes.
    sink.buf.fill(0, start + value.length, end);
  };
  return write;
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { type Writer, Sink } from "./core.js";
|
|
2
|
+
import { type ScaledTicks, type Seconds } from "../readers/time.js";
|
|
3
|
+
import { writeInt32, writeInt64 } from "./integers.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Encode a `Time` column value: signed seconds-of-day stored as a 4-byte
 * `Int32`, so the work is delegated to `writeInt32`. The inverse of `readTime`.
 * To start from an "[-]HH:MM:SS" string, convert it with {@link parseTime}
 * first.
 */
export function writeTime(sink: Sink, value: Seconds): void {
  writeInt32(sink, value);
}
|
|
12
|
+
|
|
13
|
+
/**
 * Encode a `Time64(P)` column value from a {@link ScaledTicks}
 * `[ticks, precision]` pair: a signed 8-byte `Int64` count of `10^-P`-second
 * ticks. Only `ticks` reaches the wire — the precision belongs to the column
 * type. The inverse of `readTime64`. To start from a string, convert it with
 * {@link parseTime64} first.
 */
export const writeTime64: Writer<ScaledTicks> = (sink, scaled) =>
  writeInt64(sink, scaled[0]);
|
|
21
|
+
|
|
22
|
+
/**
 * Parse an "[-]HH:MM:SS" string into signed seconds-of-day — the inverse of
 * `formatTime`. The hour field may exceed two digits (range ±999:59:59).
 *
 * The text is validated before conversion: it must be an optional leading `-`
 * followed by exactly three colon-separated runs of decimal digits. Without
 * that check a missing or non-numeric field (`"12:34"`, `""`, `"a:b:c"`) would
 * silently produce `NaN`, and extra fields would be ignored.
 *
 * @param text The time string to parse.
 * @returns The signed number of seconds.
 * @throws RangeError when `text` is not of the form "[-]H…:M…:S…".
 */
export function parseTime(text: string): Seconds {
  const match = /^(-?)([0-9]+):([0-9]+):([0-9]+)$/.exec(text);
  if (match === null) {
    throw new RangeError(
      `RowBinary: invalid Time string ${JSON.stringify(text)}`,
    );
  }
  const seconds =
    Number(match[2]) * 3600 + Number(match[3]) * 60 + Number(match[4]);
  return match[1] === "-" ? -seconds : seconds;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Parse an "[-]HH:MM:SS[.fff]" string into a {@link ScaledTicks} at the given
 * `precision` — the inverse of `formatTime64`. A shorter fraction is right-padded
 * with zeros to `precision`; a longer one is truncated.
 *
 * The text is validated before conversion: an optional leading `-`, exactly
 * three colon-separated runs of decimal digits, and an optional `.` followed by
 * decimal digits only. This replaces the cryptic `BigInt` conversion errors a
 * malformed string used to trigger, and stops fractions such as `"0x1"` from
 * being silently read as a number. All arithmetic is done in `bigint`, so the
 * tick count is exact.
 *
 * @param text The time string to parse.
 * @param precision Number of fractional decimal digits (`P` in `Time64(P)`).
 * @returns `[ticks, precision]`, where `ticks` counts `10^-precision` seconds.
 * @throws RangeError when `precision` is not a non-negative integer or `text`
 *   is malformed.
 */
export function parseTime64(text: string, precision: number): ScaledTicks {
  if (!Number.isInteger(precision) || precision < 0) {
    throw new RangeError(`RowBinary: invalid Time64 precision ${precision}`);
  }
  const match = /^(-?)([0-9]+):([0-9]+):([0-9]+)(?:\.([0-9]*))?$/.exec(text);
  if (match === null) {
    throw new RangeError(
      `RowBinary: invalid Time64 string ${JSON.stringify(text)}`,
    );
  }
  const negative = match[1] === "-";
  const fraction = match[5] ?? "";

  const scale = 10n ** BigInt(precision);
  const wholeSeconds =
    BigInt(match[2]!) * 3600n + BigInt(match[3]!) * 60n + BigInt(match[4]!);
  // Pad with zeros, then cut to exactly `precision` digits; at precision 0
  // the slice is empty, so fall back to "0".
  const fractionDigits =
    (fraction + "0".repeat(precision)).slice(0, precision) || "0";

  const ticks = wholeSeconds * scale + BigInt(fractionDigits);
  return [negative ? -ticks : ticks, precision];
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { Sink, reserve } from "./core.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Parse a canonical `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` UUID string into the
 * raw 16 wire bytes — the inverse of `formatUUID`. ClickHouse stores a UUID as
 * two little-endian `UInt64` halves (high then low), so the 32 hex digits are
 * split at the midpoint and each half written little-endian. Pair with
 * {@link writeUUID}.
 *
 * Dashes are stripped wherever they occur, so the undashed 32-digit form is
 * accepted as well. What remains must be exactly 32 hex digits; checking the
 * digits (not only the length) keeps non-hex input from escaping as a
 * `SyntaxError` out of `BigInt`.
 *
 * @throws RangeError when `text` does not contain exactly 32 hex digits.
 */
export function parseUUID(text: string): Buffer {
  const hex = text.replace(/-/g, "");
  if (!/^[0-9a-fA-F]{32}$/.test(hex)) {
    throw new RangeError(
      `RowBinary: invalid UUID string ${JSON.stringify(text)}`,
    );
  }
  const v = BigInt("0x" + hex);
  // SAFE: allocUnsafe — the two writeBigUInt64LE calls below overwrite all 16
  // bytes (offsets 0..7 and 8..15), so no uninitialized pool memory survives.
  const b = Buffer.allocUnsafe(16);
  b.writeBigUInt64LE(v >> 64n, 0); // high half -> first 8 bytes
  b.writeBigUInt64LE(v & 0xffffffffffffffffn, 8); // low half -> last 8 bytes
  return b;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Append a `UUID` given as its raw 16 wire bytes (as produced by `readUUID` or
 * {@link parseUUID}). The bytes are copied into the sink unchanged, making this
 * the inverse of `readUUID`.
 *
 * @throws RangeError when `value` is not exactly 16 bytes long; nothing is
 * reserved in the sink in that case.
 */
export function writeUUID(sink: Sink, value: Uint8Array): void {
  const byteCount = value.length;
  if (byteCount !== 16) {
    throw new RangeError(`RowBinary: UUID must be 16 bytes, got ${byteCount}`);
  }
  // Claim the 16-byte span first, then copy into whatever `sink.buf` is now.
  const offset = reserve(sink, 16);
  sink.buf.set(value, offset);
}
|
|
40
|
+
|
|
41
|
+
/**
 * Append a `UUID` supplied as one 128-bit `bigint` laid out as `hi << 64 | lo`
 * — the inverse of `readUUIDBigInt`. The upper 64 bits become the first
 * little-endian `UInt64` half on the wire and the lower 64 bits the second.
 * Each half is masked to 64 bits before it is written.
 */
export function writeUUIDBigInt(sink: Sink, value: bigint): void {
  const offset = reserve(sink, 16);
  const mask64 = 0xffffffffffffffffn;
  const hi = (value >> 64n) & mask64;
  const lo = value & mask64;
  sink.buf.writeBigUInt64LE(hi, offset);
  sink.buf.writeBigUInt64LE(lo, offset + 8);
}
|
|
51
|
+
|
|
52
|
+
/**
 * Append a `UUID` supplied as its two raw `UInt64` halves `[hi, lo]` — the
 * inverse of `readUUIDHiLo`. This is the direct wire split: `hi` is written
 * little-endian into the first 8 bytes and `lo` into the last 8, with no
 * combining or masking, so each half must already fit in an unsigned 64-bit
 * integer.
 */
export function writeUUIDHiLo(sink: Sink, [hi, lo]: [bigint, bigint]): void {
  const offset = reserve(sink, 16);
  // Read `sink.buf` only after the span has been reserved.
  const target = sink.buf;
  target.writeBigUInt64LE(hi, offset);
  target.writeBigUInt64LE(lo, offset + 8);
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { Sink, reserve } from "./core.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Write a LEB128 unsigned varint — the encode mirror of `readUVarint` (used for
 * string/array/map lengths).
 *
 * Takes a JS `number`, so it is NOT bigint-friendly: the value MUST be a
 * non-negative integer no larger than `Number.MAX_SAFE_INTEGER` (2^53 - 1). That
 * precondition is NOT checked here — at this level the data is expected to be
 * correct (a length the encoder itself produced), and an out-of-range value is a
 * programming error the server will reject. If you genuinely need lengths beyond
 * 2^53, write a bigint version with a bigint accumulator instead of widening this
 * one.
 *
 * UNROLLED, mirroring `readUVarint`: branch on magnitude so the exact byte count
 * is known up front for a single {@link reserve}, with no length-counting loop.
 * Each byte carries 7 payload bits low-first, with the continuation bit (`+ 0x80`)
 * set while more bits remain. `/` and `%` (never `>>>`/`&`): JS bitwise operators
 * are 32-bit and would corrupt values past bit 31. The overwhelmingly common
 * 1–2 byte case costs one or two compares.
 *
 * Every branch calls {@link reserve} first and only then dereferences
 * `sink.buf`, so the write always targets the buffer that is current after the
 * reservation.
 *
 * @param sink - Destination sink; advanced by the encoded length (1+ bytes).
 * @param value - Non-negative safe integer to encode (unchecked, see above).
 */
export function writeUVarint(sink: Sink, value: number): void {
  if (value < 0x80) {
    // Reserve BEFORE reading `sink.buf`. In `sink.buf[reserve(sink, 1)] = v`
    // the `sink.buf` reference is evaluated before `reserve()` runs, so if
    // reserve() installs a different buffer while growing, the byte would be
    // written into the stale one.
    const o = reserve(sink, 1);
    sink.buf[o] = value;
    return;
  }
  if (value < 0x4000) {
    const o = reserve(sink, 2);
    sink.buf[o] = (value % 128) + 0x80;
    sink.buf[o + 1] = Math.floor(value / 128);
    return;
  }
  if (value < 0x200000) {
    const o = reserve(sink, 3);
    sink.buf[o] = (value % 128) + 0x80;
    sink.buf[o + 1] = (Math.floor(value / 128) % 128) + 0x80;
    sink.buf[o + 2] = Math.floor(value / 16384);
    return;
  }
  if (value < 0x10000000) {
    const o = reserve(sink, 4);
    sink.buf[o] = (value % 128) + 0x80;
    sink.buf[o + 1] = (Math.floor(value / 128) % 128) + 0x80;
    sink.buf[o + 2] = (Math.floor(value / 16384) % 128) + 0x80;
    sink.buf[o + 3] = Math.floor(value / 2097152);
    return;
  }
  // >= 2^28 — rare for RowBinary lengths. Fall back to a short loop writing into
  // a single span sized by a leading magnitude count (still one reserve()).
  // Start at 5 bytes (covers up to 35 bits) and add one byte for every further
  // 7 bits present above bit 28.
  let size = 5;
  for (
    let rest = Math.floor(value / 268435456);
    rest >= 0x80;
    rest = Math.floor(rest / 128)
  ) {
    size++;
  }
  const o = reserve(sink, size);
  let v = value;
  // All bytes but the last carry the continuation bit.
  for (let i = 0; i < size - 1; i++) {
    sink.buf[o + i] = (v % 128) + 0x80;
    v = Math.floor(v / 128);
  }
  sink.buf[o + size - 1] = v;
}
|