@query-farm/vgi-rpc 0.6.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/dist/access-log.d.ts +50 -0
  2. package/dist/access-log.d.ts.map +1 -0
  3. package/dist/arrow/impl-arrowjs/index.d.ts +96 -0
  4. package/dist/arrow/impl-arrowjs/index.d.ts.map +1 -0
  5. package/dist/arrow/impl-flechette/index.d.ts +102 -0
  6. package/dist/arrow/impl-flechette/index.d.ts.map +1 -0
  7. package/dist/arrow/impl-flechette/message-meta.d.ts +11 -0
  8. package/dist/arrow/impl-flechette/message-meta.d.ts.map +1 -0
  9. package/dist/arrow/index.d.ts +4 -0
  10. package/dist/arrow/index.d.ts.map +1 -0
  11. package/dist/arrow/predicates.d.ts +44 -0
  12. package/dist/arrow/predicates.d.ts.map +1 -0
  13. package/dist/arrow/types.d.ts +62 -0
  14. package/dist/arrow/types.d.ts.map +1 -0
  15. package/dist/client/capabilities.d.ts +25 -0
  16. package/dist/client/capabilities.d.ts.map +1 -0
  17. package/dist/client/connect.d.ts.map +1 -1
  18. package/dist/client/introspect.d.ts +7 -0
  19. package/dist/client/introspect.d.ts.map +1 -1
  20. package/dist/client/ipc.d.ts +8 -2
  21. package/dist/client/ipc.d.ts.map +1 -1
  22. package/dist/client/pipe.d.ts.map +1 -1
  23. package/dist/client/stream.d.ts +11 -2
  24. package/dist/client/stream.d.ts.map +1 -1
  25. package/dist/client/uploadUrl.d.ts +25 -0
  26. package/dist/client/uploadUrl.d.ts.map +1 -0
  27. package/dist/constants.d.ts +15 -1
  28. package/dist/constants.d.ts.map +1 -1
  29. package/dist/crypto.d.ts +22 -0
  30. package/dist/crypto.d.ts.map +1 -0
  31. package/dist/dispatch/describe.d.ts +10 -6
  32. package/dist/dispatch/describe.d.ts.map +1 -1
  33. package/dist/dispatch/stream.d.ts +2 -2
  34. package/dist/dispatch/stream.d.ts.map +1 -1
  35. package/dist/dispatch/unary.d.ts +2 -2
  36. package/dist/dispatch/unary.d.ts.map +1 -1
  37. package/dist/errors.d.ts +46 -0
  38. package/dist/errors.d.ts.map +1 -1
  39. package/dist/external.d.ts +25 -5
  40. package/dist/external.d.ts.map +1 -1
  41. package/dist/http/bearer.d.ts.map +1 -1
  42. package/dist/http/common.d.ts +42 -7
  43. package/dist/http/common.d.ts.map +1 -1
  44. package/dist/http/dispatch.d.ts +20 -2
  45. package/dist/http/dispatch.d.ts.map +1 -1
  46. package/dist/http/handler.d.ts.map +1 -1
  47. package/dist/http/index.d.ts +1 -0
  48. package/dist/http/index.d.ts.map +1 -1
  49. package/dist/http/mtls.d.ts +2 -1
  50. package/dist/http/mtls.d.ts.map +1 -1
  51. package/dist/http/oauth-pkce.d.ts +141 -0
  52. package/dist/http/oauth-pkce.d.ts.map +1 -0
  53. package/dist/http/pages.d.ts +3 -0
  54. package/dist/http/pages.d.ts.map +1 -1
  55. package/dist/http/sticky.d.ts +124 -0
  56. package/dist/http/sticky.d.ts.map +1 -0
  57. package/dist/http/token.d.ts +38 -12
  58. package/dist/http/token.d.ts.map +1 -1
  59. package/dist/http/types.d.ts +68 -5
  60. package/dist/http/types.d.ts.map +1 -1
  61. package/dist/index.d.ts +6 -4
  62. package/dist/index.d.ts.map +1 -1
  63. package/dist/index.js +1275 -3507
  64. package/dist/index.js.map +19 -37
  65. package/dist/launcher/hash.d.ts +22 -0
  66. package/dist/launcher/hash.d.ts.map +1 -0
  67. package/dist/launcher/index.d.ts +23 -0
  68. package/dist/launcher/index.d.ts.map +1 -0
  69. package/dist/launcher/launch.d.ts +27 -0
  70. package/dist/launcher/launch.d.ts.map +1 -0
  71. package/dist/launcher/lock.d.ts +19 -0
  72. package/dist/launcher/lock.d.ts.map +1 -0
  73. package/dist/launcher/serve-unix.d.ts +54 -0
  74. package/dist/launcher/serve-unix.d.ts.map +1 -0
  75. package/dist/launcher/state.d.ts +59 -0
  76. package/dist/launcher/state.d.ts.map +1 -0
  77. package/dist/otel.d.ts.map +1 -1
  78. package/dist/protocol.d.ts +16 -2
  79. package/dist/protocol.d.ts.map +1 -1
  80. package/dist/schema.d.ts +45 -18
  81. package/dist/schema.d.ts.map +1 -1
  82. package/dist/server.d.ts +23 -2
  83. package/dist/server.d.ts.map +1 -1
  84. package/dist/types.d.ts +216 -12
  85. package/dist/types.d.ts.map +1 -1
  86. package/dist/util/gzip.d.ts +10 -0
  87. package/dist/util/gzip.d.ts.map +1 -0
  88. package/dist/util/schema.d.ts +3 -15
  89. package/dist/util/schema.d.ts.map +1 -1
  90. package/dist/util/web-crypto.d.ts +22 -0
  91. package/dist/util/web-crypto.d.ts.map +1 -0
  92. package/dist/util/zstd.d.ts +26 -3
  93. package/dist/util/zstd.d.ts.map +1 -1
  94. package/dist/wire/opaque.d.ts +11 -0
  95. package/dist/wire/opaque.d.ts.map +1 -0
  96. package/dist/wire/reader.d.ts +5 -5
  97. package/dist/wire/reader.d.ts.map +1 -1
  98. package/dist/wire/request.d.ts +11 -3
  99. package/dist/wire/request.d.ts.map +1 -1
  100. package/dist/wire/response.d.ts +6 -6
  101. package/dist/wire/response.d.ts.map +1 -1
  102. package/dist/wire/writer.d.ts +49 -39
  103. package/dist/wire/writer.d.ts.map +1 -1
  104. package/package.json +24 -10
  105. package/src/access-log.ts +195 -0
  106. package/src/arrow/impl-arrowjs/index.ts +433 -0
  107. package/src/arrow/impl-flechette/index.ts +414 -0
  108. package/src/arrow/impl-flechette/message-meta.ts +174 -0
  109. package/src/arrow/index.ts +89 -0
  110. package/src/arrow/predicates.ts +56 -0
  111. package/src/arrow/types.ts +73 -0
  112. package/src/client/capabilities.ts +84 -0
  113. package/src/client/connect.ts +103 -26
  114. package/src/client/introspect.ts +60 -38
  115. package/src/client/ipc.ts +37 -27
  116. package/src/client/pipe.ts +12 -9
  117. package/src/client/stream.ts +34 -19
  118. package/src/client/uploadUrl.ts +169 -0
  119. package/src/constants.ts +18 -1
  120. package/src/crypto.ts +95 -0
  121. package/src/dispatch/describe.ts +146 -107
  122. package/src/dispatch/stream.ts +53 -24
  123. package/src/dispatch/unary.ts +5 -4
  124. package/src/errors.ts +76 -0
  125. package/src/external.ts +43 -29
  126. package/src/http/bearer.ts +2 -5
  127. package/src/http/common.ts +90 -23
  128. package/src/http/dispatch.ts +373 -46
  129. package/src/http/handler.ts +794 -68
  130. package/src/http/index.ts +1 -0
  131. package/src/http/mtls.ts +18 -3
  132. package/src/http/oauth-pkce.ts +1035 -0
  133. package/src/http/pages.ts +30 -15
  134. package/src/http/sticky.ts +429 -0
  135. package/src/http/token.ts +165 -75
  136. package/src/http/types.ts +69 -5
  137. package/src/index.ts +40 -1
  138. package/src/launcher/hash.ts +104 -0
  139. package/src/launcher/index.ts +35 -0
  140. package/src/launcher/launch.ts +284 -0
  141. package/src/launcher/lock.ts +171 -0
  142. package/src/launcher/serve-unix.ts +385 -0
  143. package/src/launcher/state.ts +245 -0
  144. package/src/otel.ts +39 -33
  145. package/src/protocol.ts +27 -3
  146. package/src/schema.ts +107 -56
  147. package/src/server.ts +196 -20
  148. package/src/types.ts +322 -18
  149. package/src/util/gzip.ts +63 -0
  150. package/src/util/schema.ts +4 -22
  151. package/src/util/web-crypto.ts +98 -0
  152. package/src/util/zstd.ts +133 -14
  153. package/src/wire/opaque.ts +37 -0
  154. package/src/wire/reader.ts +5 -4
  155. package/src/wire/request.ts +67 -8
  156. package/src/wire/response.ts +51 -85
  157. package/src/wire/writer.ts +165 -69
  158. package/dist/util/conform.d.ts +0 -18
  159. package/dist/util/conform.d.ts.map +0 -1
  160. package/src/util/conform.ts +0 -94
package/src/util/zstd.ts CHANGED
@@ -4,40 +4,159 @@
4
4
  /**
5
5
  * Cross-runtime zstd compression/decompression.
6
6
  *
7
- * Uses Bun.zstd* when running on Bun, otherwise falls back to node:zlib
8
- * (available on Node.js 22.15+ and Deno 2.6.9+).
7
+ * Decompression order of preference: Bun.zstd → node:zlib zstd (Node 22.15+,
8
+ * Deno 2.6.9+) → fzstd pure-JS fallback. The fzstd fallback exists so
9
+ * Cloudflare workerd — which has no native zstd — can still decode
10
+ * `Content-Encoding: zstd` request bodies (the DuckDB VGI extension always
11
+ * sends them). fzstd is decompression-only, so compression on workerd still
12
+ * throws.
9
13
  */
10
14
 
11
- import * as zlib from "node:zlib";
15
+ import { decompress as fzstdDecompress } from "fzstd";
12
16
 
17
+ // Resolve node:zlib via indirect-string require so esbuild/wrangler can't
18
+ // trace it statically. On workerd we want the fzstd path, not a node:zlib
19
+ // import that wouldn't have zstd anyway.
20
+ const _NODE_ZLIB_MOD = "node:zlib";
13
21
  const isBun = typeof globalThis.Bun !== "undefined";
22
+ function _loadZlibOrNull(): any | null {
23
+ const req: any = (import.meta as any).require ?? (globalThis as any).require ?? null;
24
+ if (!req) return null;
25
+ try {
26
+ return req(_NODE_ZLIB_MOD);
27
+ } catch {
28
+ return null;
29
+ }
30
+ }
31
+
32
+ /** Return true when the current runtime can produce zstd-compressed output.
33
+ *
34
+ * Bun has `Bun.zstdCompressSync`; Node ≥22.15 / Deno ≥2.6.9 expose it via
35
+ * `node:zlib`. Other runtimes (workerd, older Node) have no encoder. The
36
+ * fzstd fallback is decompress-only so it doesn't count.
37
+ */
38
+ export function isZstdCompressAvailable(): boolean {
39
+ if (isBun) return true;
40
+ const zlib = _loadZlibOrNull();
41
+ return typeof zlib?.zstdCompressSync === "function";
42
+ }
14
43
 
15
44
  /** Compress data with zstd at the given level (1-22). */
16
- export function zstdCompress(data: Uint8Array, level: number): Uint8Array<ArrayBuffer> {
45
+ export async function zstdCompress(data: Uint8Array, level: number): Promise<Uint8Array<ArrayBuffer>> {
17
46
  if (isBun) {
18
47
  return new Uint8Array(Bun.zstdCompressSync(data, { level }));
19
48
  }
20
- const fn = (zlib as any).zstdCompressSync;
49
+ const zlib = _loadZlibOrNull();
50
+ const fn = zlib?.zstdCompressSync;
21
51
  if (typeof fn !== "function") {
22
- throw new Error("zstd is not available in this runtime. " + "Requires Bun, Node.js >= 22.15, or Deno >= 2.6.9.");
52
+ throw new Error(
53
+ "zstd compression is not available in this runtime. " +
54
+ "Requires Bun or Node.js >= 22.15 / Deno >= 2.6.9. " +
55
+ "(workerd has no native zstd encoder; fzstd is decompress-only.)",
56
+ );
23
57
  }
24
58
  return new Uint8Array(
25
59
  fn(data, {
26
60
  params: {
27
- [(zlib.constants as any).ZSTD_c_compressionLevel]: level,
61
+ [zlib.constants.ZSTD_c_compressionLevel]: level,
28
62
  },
29
63
  }),
30
64
  );
31
65
  }
32
66
 
33
- /** Decompress zstd-compressed data. */
34
- export function zstdDecompress(data: Uint8Array): Uint8Array<ArrayBuffer> {
67
+ /**
68
+ * Decompress zstd-compressed data, optionally bounding the output size.
69
+ *
70
+ * Zstd frames can carry the decompressed size in the header and decompressors
71
+ * trust it eagerly: a ~3 KB compressed body claiming 100 MB output would
72
+ * allocate 100 MB. When `maxOutputSize` is supplied, this helper:
73
+ *
74
+ * 1. Reads `Frame_Content_Size` from the frame header. If declared and
75
+ * above the cap, refuses *before* allocation with a clear error.
76
+ * 2. Decompresses, then asserts the actual output size is also under the
77
+ * cap (covers frames whose size is not in the header — a streaming
78
+ * cap would be tighter, but neither Bun.zstdDecompressSync nor
79
+ * node:zlib's sync API exposes one, so we use the post-check).
80
+ *
81
+ * Mirrors the Python server-side fix in `_decompress_body` and the
82
+ * client-side fix in `external_fetch.fetch_url`.
83
+ */
84
+ export async function zstdDecompress(data: Uint8Array, maxOutputSize?: number): Promise<Uint8Array<ArrayBuffer>> {
85
+ if (maxOutputSize != null) {
86
+ const declared = readZstdFrameContentSize(data);
87
+ if (declared !== null && declared > maxOutputSize) {
88
+ throw new Error(`zstd decompressed size (${declared}) would exceed cap (${maxOutputSize})`);
89
+ }
90
+ }
91
+
92
+ let out: Uint8Array<ArrayBuffer>;
35
93
  if (isBun) {
36
- return new Uint8Array(Bun.zstdDecompressSync(data));
94
+ out = new Uint8Array(Bun.zstdDecompressSync(data));
95
+ } else {
96
+ const zlib = _loadZlibOrNull();
97
+ const fn = zlib?.zstdDecompressSync;
98
+ if (typeof fn === "function") {
99
+ out = new Uint8Array(fn(data));
100
+ } else {
101
+ // workerd path: no native zstd, fall back to the pure-JS decoder.
102
+ // fzstd is decompress-only and synchronous; cap-checking already ran
103
+ // above against the frame header, but pure-JS decode of large inputs
104
+ // is slow — keep the upstream maxOutputSize tight.
105
+ //
106
+ // CRITICAL: copy into a freshly-allocated ArrayBuffer so byteOffset is
107
+ // 0. fzstd internally returns subarray views with arbitrary byteOffset
108
+ // (often not 8-aligned), and downstream Arrow IPC readers create
109
+ // BigInt64Array views relative to the buffer's byteOffset — those
110
+ // throw `start offset of BigInt64Array should be a multiple of 8` if
111
+ // the underlying offset isn't 8-aligned.
112
+ const decoded = fzstdDecompress(data);
113
+ out = new Uint8Array(decoded.byteLength);
114
+ out.set(decoded);
115
+ }
37
116
  }
38
- const fn = (zlib as any).zstdDecompressSync;
39
- if (typeof fn !== "function") {
40
- throw new Error("zstd is not available in this runtime. " + "Requires Bun, Node.js >= 22.15, or Deno >= 2.6.9.");
117
+
118
+ if (maxOutputSize != null && out.byteLength > maxOutputSize) {
119
+ throw new Error(`zstd decompressed size (${out.byteLength}) exceeds cap (${maxOutputSize})`);
120
+ }
121
+ return out;
122
+ }
123
+
124
+ /**
125
+ * Parse `Frame_Content_Size` from a zstd frame header.
126
+ *
127
+ * Returns the declared decompressed size, or `null` if the frame header
128
+ * does not include it (frames may omit it for streaming compression) or
129
+ * the input is too short / not a valid zstd frame magic.
130
+ *
131
+ * Frame format (RFC 8478): magic(4) | FHD(1) | window_desc(0|1) |
132
+ * dict_id(0|1|2|4) | frame_content_size(0|1|2|4|8). FCS_size depends on
133
+ * FCS_field_size (FHD bits 6-7) and Single_Segment_flag (FHD bit 5).
134
+ */
135
+ function readZstdFrameContentSize(data: Uint8Array): number | null {
136
+ if (data.length < 6) return null;
137
+ // Magic: 0xFD2FB528 little-endian.
138
+ if (data[0] !== 0x28 || data[1] !== 0xb5 || data[2] !== 0x2f || data[3] !== 0xfd) {
139
+ return null;
140
+ }
141
+ const fhd = data[4];
142
+ const fcsFieldSize = (fhd >> 6) & 0x3;
143
+ const singleSegment = ((fhd >> 5) & 0x1) === 1;
144
+ const dictIdFlag = fhd & 0x3;
145
+ // Per spec: FCS_size = 0 → 0 unless Single_Segment_flag is set, then 1.
146
+ const fcsSize = fcsFieldSize === 0 ? (singleSegment ? 1 : 0) : fcsFieldSize === 1 ? 2 : fcsFieldSize === 2 ? 4 : 8;
147
+ if (fcsSize === 0) return null;
148
+
149
+ const windowDescSize = singleSegment ? 0 : 1;
150
+ const dictIdSize = dictIdFlag === 0 ? 0 : dictIdFlag === 1 ? 1 : dictIdFlag === 2 ? 2 : 4;
151
+ const fcsOffset = 5 + windowDescSize + dictIdSize;
152
+ if (data.length < fcsOffset + fcsSize) return null;
153
+
154
+ let fcs = 0n;
155
+ for (let i = 0; i < fcsSize; i++) {
156
+ fcs |= BigInt(data[fcsOffset + i]) << BigInt(i * 8);
41
157
  }
42
- return new Uint8Array(fn(data));
158
+ // FCS_field_size == 1 (size 2) carries an offset of 256.
159
+ if (fcsSize === 2) fcs += 256n;
160
+ if (fcs > BigInt(Number.MAX_SAFE_INTEGER)) return Number.MAX_SAFE_INTEGER;
161
+ return Number(fcs);
43
162
  }
@@ -0,0 +1,37 @@
1
+ // © Copyright 2025-2026, Query.Farm LLC - https://query.farm
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ import {
5
+ isDate,
6
+ isDecimal,
7
+ isDictionary,
8
+ isDuration,
9
+ isFixedSizeBinary,
10
+ isLargeBinary,
11
+ isLargeUtf8,
12
+ isTime,
13
+ isTimestamp,
14
+ type VgiDataType,
15
+ } from "../arrow/index.js";
16
+
17
+ /**
18
+ * Arrow types whose `.get(0)` / `vectorFromArray` round-trips are unreliable
19
+ * in arrow-js. For these we extract and re-emit the underlying `Data` object
20
+ * directly (passthrough), like we already do for Map_.
21
+ *
22
+ * Covers Date/Time/Timestamp/Duration/Decimal/LargeUtf8/LargeBinary/
23
+ * FixedSizeBinary/Dictionary.
24
+ */
25
+ export function isOpaquePassthroughType(type: VgiDataType): boolean {
26
+ return (
27
+ isDate(type) ||
28
+ isTime(type) ||
29
+ isTimestamp(type) ||
30
+ isDuration(type) ||
31
+ isDecimal(type) ||
32
+ isLargeUtf8(type) ||
33
+ isLargeBinary(type) ||
34
+ isFixedSizeBinary(type) ||
35
+ isDictionary(type)
36
+ );
37
+ }
@@ -2,10 +2,11 @@
2
2
  // SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  import { type RecordBatch, RecordBatchReader, type Schema } from "@query-farm/apache-arrow";
5
+ import type { VgiBatch, VgiSchema } from "../arrow/index.js";
5
6
 
6
7
  export interface StreamMessage {
7
- schema: Schema;
8
- batches: RecordBatch[];
8
+ schema: VgiSchema;
9
+ batches: VgiBatch[];
9
10
  }
10
11
 
11
12
  /**
@@ -68,7 +69,7 @@ export class IpcStreamReader {
68
69
  * Use readNextBatch() to read batches one at a time.
69
70
  * Returns null on EOF.
70
71
  */
71
- async openNextStream(): Promise<Schema | null> {
72
+ async openNextStream(): Promise<VgiSchema | null> {
72
73
  if (this.initialized) {
73
74
  await this.reader.reset().open();
74
75
  if (this.reader.closed) {
@@ -88,7 +89,7 @@ export class IpcStreamReader {
88
89
  * reading from the underlying byte source. This prevents the Arrow-JS
89
90
  * reader from consuming bytes that belong to the next IPC stream.
90
91
  */
91
- async readNextBatch(): Promise<RecordBatch | null> {
92
+ async readNextBatch(): Promise<VgiBatch | null> {
92
93
  if (this.streamEnded) return null;
93
94
  const result = await this.reader.next();
94
95
  if (result.done) {
@@ -1,15 +1,16 @@
1
1
  // © Copyright 2025-2026, Query.Farm LLC - https://query.farm
2
2
  // SPDX-License-Identifier: Apache-2.0
3
3
 
4
- import { DataType, type RecordBatch, type Schema } from "@query-farm/apache-arrow";
4
+ import { backend, isMap, type VgiBatch, type VgiSchema } from "../arrow/index.js";
5
5
  import { REQUEST_ID_KEY, REQUEST_VERSION, REQUEST_VERSION_KEY, RPC_METHOD_KEY } from "../constants.js";
6
6
  import { RpcError, VersionError } from "../errors.js";
7
+ import { isOpaquePassthroughType } from "./opaque.js";
7
8
 
8
9
  export interface ParsedRequest {
9
10
  methodName: string;
10
11
  requestVersion: string;
11
12
  requestId: string | null;
12
- schema: Schema;
13
+ schema: VgiSchema;
13
14
  params: Record<string, any>;
14
15
  rawMetadata: Map<string, string>;
15
16
  }
@@ -18,7 +19,7 @@ export interface ParsedRequest {
18
19
  * Parse a request from a RecordBatch with metadata.
19
20
  * Extracts method name, version, and params from the batch.
20
21
  */
21
- export function parseRequest(schema: Schema, batch: RecordBatch): ParsedRequest {
22
+ export function parseRequest(schema: VgiSchema, batch: VgiBatch): ParsedRequest {
22
23
  const metadata: Map<string, string> = batch.metadata ?? new Map();
23
24
 
24
25
  const methodName = metadata.get(RPC_METHOD_KEY);
@@ -59,16 +60,54 @@ export function parseRequest(schema: Schema, batch: RecordBatch): ParsedRequest
59
60
  );
60
61
  }
61
62
 
63
+ // Map_ + Date/Time/Timestamp/Duration/Decimal/LargeUtf8/LargeBinary/
64
+ // FixedSizeBinary/Dictionary all need passthrough on arrow-js because
65
+ // its `.get(0)` round-trip is unreliable for those types. On flechette
66
+ // those same types extract cleanly via `col.get(0)`, and `col.data[0]`
67
+ // is a Batch (not a Data) so the arrow-js passthrough trick doesn't
68
+ // apply. The backend advertises whether passthrough is needed.
69
+ const useOpaquePassthrough = backend.opaquePassthrough;
62
70
  for (let i = 0; i < schema.fields.length; i++) {
63
71
  const field = schema.fields[i];
64
- // Map_ columns have a broken .get() in arrow-js — pass through raw Data
65
- if (DataType.isMap(field.type)) {
66
- params[field.name] = batch.getChildAt(i)!.data[0];
72
+ if (useOpaquePassthrough && (isMap(field.type) || isOpaquePassthroughType(field.type))) {
73
+ const col = batch.getChildAt(i)!;
74
+ params[field.name] = (col as any).data?.[0] ?? col.get(0);
67
75
  continue;
68
76
  }
69
77
  let value = batch.getChildAt(i)?.get(0);
70
- // Convert BigInt to Number when safe
71
- if (typeof value === "bigint") {
78
+ // Normalize arrow-js DecimalBigNum wrappers to primitive BigInt.
79
+ //
80
+ // TODO: remove once the stdio transport reads through a facade-aware
81
+ // reader. The stdio reader is arrow-js-coupled (the facade exposes no
82
+ // streaming reader), so under the flechette facade we still receive
83
+ // arrow-js batches; an opaque Decimal column then yields a
84
+ // `DecimalBigNum` (a Uint32Array subclass whose `.toString()` is the
85
+ // numeric value). Downstream construction goes through the flechette
86
+ // facade and expects a primitive — without this the encoder treats the
87
+ // BigNum as a Number and loses precision.
88
+ //
89
+ // Detect structurally via `instanceof Uint32Array` rather than by
90
+ // constructor name (which minifies away): DecimalBigNum is the only
91
+ // opaque type whose `.get(0)` returns a Uint32Array, so this also
92
+ // excludes binary `Uint8Array` values from being mis-parsed as BigInt.
93
+ if (value instanceof Uint32Array && isOpaquePassthroughType(field.type)) {
94
+ // BigInt(decimalBigNum) triggers arrow-js's
95
+ // Symbol.toPrimitive('number') path which throws for values
96
+ // outside the safe-integer range. Go through `.toString()`
97
+ // instead — it handles arbitrary precision.
98
+ try {
99
+ value = BigInt((value as unknown as { toString(): string }).toString());
100
+ } catch {
101
+ // leave as-is on unexpected shape
102
+ }
103
+ }
104
+ // Convert BigInt to Number when safe — but NOT for types whose
105
+ // BigInt-encoded value is type-scaled (Decimal: unscaled integer;
106
+ // Date32: ms-since-epoch; Timestamp/Time/Duration: native-unit
107
+ // ticks). Re-encoding a Number where a BigInt is expected would
108
+ // make the builder apply a `*scale` multiplication (Decimal) or
109
+ // `* 10^unit` (Time/Timestamp), corrupting the value.
110
+ if (typeof value === "bigint" && !isOpaquePassthroughType(field.type)) {
72
111
  if (value >= BigInt(Number.MIN_SAFE_INTEGER) && value <= BigInt(Number.MAX_SAFE_INTEGER)) {
73
112
  value = Number(value);
74
113
  }
@@ -85,3 +124,23 @@ export function parseRequest(schema: Schema, batch: RecordBatch): ParsedRequest
85
124
  rawMetadata: metadata,
86
125
  };
87
126
  }
127
+
128
+ /**
129
+ * Fill in `defaults` for any params that arrived as null/undefined.
130
+ * The slim DESCRIBE_VERSION 4 wire format no longer carries defaults to the
131
+ * client, so default substitution must happen server-side: the client sends
132
+ * a null in any column it didn't supply, and dispatch swaps in the registered
133
+ * default before invoking the handler.
134
+ */
135
+ export function applyDefaults(
136
+ params: Record<string, any>,
137
+ defaults: Record<string, any> | undefined,
138
+ ): Record<string, any> {
139
+ if (!defaults) return params;
140
+ for (const key of Object.keys(defaults)) {
141
+ if (params[key] == null) {
142
+ params[key] = defaults[key];
143
+ }
144
+ }
145
+ return params;
146
+ }
@@ -2,32 +2,36 @@
2
2
  // SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  import {
5
- Data,
6
- DataType,
7
- type Field,
8
- makeData,
9
- RecordBatch,
10
- type Schema,
11
- Struct,
12
- vectorFromArray,
13
- } from "@query-farm/apache-arrow";
14
- import { LOG_EXTRA_KEY, LOG_LEVEL_KEY, LOG_MESSAGE_KEY, REQUEST_ID_KEY, SERVER_ID_KEY } from "../constants.js";
5
+ emptyBatchWithMetadata,
6
+ isInt,
7
+ singleRowBatchWithMetadata,
8
+ type VgiBatch,
9
+ type VgiSchema,
10
+ } from "../arrow/index.js";
11
+ import {
12
+ ERROR_KIND_KEY,
13
+ LOG_EXTRA_KEY,
14
+ LOG_LEVEL_KEY,
15
+ LOG_MESSAGE_KEY,
16
+ REQUEST_ID_KEY,
17
+ SERVER_ID_KEY,
18
+ } from "../constants.js";
15
19
 
16
20
  /**
17
21
  * Coerce values for Int64 schema fields from Number to BigInt.
18
22
  * Handles both single values and arrays. Returns a new record with coerced values.
19
23
  */
20
- export function coerceInt64(schema: Schema, values: Record<string, any>): Record<string, any> {
24
+ export function coerceInt64(schema: VgiSchema, values: Record<string, any>): Record<string, any> {
21
25
  const result: Record<string, any> = { ...values };
22
- for (const field of schema.fields) {
23
- const val = result[field.name];
26
+ for (const f of schema.fields) {
27
+ const val = result[f.name];
24
28
  if (val === undefined) continue;
25
- if (!DataType.isInt(field.type) || (field.type as any).bitWidth !== 64) continue;
29
+ if (!isInt(f.type) || (f.type as any).bitWidth !== 64) continue;
26
30
 
27
31
  if (Array.isArray(val)) {
28
- result[field.name] = val.map((v: any) => (typeof v === "number" ? BigInt(v) : v));
32
+ result[f.name] = val.map((v: any) => (typeof v === "number" ? BigInt(v) : v));
29
33
  } else if (typeof val === "number") {
30
- result[field.name] = BigInt(val);
34
+ result[f.name] = BigInt(val);
31
35
  }
32
36
  }
33
37
  return result;
@@ -38,11 +42,11 @@ export function coerceInt64(schema: Schema, values: Record<string, any>): Record
38
42
  * For unary methods, `values` maps field names to single values.
39
43
  */
40
44
  export function buildResultBatch(
41
- schema: Schema,
45
+ schema: VgiSchema,
42
46
  values: Record<string, any>,
43
47
  serverId: string,
44
48
  requestId: string | null,
45
- ): RecordBatch {
49
+ ): VgiBatch {
46
50
  const metadata = new Map<string, string>();
47
51
  metadata.set(SERVER_ID_KEY, serverId);
48
52
  if (requestId !== null) {
@@ -54,49 +58,48 @@ export function buildResultBatch(
54
58
  }
55
59
 
56
60
  // Validate required fields
57
- for (const field of schema.fields) {
58
- if (values[field.name] === undefined && !field.nullable) {
61
+ for (const f of schema.fields) {
62
+ if (values[f.name] === undefined && !f.nullable) {
59
63
  const got = Object.keys(values);
60
- throw new TypeError(`Handler result missing required field '${field.name}'. Got keys: [${got.join(", ")}]`);
64
+ throw new TypeError(`Handler result missing required field '${f.name}'. Got keys: [${got.join(", ")}]`);
61
65
  }
62
66
  }
63
67
 
64
68
  const coerced = coerceInt64(schema, values);
65
-
66
- const children = schema.fields.map((f: Field) => {
67
- const val = coerced[f.name];
68
- // Raw Data passthrough for Map_ types (whose .get() is broken in arrow-js)
69
- if (val instanceof Data) {
70
- return val;
71
- }
72
- const arr = vectorFromArray([val], f.type);
73
- return arr.data[0];
74
- });
75
-
76
- const structType = new Struct(schema.fields);
77
- const data = makeData({
78
- type: structType,
79
- length: 1,
80
- children,
81
- nullCount: 0,
82
- });
83
-
84
- return new RecordBatch(schema, data, metadata);
69
+ return singleRowBatchWithMetadata(schema, coerced, metadata);
85
70
  }
86
71
 
87
72
  /**
88
73
  * Build a 0-row error batch with EXCEPTION metadata matching Python's Message.from_exception().
89
74
  */
90
- export function buildErrorBatch(schema: Schema, error: Error, serverId: string, requestId: string | null): RecordBatch {
75
+ export function buildErrorBatch(schema: VgiSchema, error: Error, serverId: string, requestId: string | null): VgiBatch {
91
76
  const metadata = new Map<string, string>();
92
77
  metadata.set(LOG_LEVEL_KEY, "EXCEPTION");
93
- metadata.set(LOG_MESSAGE_KEY, `${error.constructor.name}: ${error.message}`);
78
+ // Prefer the standard `error.name` property (which user classes can set
79
+ // via `this.name = "Foo"` even after a bundler renames the class) over
80
+ // `constructor.name`, which is fragile under minification.
81
+ const exceptionType = typeof error.name === "string" && error.name !== "Error" ? error.name : error.constructor.name;
82
+ metadata.set(LOG_MESSAGE_KEY, `${exceptionType}: ${error.message}`);
83
+
84
+ // Hoist `errorKind` (typed-exception marker) into the EXCEPTION batch
85
+ // metadata as a top-level `vgi_rpc.error_kind` field so clients can
86
+ // branch on the kind without parsing the log_extra JSON blob. Mirrors
87
+ // Python's `Message.from_exception()` + `add_to_metadata()` hoisting.
88
+ const errorKind =
89
+ (error as { errorKind?: unknown }).errorKind ??
90
+ ((error.constructor as { errorKind?: unknown }).errorKind as unknown);
91
+ if (typeof errorKind === "string" && errorKind.length > 0) {
92
+ metadata.set(ERROR_KIND_KEY, errorKind);
93
+ }
94
94
 
95
95
  const extra: Record<string, any> = {
96
- exception_type: error.constructor.name,
96
+ exception_type: exceptionType,
97
97
  exception_message: error.message,
98
98
  traceback: error.stack ?? "",
99
99
  };
100
+ if (typeof errorKind === "string" && errorKind.length > 0) {
101
+ extra.error_kind = errorKind;
102
+ }
100
103
  metadata.set(LOG_EXTRA_KEY, JSON.stringify(extra));
101
104
  metadata.set(SERVER_ID_KEY, serverId);
102
105
  if (requestId !== null) {
@@ -110,13 +113,13 @@ export function buildErrorBatch(schema: Schema, error: Error, serverId: string,
110
113
  * Build a 0-row log batch.
111
114
  */
112
115
  export function buildLogBatch(
113
- schema: Schema,
116
+ schema: VgiSchema,
114
117
  level: string,
115
118
  message: string,
116
119
  extra?: Record<string, any>,
117
120
  serverId?: string,
118
121
  requestId?: string | null,
119
- ): RecordBatch {
122
+ ): VgiBatch {
120
123
  const metadata = new Map<string, string>();
121
124
  metadata.set(LOG_LEVEL_KEY, level);
122
125
  metadata.set(LOG_MESSAGE_KEY, message);
@@ -133,47 +136,10 @@ export function buildLogBatch(
133
136
  return buildEmptyBatch(schema, metadata);
134
137
  }
135
138
 
136
- /**
137
- * Recursively create empty (0-row) Data for any Arrow type,
138
- * including complex types (Struct, List, FixedSizeList, Map).
139
- */
140
- function makeEmptyData(type: DataType): Data {
141
- if (DataType.isStruct(type)) {
142
- const children = type.children.map((f: Field) => makeEmptyData(f.type));
143
- return makeData({ type, length: 0, children, nullCount: 0 });
144
- }
145
- if (DataType.isList(type)) {
146
- const childData = makeEmptyData(type.children[0].type);
147
- return makeData({ type, length: 0, children: [childData], nullCount: 0, valueOffsets: new Int32Array([0]) } as any);
148
- }
149
- if (DataType.isFixedSizeList(type)) {
150
- const childData = makeEmptyData(type.children[0].type);
151
- return makeData({ type, length: 0, child: childData, nullCount: 0 } as any);
152
- }
153
- if (DataType.isMap(type)) {
154
- const entryType = type.children[0]?.type;
155
- const entryData = entryType
156
- ? makeEmptyData(entryType)
157
- : makeData({ type: new Struct([]), length: 0, children: [], nullCount: 0 });
158
- return makeData({ type, length: 0, children: [entryData], nullCount: 0, valueOffsets: new Int32Array([0]) } as any);
159
- }
160
- return makeData({ type, length: 0, nullCount: 0 });
161
- }
162
-
163
139
  /**
164
140
  * Build a 0-row batch from a schema with metadata.
165
141
  * Used for error/log batches.
166
142
  */
167
- export function buildEmptyBatch(schema: Schema, metadata?: Map<string, string>): RecordBatch {
168
- const children = schema.fields.map((f: Field) => makeEmptyData(f.type));
169
-
170
- const structType = new Struct(schema.fields);
171
- const data = makeData({
172
- type: structType,
173
- length: 0,
174
- children,
175
- nullCount: 0,
176
- });
177
-
178
- return new RecordBatch(schema, data, metadata);
143
+ export function buildEmptyBatch(schema: VgiSchema, metadata?: Map<string, string>): VgiBatch {
144
+ return emptyBatchWithMetadata(schema, metadata);
179
145
  }