@query-farm/vgi-rpc 0.6.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/dist/access-log.d.ts +50 -0
  2. package/dist/access-log.d.ts.map +1 -0
  3. package/dist/arrow/impl-arrowjs/index.d.ts +96 -0
  4. package/dist/arrow/impl-arrowjs/index.d.ts.map +1 -0
  5. package/dist/arrow/impl-flechette/index.d.ts +102 -0
  6. package/dist/arrow/impl-flechette/index.d.ts.map +1 -0
  7. package/dist/arrow/impl-flechette/message-meta.d.ts +11 -0
  8. package/dist/arrow/impl-flechette/message-meta.d.ts.map +1 -0
  9. package/dist/arrow/index.d.ts +4 -0
  10. package/dist/arrow/index.d.ts.map +1 -0
  11. package/dist/arrow/predicates.d.ts +44 -0
  12. package/dist/arrow/predicates.d.ts.map +1 -0
  13. package/dist/arrow/types.d.ts +62 -0
  14. package/dist/arrow/types.d.ts.map +1 -0
  15. package/dist/client/capabilities.d.ts +25 -0
  16. package/dist/client/capabilities.d.ts.map +1 -0
  17. package/dist/client/connect.d.ts.map +1 -1
  18. package/dist/client/introspect.d.ts +7 -0
  19. package/dist/client/introspect.d.ts.map +1 -1
  20. package/dist/client/ipc.d.ts +8 -2
  21. package/dist/client/ipc.d.ts.map +1 -1
  22. package/dist/client/pipe.d.ts.map +1 -1
  23. package/dist/client/stream.d.ts +11 -2
  24. package/dist/client/stream.d.ts.map +1 -1
  25. package/dist/client/uploadUrl.d.ts +25 -0
  26. package/dist/client/uploadUrl.d.ts.map +1 -0
  27. package/dist/constants.d.ts +15 -1
  28. package/dist/constants.d.ts.map +1 -1
  29. package/dist/crypto.d.ts +22 -0
  30. package/dist/crypto.d.ts.map +1 -0
  31. package/dist/dispatch/describe.d.ts +10 -6
  32. package/dist/dispatch/describe.d.ts.map +1 -1
  33. package/dist/dispatch/stream.d.ts +2 -2
  34. package/dist/dispatch/stream.d.ts.map +1 -1
  35. package/dist/dispatch/unary.d.ts +2 -2
  36. package/dist/dispatch/unary.d.ts.map +1 -1
  37. package/dist/errors.d.ts +46 -0
  38. package/dist/errors.d.ts.map +1 -1
  39. package/dist/external.d.ts +25 -5
  40. package/dist/external.d.ts.map +1 -1
  41. package/dist/http/bearer.d.ts.map +1 -1
  42. package/dist/http/common.d.ts +42 -7
  43. package/dist/http/common.d.ts.map +1 -1
  44. package/dist/http/dispatch.d.ts +20 -2
  45. package/dist/http/dispatch.d.ts.map +1 -1
  46. package/dist/http/handler.d.ts.map +1 -1
  47. package/dist/http/index.d.ts +1 -0
  48. package/dist/http/index.d.ts.map +1 -1
  49. package/dist/http/mtls.d.ts +2 -1
  50. package/dist/http/mtls.d.ts.map +1 -1
  51. package/dist/http/oauth-pkce.d.ts +141 -0
  52. package/dist/http/oauth-pkce.d.ts.map +1 -0
  53. package/dist/http/pages.d.ts +3 -0
  54. package/dist/http/pages.d.ts.map +1 -1
  55. package/dist/http/sticky.d.ts +124 -0
  56. package/dist/http/sticky.d.ts.map +1 -0
  57. package/dist/http/token.d.ts +38 -12
  58. package/dist/http/token.d.ts.map +1 -1
  59. package/dist/http/types.d.ts +68 -5
  60. package/dist/http/types.d.ts.map +1 -1
  61. package/dist/index.d.ts +6 -4
  62. package/dist/index.d.ts.map +1 -1
  63. package/dist/index.js +1275 -3507
  64. package/dist/index.js.map +19 -37
  65. package/dist/launcher/hash.d.ts +22 -0
  66. package/dist/launcher/hash.d.ts.map +1 -0
  67. package/dist/launcher/index.d.ts +23 -0
  68. package/dist/launcher/index.d.ts.map +1 -0
  69. package/dist/launcher/launch.d.ts +27 -0
  70. package/dist/launcher/launch.d.ts.map +1 -0
  71. package/dist/launcher/lock.d.ts +19 -0
  72. package/dist/launcher/lock.d.ts.map +1 -0
  73. package/dist/launcher/serve-unix.d.ts +54 -0
  74. package/dist/launcher/serve-unix.d.ts.map +1 -0
  75. package/dist/launcher/state.d.ts +59 -0
  76. package/dist/launcher/state.d.ts.map +1 -0
  77. package/dist/otel.d.ts.map +1 -1
  78. package/dist/protocol.d.ts +16 -2
  79. package/dist/protocol.d.ts.map +1 -1
  80. package/dist/schema.d.ts +45 -18
  81. package/dist/schema.d.ts.map +1 -1
  82. package/dist/server.d.ts +23 -2
  83. package/dist/server.d.ts.map +1 -1
  84. package/dist/types.d.ts +216 -12
  85. package/dist/types.d.ts.map +1 -1
  86. package/dist/util/gzip.d.ts +10 -0
  87. package/dist/util/gzip.d.ts.map +1 -0
  88. package/dist/util/schema.d.ts +3 -15
  89. package/dist/util/schema.d.ts.map +1 -1
  90. package/dist/util/web-crypto.d.ts +22 -0
  91. package/dist/util/web-crypto.d.ts.map +1 -0
  92. package/dist/util/zstd.d.ts +26 -3
  93. package/dist/util/zstd.d.ts.map +1 -1
  94. package/dist/wire/opaque.d.ts +11 -0
  95. package/dist/wire/opaque.d.ts.map +1 -0
  96. package/dist/wire/reader.d.ts +5 -5
  97. package/dist/wire/reader.d.ts.map +1 -1
  98. package/dist/wire/request.d.ts +11 -3
  99. package/dist/wire/request.d.ts.map +1 -1
  100. package/dist/wire/response.d.ts +6 -6
  101. package/dist/wire/response.d.ts.map +1 -1
  102. package/dist/wire/writer.d.ts +49 -39
  103. package/dist/wire/writer.d.ts.map +1 -1
  104. package/package.json +24 -10
  105. package/src/access-log.ts +195 -0
  106. package/src/arrow/impl-arrowjs/index.ts +433 -0
  107. package/src/arrow/impl-flechette/index.ts +414 -0
  108. package/src/arrow/impl-flechette/message-meta.ts +174 -0
  109. package/src/arrow/index.ts +89 -0
  110. package/src/arrow/predicates.ts +56 -0
  111. package/src/arrow/types.ts +73 -0
  112. package/src/client/capabilities.ts +84 -0
  113. package/src/client/connect.ts +103 -26
  114. package/src/client/introspect.ts +60 -38
  115. package/src/client/ipc.ts +37 -27
  116. package/src/client/pipe.ts +12 -9
  117. package/src/client/stream.ts +34 -19
  118. package/src/client/uploadUrl.ts +169 -0
  119. package/src/constants.ts +18 -1
  120. package/src/crypto.ts +95 -0
  121. package/src/dispatch/describe.ts +146 -107
  122. package/src/dispatch/stream.ts +53 -24
  123. package/src/dispatch/unary.ts +5 -4
  124. package/src/errors.ts +76 -0
  125. package/src/external.ts +43 -29
  126. package/src/http/bearer.ts +2 -5
  127. package/src/http/common.ts +90 -23
  128. package/src/http/dispatch.ts +373 -46
  129. package/src/http/handler.ts +794 -68
  130. package/src/http/index.ts +1 -0
  131. package/src/http/mtls.ts +18 -3
  132. package/src/http/oauth-pkce.ts +1035 -0
  133. package/src/http/pages.ts +30 -15
  134. package/src/http/sticky.ts +429 -0
  135. package/src/http/token.ts +165 -75
  136. package/src/http/types.ts +69 -5
  137. package/src/index.ts +40 -1
  138. package/src/launcher/hash.ts +104 -0
  139. package/src/launcher/index.ts +35 -0
  140. package/src/launcher/launch.ts +284 -0
  141. package/src/launcher/lock.ts +171 -0
  142. package/src/launcher/serve-unix.ts +385 -0
  143. package/src/launcher/state.ts +245 -0
  144. package/src/otel.ts +39 -33
  145. package/src/protocol.ts +27 -3
  146. package/src/schema.ts +107 -56
  147. package/src/server.ts +196 -20
  148. package/src/types.ts +322 -18
  149. package/src/util/gzip.ts +63 -0
  150. package/src/util/schema.ts +4 -22
  151. package/src/util/web-crypto.ts +98 -0
  152. package/src/util/zstd.ts +133 -14
  153. package/src/wire/opaque.ts +37 -0
  154. package/src/wire/reader.ts +5 -4
  155. package/src/wire/request.ts +67 -8
  156. package/src/wire/response.ts +51 -85
  157. package/src/wire/writer.ts +165 -69
  158. package/dist/util/conform.d.ts +0 -18
  159. package/dist/util/conform.d.ts.map +0 -1
  160. package/src/util/conform.ts +0 -94
@@ -1,18 +1,50 @@
1
1
  // © Copyright 2025-2026, Query.Farm LLC - https://query.farm
2
2
  // SPDX-License-Identifier: Apache-2.0
3
3
 
4
- import { writeSync } from "node:fs";
5
- import { type RecordBatch, RecordBatchStreamWriter, type Schema } from "@query-farm/apache-arrow";
4
+ import type { Socket } from "node:net";
5
+ import type { IncrementalEncoder, VgiBatch, VgiSchema } from "../arrow/index.js";
6
+ import { createIncrementalEncoder, serializeBatches } from "../arrow/index.js";
6
7
 
7
8
  const STDOUT_FD = 1;
8
9
 
10
+ // Resolve node:fs via indirect-string require so esbuild/wrangler don't
11
+ // statically pull node:fs into the bundle. Workers (Cloudflare workerd) never
12
+ // instantiate IpcStreamWriter (no stdio transport on workers), so the
13
+ // runtime-time require("node:fs") is unreachable in those builds.
14
+ //
15
+ // `globalThis.require` is undefined in both Bun ESM and Node ESM, so we try
16
+ // `import.meta.require` (Bun) first, then fall back to globalThis.require
17
+ // (Node CJS). Node ESM consumers must polyfill require if they need the
18
+ // subprocess transport.
19
+ const _NODE_FS_MOD = "node:fs";
20
+ let _writeSync: ((fd: number, data: Uint8Array, offset?: number, len?: number) => number) | null = null;
21
+ function _loadWriteSync(): (fd: number, data: Uint8Array, offset?: number, len?: number) => number {
22
+ if (_writeSync) return _writeSync;
23
+ const req: any = (import.meta as any).require ?? (globalThis as any).require ?? null;
24
+ if (!req) {
25
+ throw new Error(
26
+ "IpcStreamWriter requires Bun or Node.js CJS for sync node:fs.writeSync. " +
27
+ "Subprocess transport is not available in this runtime.",
28
+ );
29
+ }
30
+ const fs = req(_NODE_FS_MOD);
31
+ _writeSync = fs.writeSync.bind(fs);
32
+ return _writeSync!;
33
+ }
34
+
9
35
  /**
10
36
  * Write all bytes to a file descriptor, looping on partial writes.
11
- * Handles EAGAIN (pipe buffer full) by busy-waiting with Atomics.wait().
12
- * writeSync() can return fewer bytes than requested when the pipe buffer
13
- * is full (e.g., 64KB limit), and throws EAGAIN on non-blocking fds.
37
+ *
38
+ * Stdio path only. The fd here is a pipe (typically stdout) inherited from
39
+ * the launcher in subprocess transport. Pipes set up by fork/exec are
40
+ * blocking by default, so writeSync blocks the kernel (not the event loop in
41
+ * a bad way) until the parent reader drains — that is the *desired* lockstep
42
+ * with the client. The AF_UNIX path does not come through here; sockets are
43
+ * non-blocking and EAGAIN-spinning them would freeze the event loop and
44
+ * starve every other connection. See `socketWriteAll` below.
14
45
  */
15
46
  function writeAll(fd: number, data: Uint8Array): void {
47
+ const writeSync = _loadWriteSync();
16
48
  let offset = 0;
17
49
  while (offset < data.length) {
18
50
  try {
@@ -21,7 +53,9 @@ function writeAll(fd: number, data: Uint8Array): void {
21
53
  offset += written;
22
54
  } catch (e: any) {
23
55
  if (e.code === "EAGAIN") {
24
- // Pipe buffer full — busy-wait briefly then retry
56
+ // A non-blocking pipe — unexpected for stdio, but handle defensively.
57
+ // Yielding is not possible from a synchronous function; busy-wait
58
+ // briefly. AF_UNIX sockets must NOT use this path.
25
59
  Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 1);
26
60
  continue;
27
61
  }
@@ -31,103 +65,165 @@ function writeAll(fd: number, data: Uint8Array): void {
31
65
  }
32
66
 
33
67
  /**
34
- * Writes sequential IPC streams to a file descriptor (e.g., stdout).
35
- * Each call to writeStream() writes a complete IPC stream: schema + batches + EOS.
68
+ * Asynchronously write all bytes to a Node net.Socket, honouring backpressure.
69
+ *
70
+ * `socket.write(buf)` queues `buf` into libuv's per-socket outbound buffer
71
+ * and returns false when that buffer has grown past `highWaterMark`. We then
72
+ * `await` the `'drain'` event so the JS thread yields — other connections
73
+ * can be dispatched while the kernel drains the send buffer in the
74
+ * background. Without this yield, a slow consumer on connection A would
75
+ * freeze every other connection's handler on the shared event loop.
76
+ *
77
+ * Resolves cleanly on socket close/error so the caller's try/finally can
78
+ * unwind without dangling listeners.
79
+ */
80
+ async function socketWriteAll(socket: Socket, data: Uint8Array): Promise<void> {
81
+ if (socket.destroyed || socket.writableEnded) {
82
+ throw new Error("socketWriteAll: socket is already closed");
83
+ }
84
+ const ok = socket.write(data);
85
+ if (ok) return;
86
+ await new Promise<void>((resolve, reject) => {
87
+ const cleanup = () => {
88
+ socket.off("drain", onDrain);
89
+ socket.off("error", onError);
90
+ socket.off("close", onClose);
91
+ };
92
+ const onDrain = () => {
93
+ cleanup();
94
+ resolve();
95
+ };
96
+ const onError = (err: Error) => {
97
+ cleanup();
98
+ reject(err);
99
+ };
100
+ const onClose = () => {
101
+ // Peer hung up before drain. Resolve so the caller's try/finally
102
+ // runs; the next read on this connection will surface EOF.
103
+ cleanup();
104
+ resolve();
105
+ };
106
+ socket.once("drain", onDrain);
107
+ socket.once("error", onError);
108
+ socket.once("close", onClose);
109
+ });
110
+ }
111
+
112
+ type WriterTarget = { kind: "fd"; fd: number } | { kind: "socket"; socket: Socket };
113
+
114
+ /**
115
+ * Writes sequential IPC streams to either an fd (stdio subprocess transport)
116
+ * or a Node Socket (AF_UNIX transport). Each call to writeStream() writes a
117
+ * complete IPC stream: schema + batches + EOS.
36
118
  *
37
- * All writes use synchronous I/O (writeSync) to avoid deadlocks when
38
- * interleaving stdout writes with blocking stdin reads.
119
+ * All public methods are async. The fd path resolves immediately after a
120
+ * synchronous writeSync; the socket path awaits real `'drain'` events on
121
+ * backpressure so the event loop stays responsive to other connections.
39
122
  */
40
123
  export class IpcStreamWriter {
41
- private readonly fd: number;
124
+ private readonly target: WriterTarget;
42
125
 
43
- constructor(fd: number = STDOUT_FD) {
44
- this.fd = fd;
126
+ /**
127
+ * Construct from a file descriptor (stdio transport) or a Node net.Socket
128
+ * (AF_UNIX transport). The default targets stdout for legacy stdio servers
129
+ * that didn't pass an fd.
130
+ */
131
+ constructor(fdOrSocket: number | Socket = STDOUT_FD) {
132
+ if (typeof fdOrSocket === "number") {
133
+ this.target = { kind: "fd", fd: fdOrSocket };
134
+ } else {
135
+ this.target = { kind: "socket", socket: fdOrSocket };
136
+ }
45
137
  }
46
138
 
47
139
  /**
48
140
  * Write a complete IPC stream with the given schema and batches.
49
141
  * Creates schema message, writes all batches (with their metadata), writes EOS.
50
142
  */
51
- writeStream(schema: Schema, batches: RecordBatch[]): void {
52
- const writer = new RecordBatchStreamWriter();
53
- writer.reset(undefined, schema);
54
- for (const batch of batches) {
55
- // Use _writeRecordBatch to bypass schema comparison (see IncrementalStream.write)
56
- (writer as any)._writeRecordBatch(batch);
143
+ async writeStream(schema: VgiSchema, batches: VgiBatch[]): Promise<void> {
144
+ // Delegate to the Arrow facade so the bytes-on-the-wire match the
145
+ // active backend (arrow-js by default, flechette under the `workerd`/
146
+ // `worker`/`browser` package.json conditions). The incremental stream
147
+ // below still uses arrow-js directly — flechette has no equivalent
148
+ // streaming-writer surface and only the stdio server uses the
149
+ // incremental path.
150
+ const bytes = serializeBatches(schema, batches);
151
+ if (this.target.kind === "fd") {
152
+ writeAll(this.target.fd, bytes);
153
+ } else {
154
+ await socketWriteAll(this.target.socket, bytes);
57
155
  }
58
- writer.close();
59
- const bytes = writer.toUint8Array(true);
60
- writeAll(this.fd, bytes);
61
156
  }
62
157
 
63
158
  /**
64
159
  * Open an incremental IPC stream for writing batches one at a time.
65
- * Used for streaming methods where output batches are produced incrementally.
66
- * Bytes are written synchronously after each batch.
67
160
  */
68
- openStream(schema: Schema): IncrementalStream {
69
- return new IncrementalStream(this.fd, schema);
161
+ openStream(schema: VgiSchema): IncrementalStream {
162
+ return new IncrementalStream(this.target, schema);
70
163
  }
71
164
  }
72
165
 
73
166
  /**
74
167
  * An open IPC stream that supports incremental batch writes.
75
168
  *
76
- * Uses RecordBatchStreamWriter with internal buffering (no pipe to stdout).
77
- * After each operation, drains the writer's internal AsyncByteQueue buffer
78
- * and writes bytes synchronously via writeAll(). This avoids deadlocks
79
- * caused by Node.js async stream piping when stdin reads block before
80
- * stdout writes flush through the event loop.
169
+ * Drives a backend {@link IncrementalEncoder} and flushes its bytes through
170
+ * the same target (fd or socket) as the parent IpcStreamWriter. The write()
171
+ * and close() methods are async so the socket path can yield on backpressure —
172
+ * critical under AF_UNIX where the kernel send buffer (~8 KB on macOS)
173
+ * fills quickly and any synchronous busy-wait would starve every other
174
+ * connection sharing this event loop.
175
+ *
176
+ * The encoder is obtained from the Arrow facade, so this file no longer
177
+ * imports arrow-js directly — keeping arrow-js out of the flechette
178
+ * (workerd/browser) bundle. The flechette encoder throws on construction;
179
+ * the stdio exchange protocol is lockstep (the client reads each response
180
+ * batch before sending the next input) which needs an incremental writer
181
+ * flechette doesn't provide. workerd/browser deployments use HTTP (no
182
+ * stdio), so the flechette path is never reached there; `flechette-pipe`
183
+ * conformance is xfailed for streams.
81
184
  */
82
185
  export class IncrementalStream {
83
- private writer: RecordBatchStreamWriter;
84
- private readonly fd: number;
186
+ private readonly encoder: IncrementalEncoder;
187
+ private readonly target: WriterTarget;
85
188
  private closed = false;
189
+ // Chains async writes so concurrent callers can't interleave bytes on the
190
+ // wire. Each operation appends to this promise; awaiting it preserves
191
+ // FIFO order even when the caller doesn't await between writes.
192
+ private writeChain: Promise<void> = Promise.resolve();
86
193
 
87
- constructor(fd: number, schema: Schema) {
88
- this.fd = fd;
89
- this.writer = new RecordBatchStreamWriter();
90
- // Buffer internally (no sink) — we drain manually via writeAll
91
- this.writer.reset(undefined, schema);
92
- this.drain();
194
+ constructor(target: WriterTarget, schema: VgiSchema) {
195
+ this.target = target;
196
+ this.encoder = createIncrementalEncoder(schema);
197
+ // Schema bytes — synchronous on the fd path, queued on the socket path.
198
+ // Callers don't await openStream(), so we serialize via writeChain.
199
+ this.enqueue(this.encoder.start());
93
200
  }
94
201
 
95
- /**
96
- * Write a single batch to the stream. Bytes are flushed synchronously.
97
- *
98
- * Uses _writeRecordBatch() directly to bypass the Arrow writer's schema
99
- * comparison in write(). The public write() method calls compareSchemas()
100
- * and auto-closes the writer if the batch's schema differs (e.g., in
101
- * nullability), silently dropping the batch. Since our output schema is
102
- * set at stream open time and all batches are structurally compatible,
103
- * we skip the comparison.
104
- */
105
- write(batch: RecordBatch): void {
202
+ /** Write a single batch. Resolves once the bytes are queued/flushed. */
203
+ async write(batch: VgiBatch): Promise<void> {
106
204
  if (this.closed) throw new Error("Stream already closed");
107
- (this.writer as any)._writeRecordBatch(batch);
108
- this.drain();
205
+ return this.enqueue(this.encoder.writeBatch(batch));
109
206
  }
110
207
 
111
- /**
112
- * Close the stream (writes EOS marker synchronously).
113
- */
114
- close(): void {
208
+ /** Close the stream (writes EOS marker). */
209
+ async close(): Promise<void> {
115
210
  if (this.closed) return;
116
211
  this.closed = true;
117
- // EOS marker: continuation (0xFFFFFFFF) + metadata length (0x00000000)
118
- const eos = new Uint8Array(new Int32Array([-1, 0]).buffer);
119
- writeAll(this.fd, eos);
212
+ return this.enqueue(this.encoder.finish());
120
213
  }
121
214
 
122
- /**
123
- * Drain buffered bytes from the Arrow writer's internal queue
124
- * and write them synchronously to the output fd.
125
- */
126
- private drain(): void {
127
- const values = (this.writer as any)._sink._values as Uint8Array[];
128
- for (const chunk of values) {
129
- writeAll(this.fd, chunk);
130
- }
131
- values.length = 0;
215
+ private enqueue(bytes: Uint8Array): Promise<void> {
216
+ const next = this.writeChain.then(() => {
217
+ if (this.target.kind === "fd") {
218
+ writeAll(this.target.fd, bytes);
219
+ return;
220
+ }
221
+ return socketWriteAll(this.target.socket, bytes);
222
+ });
223
+ // Swallow rejections on the chain so a single failure doesn't poison
224
+ // every subsequent enqueue with an unhandled rejection. The returned
225
+ // `next` still propagates the error to the caller that triggered it.
226
+ this.writeChain = next.catch(() => undefined);
227
+ return next;
132
228
  }
133
229
  }
@@ -1,18 +0,0 @@
1
- import { RecordBatch, type Schema } from "@query-farm/apache-arrow";
2
- /**
3
- * Rebuild a batch's data to match the given schema's field types.
4
- *
5
- * Batches deserialized from IPC streams (e.g., from PyArrow) may use generic
6
- * types (Float) instead of specific ones (Float64). Arrow-JS's
7
- * RecordBatchStreamWriter silently drops batches whose child Data types don't
8
- * match the writer's schema. Cloning each child Data with the schema's field
9
- * type fixes the type metadata while preserving the underlying buffers.
10
- *
11
- * This is also used to cast compatible input types (e.g., int32→float64,
12
- * float32→float64) when the input batch schema doesn't exactly match the
13
- * method's declared input schema. When the underlying buffer layout differs
14
- * (e.g., 4-byte int32 vs 8-byte float64), we read the values and build a
15
- * new vector with the target type.
16
- */
17
- export declare function conformBatchToSchema(batch: RecordBatch, schema: Schema): RecordBatch;
18
- //# sourceMappingURL=conform.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"conform.d.ts","sourceRoot":"","sources":["../../src/util/conform.ts"],"names":[],"mappings":"AAGA,OAAO,EAGL,WAAW,EACX,KAAK,MAAM,EAIZ,MAAM,0BAA0B,CAAC;AAkBlC;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,GAAG,WAAW,CAiDpF"}
@@ -1,94 +0,0 @@
1
- // © Copyright 2025-2026, Query.Farm LLC - https://query.farm
2
- // SPDX-License-Identifier: Apache-2.0
3
-
4
- import {
5
- type DataType,
6
- makeData,
7
- RecordBatch,
8
- type Schema,
9
- Struct,
10
- Type,
11
- vectorFromArray,
12
- } from "@query-farm/apache-arrow";
13
-
14
- /** Return true when the source type's values can be losslessly read and
15
- * re-encoded into the target type (e.g., int32 → float64). */
16
- function needsValueCast(src: DataType, dst: DataType): boolean {
17
- if (src.typeId === dst.typeId) return false;
18
- // Same broad family (e.g. Float → Float64) — clone is sufficient.
19
- if (src.constructor === dst.constructor) return false;
20
- return true;
21
- }
22
-
23
- /** Check if a type is a numeric type we can cast between.
24
- * Uses typeId instead of instanceof because IPC-deserialized types
25
- * may be generic (e.g., Int_ instead of Int64). */
26
- function isNumeric(t: DataType): boolean {
27
- return t.typeId === Type.Int || t.typeId === Type.Float;
28
- }
29
-
30
- /**
31
- * Rebuild a batch's data to match the given schema's field types.
32
- *
33
- * Batches deserialized from IPC streams (e.g., from PyArrow) may use generic
34
- * types (Float) instead of specific ones (Float64). Arrow-JS's
35
- * RecordBatchStreamWriter silently drops batches whose child Data types don't
36
- * match the writer's schema. Cloning each child Data with the schema's field
37
- * type fixes the type metadata while preserving the underlying buffers.
38
- *
39
- * This is also used to cast compatible input types (e.g., int32→float64,
40
- * float32→float64) when the input batch schema doesn't exactly match the
41
- * method's declared input schema. When the underlying buffer layout differs
42
- * (e.g., 4-byte int32 vs 8-byte float64), we read the values and build a
43
- * new vector with the target type.
44
- */
45
- export function conformBatchToSchema(batch: RecordBatch, schema: Schema): RecordBatch {
46
- if (batch.numRows === 0) return batch;
47
-
48
- // Validate field count and names match before attempting any cast.
49
- if (batch.schema.fields.length !== schema.fields.length) {
50
- throw new TypeError(`Field count mismatch: expected ${schema.fields.length}, got ${batch.schema.fields.length}`);
51
- }
52
- for (let i = 0; i < schema.fields.length; i++) {
53
- if (batch.schema.fields[i].name !== schema.fields[i].name) {
54
- throw new TypeError(
55
- `Field name mismatch at index ${i}: expected '${schema.fields[i].name}', got '${batch.schema.fields[i].name}'`,
56
- );
57
- }
58
- }
59
-
60
- const children = schema.fields.map((f, i) => {
61
- const srcChild = batch.data.children[i];
62
- const srcType = srcChild.type;
63
- const dstType = f.type;
64
-
65
- if (!needsValueCast(srcType, dstType)) {
66
- return srcChild.clone(dstType);
67
- }
68
-
69
- // Numeric → numeric: read values and rebuild with target type.
70
- if (isNumeric(srcType) && isNumeric(dstType)) {
71
- // Read source values via the batch's column vector.
72
- const col = batch.getChildAt(i)!;
73
- const values: number[] = [];
74
- for (let r = 0; r < batch.numRows; r++) {
75
- const v = col.get(r);
76
- values.push(typeof v === "bigint" ? Number(v) : (v as number));
77
- }
78
- return vectorFromArray(values, dstType).data[0];
79
- }
80
-
81
- // Fallback: clone type metadata (works for same-layout types).
82
- return srcChild.clone(dstType);
83
- });
84
-
85
- const structType = new Struct(schema.fields);
86
- const data = makeData({
87
- type: structType,
88
- length: batch.numRows,
89
- children,
90
- nullCount: batch.data.nullCount,
91
- nullBitmap: batch.data.nullBitmap,
92
- });
93
- return new RecordBatch(schema, data, batch.metadata);
94
- }