@query-farm/vgi-rpc 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/dist/client/connect.d.ts.map +1 -1
  2. package/dist/client/index.d.ts +3 -3
  3. package/dist/client/index.d.ts.map +1 -1
  4. package/dist/client/introspect.d.ts +1 -1
  5. package/dist/client/introspect.d.ts.map +1 -1
  6. package/dist/client/ipc.d.ts +1 -1
  7. package/dist/client/ipc.d.ts.map +1 -1
  8. package/dist/client/pipe.d.ts +1 -1
  9. package/dist/client/pipe.d.ts.map +1 -1
  10. package/dist/client/stream.d.ts.map +1 -1
  11. package/dist/dispatch/describe.d.ts +1 -1
  12. package/dist/dispatch/describe.d.ts.map +1 -1
  13. package/dist/dispatch/stream.d.ts +1 -1
  14. package/dist/dispatch/stream.d.ts.map +1 -1
  15. package/dist/dispatch/unary.d.ts.map +1 -1
  16. package/dist/http/common.d.ts +1 -1
  17. package/dist/http/common.d.ts.map +1 -1
  18. package/dist/http/dispatch.d.ts.map +1 -1
  19. package/dist/http/handler.d.ts.map +1 -1
  20. package/dist/http/index.d.ts +2 -2
  21. package/dist/http/index.d.ts.map +1 -1
  22. package/dist/http/token.d.ts.map +1 -1
  23. package/dist/http/types.d.ts.map +1 -1
  24. package/dist/index.d.ts +7 -7
  25. package/dist/index.d.ts.map +1 -1
  26. package/dist/index.js +2163 -2173
  27. package/dist/index.js.map +25 -25
  28. package/dist/protocol.d.ts +1 -1
  29. package/dist/protocol.d.ts.map +1 -1
  30. package/dist/schema.d.ts +1 -1
  31. package/dist/schema.d.ts.map +1 -1
  32. package/dist/server.d.ts +1 -1
  33. package/dist/server.d.ts.map +1 -1
  34. package/dist/types.d.ts +1 -1
  35. package/dist/types.d.ts.map +1 -1
  36. package/dist/util/zstd.d.ts.map +1 -1
  37. package/dist/wire/reader.d.ts.map +1 -1
  38. package/dist/wire/request.d.ts +1 -1
  39. package/dist/wire/request.d.ts.map +1 -1
  40. package/dist/wire/response.d.ts +1 -1
  41. package/dist/wire/response.d.ts.map +1 -1
  42. package/dist/wire/writer.d.ts.map +1 -1
  43. package/package.json +6 -2
  44. package/src/client/connect.ts +12 -20
  45. package/src/client/index.ts +8 -8
  46. package/src/client/introspect.ts +11 -15
  47. package/src/client/ipc.ts +17 -31
  48. package/src/client/pipe.ts +16 -36
  49. package/src/client/stream.ts +20 -46
  50. package/src/dispatch/describe.ts +14 -26
  51. package/src/dispatch/stream.ts +5 -18
  52. package/src/dispatch/unary.ts +1 -2
  53. package/src/http/common.ts +8 -18
  54. package/src/http/dispatch.ts +31 -86
  55. package/src/http/handler.ts +20 -55
  56. package/src/http/index.ts +2 -2
  57. package/src/http/token.ts +2 -7
  58. package/src/http/types.ts +2 -6
  59. package/src/index.ts +43 -43
  60. package/src/protocol.ts +7 -7
  61. package/src/schema.ts +11 -15
  62. package/src/server.ts +14 -34
  63. package/src/types.ts +9 -36
  64. package/src/util/zstd.ts +2 -8
  65. package/src/wire/reader.ts +2 -4
  66. package/src/wire/request.ts +4 -15
  67. package/src/wire/response.ts +8 -24
  68. package/src/wire/writer.ts +1 -5
package/dist/index.js CHANGED
@@ -50,11 +50,122 @@ var init_zstd = __esm(() => {
50
50
  isBun = typeof globalThis.Bun !== "undefined";
51
51
  });
52
52
 
53
- // src/server.ts
54
- import { Schema as Schema4 } from "@query-farm/apache-arrow";
53
+ // src/constants.ts
54
+ var RPC_METHOD_KEY = "vgi_rpc.method";
55
+ var LOG_LEVEL_KEY = "vgi_rpc.log_level";
56
+ var LOG_MESSAGE_KEY = "vgi_rpc.log_message";
57
+ var LOG_EXTRA_KEY = "vgi_rpc.log_extra";
58
+ var REQUEST_VERSION_KEY = "vgi_rpc.request_version";
59
+ var REQUEST_VERSION = "1";
60
+ var SERVER_ID_KEY = "vgi_rpc.server_id";
61
+ var REQUEST_ID_KEY = "vgi_rpc.request_id";
62
+ var PROTOCOL_NAME_KEY = "vgi_rpc.protocol_name";
63
+ var DESCRIBE_VERSION_KEY = "vgi_rpc.describe_version";
64
+ var DESCRIBE_VERSION = "2";
65
+ var DESCRIBE_METHOD_NAME = "__describe__";
66
+ var STATE_KEY = "vgi_rpc.stream_state#b64";
67
+
68
+ // src/http/common.ts
69
+ import {
70
+ makeData,
71
+ RecordBatch,
72
+ RecordBatchReader,
73
+ RecordBatchStreamWriter,
74
+ Struct
75
+ } from "@query-farm/apache-arrow";
76
+ var ARROW_CONTENT_TYPE = "application/vnd.apache.arrow.stream";
77
+
78
+ class HttpRpcError extends Error {
79
+ statusCode;
80
+ constructor(message, statusCode) {
81
+ super(message);
82
+ this.statusCode = statusCode;
83
+ this.name = "HttpRpcError";
84
+ }
85
+ }
86
+ function conformBatchToSchema(batch, schema) {
87
+ if (batch.numRows === 0)
88
+ return batch;
89
+ const children = schema.fields.map((f, i) => batch.data.children[i].clone(f.type));
90
+ const structType = new Struct(schema.fields);
91
+ const data = makeData({
92
+ type: structType,
93
+ length: batch.numRows,
94
+ children,
95
+ nullCount: batch.data.nullCount,
96
+ nullBitmap: batch.data.nullBitmap
97
+ });
98
+ return new RecordBatch(schema, data, batch.metadata);
99
+ }
100
+ function serializeIpcStream(schema, batches) {
101
+ const writer = new RecordBatchStreamWriter;
102
+ writer.reset(undefined, schema);
103
+ for (const batch of batches) {
104
+ writer.write(conformBatchToSchema(batch, schema));
105
+ }
106
+ writer.close();
107
+ return writer.toUint8Array(true);
108
+ }
109
+ function arrowResponse(body, status = 200, extraHeaders) {
110
+ const headers = extraHeaders ?? new Headers;
111
+ headers.set("Content-Type", ARROW_CONTENT_TYPE);
112
+ return new Response(body, { status, headers });
113
+ }
114
+ async function readRequestFromBody(body) {
115
+ const reader = await RecordBatchReader.from(body);
116
+ await reader.open();
117
+ const schema = reader.schema;
118
+ if (!schema) {
119
+ throw new HttpRpcError("Empty IPC stream: no schema", 400);
120
+ }
121
+ const batches = reader.readAll();
122
+ if (batches.length === 0) {
123
+ throw new HttpRpcError("IPC stream contains no batches", 400);
124
+ }
125
+ return { schema, batch: batches[0] };
126
+ }
127
+
128
+ // src/client/introspect.ts
129
+ import { Schema as ArrowSchema, RecordBatchReader as RecordBatchReader4 } from "@query-farm/apache-arrow";
130
+
131
+ // src/client/ipc.ts
132
+ import {
133
+ Binary,
134
+ Bool,
135
+ DataType,
136
+ Float64,
137
+ Int64,
138
+ makeData as makeData2,
139
+ RecordBatch as RecordBatch2,
140
+ RecordBatchReader as RecordBatchReader3,
141
+ Struct as Struct2,
142
+ Utf8,
143
+ vectorFromArray
144
+ } from "@query-farm/apache-arrow";
145
+
146
+ // src/errors.ts
147
+ class RpcError extends Error {
148
+ errorType;
149
+ errorMessage;
150
+ remoteTraceback;
151
+ constructor(errorType, errorMessage, remoteTraceback) {
152
+ super(`${errorType}: ${errorMessage}`);
153
+ this.errorType = errorType;
154
+ this.errorMessage = errorMessage;
155
+ this.remoteTraceback = remoteTraceback;
156
+ this.name = "RpcError";
157
+ }
158
+ }
159
+
160
+ class VersionError extends Error {
161
+ constructor(message) {
162
+ super(message);
163
+ this.name = "VersionError";
164
+ }
165
+ }
55
166
 
56
167
  // src/wire/reader.ts
57
- import { RecordBatchReader } from "@query-farm/apache-arrow";
168
+ import { RecordBatchReader as RecordBatchReader2 } from "@query-farm/apache-arrow";
58
169
 
59
170
  class IpcStreamReader {
60
171
  reader;
@@ -64,7 +175,7 @@ class IpcStreamReader {
64
175
  this.reader = reader;
65
176
  }
66
177
  static async create(input) {
67
- const reader = await RecordBatchReader.from(input);
178
+ const reader = await RecordBatchReader2.from(input);
68
179
  await reader.open({ autoDestroy: false });
69
180
  if (reader.closed) {
70
181
  throw new Error("Input stream closed before first IPC message");
@@ -124,2386 +235,2265 @@ class IpcStreamReader {
124
235
  }
125
236
  }
126
237
 
127
- // src/wire/writer.ts
128
- import {
129
- RecordBatchStreamWriter
130
- } from "@query-farm/apache-arrow";
131
- import { writeSync } from "node:fs";
132
- var STDOUT_FD = 1;
133
- function writeAll(fd, data) {
134
- let offset = 0;
135
- while (offset < data.length) {
136
- try {
137
- const written = writeSync(fd, data, offset, data.length - offset);
138
- if (written <= 0)
139
- throw new Error(`writeSync returned ${written}`);
140
- offset += written;
141
- } catch (e) {
142
- if (e.code === "EAGAIN") {
143
- Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 1);
144
- continue;
145
- }
146
- throw e;
147
- }
148
- }
238
+ // src/client/ipc.ts
239
+ function inferArrowType(value) {
240
+ if (typeof value === "string")
241
+ return new Utf8;
242
+ if (typeof value === "boolean")
243
+ return new Bool;
244
+ if (typeof value === "bigint")
245
+ return new Int64;
246
+ if (typeof value === "number")
247
+ return new Float64;
248
+ if (value instanceof Uint8Array)
249
+ return new Binary;
250
+ return new Utf8;
149
251
  }
150
-
151
- class IpcStreamWriter {
152
- fd;
153
- constructor(fd = STDOUT_FD) {
154
- this.fd = fd;
252
+ function coerceForArrow(type, value) {
253
+ if (value == null)
254
+ return value;
255
+ if (DataType.isInt(type) && type.bitWidth === 64) {
256
+ if (typeof value === "number")
257
+ return BigInt(value);
258
+ return value;
155
259
  }
156
- writeStream(schema, batches) {
157
- const writer = new RecordBatchStreamWriter;
158
- writer.reset(undefined, schema);
159
- for (const batch of batches) {
160
- writer._writeRecordBatch(batch);
260
+ if (DataType.isMap(type)) {
261
+ if (value instanceof Map) {
262
+ const entriesField = type.children[0];
263
+ const valueType = entriesField.type.children[1].type;
264
+ const coerced = new Map;
265
+ for (const [k, v] of value) {
266
+ coerced.set(k, coerceForArrow(valueType, v));
267
+ }
268
+ return coerced;
161
269
  }
162
- writer.close();
163
- const bytes = writer.toUint8Array(true);
164
- writeAll(this.fd, bytes);
270
+ return value;
165
271
  }
166
- openStream(schema) {
167
- return new IncrementalStream(this.fd, schema);
272
+ if (DataType.isList(type)) {
273
+ if (Array.isArray(value)) {
274
+ const elemType = type.children[0].type;
275
+ return value.map((v) => coerceForArrow(elemType, v));
276
+ }
277
+ return value;
168
278
  }
279
+ return value;
169
280
  }
170
-
171
- class IncrementalStream {
172
- writer;
173
- fd;
174
- closed = false;
175
- constructor(fd, schema) {
176
- this.fd = fd;
177
- this.writer = new RecordBatchStreamWriter;
178
- this.writer.reset(undefined, schema);
179
- this.drain();
281
+ function buildRequestIpc(schema, params, method) {
282
+ const metadata = new Map;
283
+ metadata.set(RPC_METHOD_KEY, method);
284
+ metadata.set(REQUEST_VERSION_KEY, REQUEST_VERSION);
285
+ if (schema.fields.length === 0) {
286
+ const structType2 = new Struct2(schema.fields);
287
+ const data2 = makeData2({
288
+ type: structType2,
289
+ length: 1,
290
+ children: [],
291
+ nullCount: 0
292
+ });
293
+ const batch2 = new RecordBatch2(schema, data2, metadata);
294
+ return serializeIpcStream(schema, [batch2]);
180
295
  }
181
- write(batch) {
182
- if (this.closed)
183
- throw new Error("Stream already closed");
184
- this.writer._writeRecordBatch(batch);
185
- this.drain();
296
+ const children = schema.fields.map((f) => {
297
+ const val = coerceForArrow(f.type, params[f.name]);
298
+ return vectorFromArray([val], f.type).data[0];
299
+ });
300
+ const structType = new Struct2(schema.fields);
301
+ const data = makeData2({
302
+ type: structType,
303
+ length: 1,
304
+ children,
305
+ nullCount: 0
306
+ });
307
+ const batch = new RecordBatch2(schema, data, metadata);
308
+ return serializeIpcStream(schema, [batch]);
309
+ }
310
+ async function readResponseBatches(body) {
311
+ const reader = await RecordBatchReader3.from(body);
312
+ await reader.open();
313
+ const schema = reader.schema;
314
+ if (!schema) {
315
+ throw new RpcError("ProtocolError", "Empty IPC stream: no schema", "");
186
316
  }
187
- close() {
188
- if (this.closed)
189
- return;
190
- this.closed = true;
191
- const eos = new Uint8Array(new Int32Array([-1, 0]).buffer);
192
- writeAll(this.fd, eos);
317
+ const batches = reader.readAll();
318
+ return { schema, batches };
319
+ }
320
+ function dispatchLogOrError(batch, onLog) {
321
+ const meta = batch.metadata;
322
+ if (!meta)
323
+ return false;
324
+ const level = meta.get(LOG_LEVEL_KEY);
325
+ if (!level)
326
+ return false;
327
+ const message = meta.get(LOG_MESSAGE_KEY) ?? "";
328
+ if (level === "EXCEPTION") {
329
+ const extraStr = meta.get(LOG_EXTRA_KEY);
330
+ let errorType = "RpcError";
331
+ let errorMessage = message;
332
+ let traceback = "";
333
+ if (extraStr) {
334
+ try {
335
+ const extra = JSON.parse(extraStr);
336
+ errorType = extra.exception_type ?? "RpcError";
337
+ errorMessage = extra.exception_message ?? message;
338
+ traceback = extra.traceback ?? "";
339
+ } catch {}
340
+ }
341
+ throw new RpcError(errorType, errorMessage, traceback);
193
342
  }
194
- drain() {
195
- const values = this.writer._sink._values;
196
- for (const chunk of values) {
197
- writeAll(this.fd, chunk);
343
+ if (onLog) {
344
+ const extraStr = meta.get(LOG_EXTRA_KEY);
345
+ let extra;
346
+ if (extraStr) {
347
+ try {
348
+ extra = JSON.parse(extraStr);
349
+ } catch {}
198
350
  }
199
- values.length = 0;
351
+ onLog({ level, message, extra });
200
352
  }
353
+ return true;
201
354
  }
202
-
203
- // src/wire/request.ts
204
- import { DataType } from "@query-farm/apache-arrow";
205
-
206
- // src/constants.ts
207
- var RPC_METHOD_KEY = "vgi_rpc.method";
208
- var LOG_LEVEL_KEY = "vgi_rpc.log_level";
209
- var LOG_MESSAGE_KEY = "vgi_rpc.log_message";
210
- var LOG_EXTRA_KEY = "vgi_rpc.log_extra";
211
- var REQUEST_VERSION_KEY = "vgi_rpc.request_version";
212
- var REQUEST_VERSION = "1";
213
- var SERVER_ID_KEY = "vgi_rpc.server_id";
214
- var REQUEST_ID_KEY = "vgi_rpc.request_id";
215
- var PROTOCOL_NAME_KEY = "vgi_rpc.protocol_name";
216
- var DESCRIBE_VERSION_KEY = "vgi_rpc.describe_version";
217
- var DESCRIBE_VERSION = "2";
218
- var DESCRIBE_METHOD_NAME = "__describe__";
219
- var STATE_KEY = "vgi_rpc.stream_state#b64";
220
-
221
- // src/errors.ts
222
- class RpcError extends Error {
223
- errorType;
224
- errorMessage;
225
- remoteTraceback;
226
- constructor(errorType, errorMessage, remoteTraceback) {
227
- super(`${errorType}: ${errorMessage}`);
228
- this.errorType = errorType;
229
- this.errorMessage = errorMessage;
230
- this.remoteTraceback = remoteTraceback;
231
- this.name = "RpcError";
355
+ function extractBatchRows(batch) {
356
+ const rows = [];
357
+ for (let r = 0;r < batch.numRows; r++) {
358
+ const row = {};
359
+ for (let i = 0;i < batch.schema.fields.length; i++) {
360
+ const field = batch.schema.fields[i];
361
+ let value = batch.getChildAt(i)?.get(r);
362
+ if (typeof value === "bigint") {
363
+ if (value >= BigInt(Number.MIN_SAFE_INTEGER) && value <= BigInt(Number.MAX_SAFE_INTEGER)) {
364
+ value = Number(value);
365
+ }
366
+ }
367
+ row[field.name] = value;
368
+ }
369
+ rows.push(row);
232
370
  }
371
+ return rows;
233
372
  }
234
-
235
- class VersionError extends Error {
236
- constructor(message) {
237
- super(message);
238
- this.name = "VersionError";
239
- }
373
+ async function readSequentialStreams(body) {
374
+ const stream = new ReadableStream({
375
+ start(controller) {
376
+ controller.enqueue(body);
377
+ controller.close();
378
+ }
379
+ });
380
+ return IpcStreamReader.create(stream);
240
381
  }
241
382
 
242
- // src/wire/request.ts
243
- function parseRequest(schema, batch) {
244
- const metadata = batch.metadata ?? new Map;
245
- const methodName = metadata.get(RPC_METHOD_KEY);
246
- if (methodName === undefined) {
247
- throw new RpcError("ProtocolError", "Missing 'vgi_rpc.method' in request batch custom_metadata. " + "Each request batch must carry a 'vgi_rpc.method' key in its Arrow IPC custom_metadata " + "with the method name as a UTF-8 string.", "");
248
- }
249
- const version = metadata.get(REQUEST_VERSION_KEY);
250
- if (version === undefined) {
251
- throw new VersionError("Missing 'vgi_rpc.request_version' in request batch custom_metadata. " + `Set the 'vgi_rpc.request_version' custom_metadata value to '${REQUEST_VERSION}'.`);
252
- }
253
- if (version !== REQUEST_VERSION) {
254
- throw new VersionError(`Unsupported request version '${version}', expected '${REQUEST_VERSION}'. ` + `Set the 'vgi_rpc.request_version' custom_metadata value to '${REQUEST_VERSION}'.`);
383
+ // src/client/introspect.ts
384
+ async function deserializeSchema(bytes) {
385
+ const reader = await RecordBatchReader4.from(bytes);
386
+ await reader.open();
387
+ return reader.schema;
388
+ }
389
+ async function parseDescribeResponse(batches, onLog) {
390
+ let dataBatch = null;
391
+ for (const batch of batches) {
392
+ if (batch.numRows === 0) {
393
+ dispatchLogOrError(batch, onLog);
394
+ continue;
395
+ }
396
+ dataBatch = batch;
255
397
  }
256
- const requestId = metadata.get(REQUEST_ID_KEY) ?? null;
257
- const params = {};
258
- if (schema.fields.length > 0 && batch.numRows !== 1) {
259
- throw new RpcError("ProtocolError", `Expected 1 row in request batch, got ${batch.numRows}. ` + "Each parameter is a column (not a row). The batch should have exactly 1 row.", "");
398
+ if (!dataBatch) {
399
+ throw new Error("Empty __describe__ response");
260
400
  }
261
- for (let i = 0;i < schema.fields.length; i++) {
262
- const field = schema.fields[i];
263
- if (DataType.isMap(field.type)) {
264
- params[field.name] = batch.getChildAt(i).data[0];
265
- continue;
401
+ const meta = dataBatch.metadata;
402
+ const protocolName = meta?.get(PROTOCOL_NAME_KEY) ?? "";
403
+ const methods = [];
404
+ for (let i = 0;i < dataBatch.numRows; i++) {
405
+ const name = dataBatch.getChildAt(0).get(i);
406
+ const methodType = dataBatch.getChildAt(1).get(i);
407
+ const doc = dataBatch.getChildAt(2)?.get(i);
408
+ const _hasReturn = dataBatch.getChildAt(3).get(i);
409
+ const paramsIpc = dataBatch.getChildAt(4).get(i);
410
+ const resultIpc = dataBatch.getChildAt(5).get(i);
411
+ const paramTypesJson = dataBatch.getChildAt(6)?.get(i);
412
+ const paramDefaultsJson = dataBatch.getChildAt(7)?.get(i);
413
+ const hasHeader = dataBatch.getChildAt(8).get(i);
414
+ const headerIpc = dataBatch.getChildAt(9)?.get(i);
415
+ const paramsSchema = await deserializeSchema(paramsIpc);
416
+ const resultSchema = await deserializeSchema(resultIpc);
417
+ let paramTypes;
418
+ if (paramTypesJson) {
419
+ try {
420
+ paramTypes = JSON.parse(paramTypesJson);
421
+ } catch {}
266
422
  }
267
- let value = batch.getChildAt(i)?.get(0);
268
- if (typeof value === "bigint") {
269
- if (value >= BigInt(Number.MIN_SAFE_INTEGER) && value <= BigInt(Number.MAX_SAFE_INTEGER)) {
270
- value = Number(value);
271
- }
423
+ let defaults;
424
+ if (paramDefaultsJson) {
425
+ try {
426
+ defaults = JSON.parse(paramDefaultsJson);
427
+ } catch {}
272
428
  }
273
- params[field.name] = value;
429
+ const info = {
430
+ name,
431
+ type: methodType,
432
+ paramsSchema,
433
+ resultSchema,
434
+ doc: doc ?? undefined,
435
+ paramTypes,
436
+ defaults
437
+ };
438
+ if (methodType === "stream") {
439
+ info.outputSchema = resultSchema;
440
+ }
441
+ if (hasHeader && headerIpc) {
442
+ info.headerSchema = await deserializeSchema(headerIpc);
443
+ }
444
+ methods.push(info);
274
445
  }
275
- return {
276
- methodName,
277
- requestVersion: version,
278
- requestId,
279
- schema,
280
- params,
281
- rawMetadata: metadata
282
- };
446
+ return { protocolName, methods };
447
+ }
448
+ async function httpIntrospect(baseUrl, options) {
449
+ const prefix = options?.prefix ?? "/vgi";
450
+ const emptySchema = new ArrowSchema([]);
451
+ const body = buildRequestIpc(emptySchema, {}, DESCRIBE_METHOD_NAME);
452
+ const response = await fetch(`${baseUrl}${prefix}/${DESCRIBE_METHOD_NAME}`, {
453
+ method: "POST",
454
+ headers: { "Content-Type": ARROW_CONTENT_TYPE },
455
+ body
456
+ });
457
+ const responseBody = new Uint8Array(await response.arrayBuffer());
458
+ const { batches } = await readResponseBatches(responseBody);
459
+ return parseDescribeResponse(batches);
283
460
  }
284
461
 
285
- // src/wire/response.ts
286
- import {
287
- RecordBatch,
288
- Data,
289
- DataType as DataType2,
290
- makeData,
291
- Struct,
292
- vectorFromArray
293
- } from "@query-farm/apache-arrow";
294
- function coerceInt64(schema, values) {
295
- const result = { ...values };
296
- for (const field of schema.fields) {
297
- const val = result[field.name];
298
- if (val === undefined)
299
- continue;
300
- if (!DataType2.isInt(field.type) || field.type.bitWidth !== 64)
301
- continue;
302
- if (Array.isArray(val)) {
303
- result[field.name] = val.map((v) => typeof v === "number" ? BigInt(v) : v);
304
- } else if (typeof val === "number") {
305
- result[field.name] = BigInt(val);
306
- }
462
+ // src/client/stream.ts
463
+ import { Field, makeData as makeData3, RecordBatch as RecordBatch3, Schema, Struct as Struct3, vectorFromArray as vectorFromArray2 } from "@query-farm/apache-arrow";
464
+ class HttpStreamSession {
465
+ _baseUrl;
466
+ _prefix;
467
+ _method;
468
+ _stateToken;
469
+ _outputSchema;
470
+ _inputSchema;
471
+ _onLog;
472
+ _pendingBatches;
473
+ _finished;
474
+ _header;
475
+ _compressionLevel;
476
+ _compressFn;
477
+ _decompressFn;
478
+ constructor(opts) {
479
+ this._baseUrl = opts.baseUrl;
480
+ this._prefix = opts.prefix;
481
+ this._method = opts.method;
482
+ this._stateToken = opts.stateToken;
483
+ this._outputSchema = opts.outputSchema;
484
+ this._inputSchema = opts.inputSchema;
485
+ this._onLog = opts.onLog;
486
+ this._pendingBatches = opts.pendingBatches;
487
+ this._finished = opts.finished;
488
+ this._header = opts.header;
489
+ this._compressionLevel = opts.compressionLevel;
490
+ this._compressFn = opts.compressFn;
491
+ this._decompressFn = opts.decompressFn;
307
492
  }
308
- return result;
309
- }
310
- function buildResultBatch(schema, values, serverId, requestId) {
311
- const metadata = new Map;
312
- metadata.set(SERVER_ID_KEY, serverId);
313
- if (requestId !== null) {
314
- metadata.set(REQUEST_ID_KEY, requestId);
493
+ get header() {
494
+ return this._header;
315
495
  }
316
- if (schema.fields.length === 0) {
317
- return buildEmptyBatch(schema, metadata);
496
+ _buildHeaders() {
497
+ const headers = {
498
+ "Content-Type": ARROW_CONTENT_TYPE
499
+ };
500
+ if (this._compressionLevel != null) {
501
+ headers["Content-Encoding"] = "zstd";
502
+ headers["Accept-Encoding"] = "zstd";
503
+ }
504
+ return headers;
318
505
  }
319
- for (const field of schema.fields) {
320
- if (values[field.name] === undefined && !field.nullable) {
321
- const got = Object.keys(values);
322
- throw new TypeError(`Handler result missing required field '${field.name}'. Got keys: [${got.join(", ")}]`);
506
+ _prepareBody(content) {
507
+ if (this._compressionLevel != null && this._compressFn) {
508
+ return this._compressFn(content, this._compressionLevel);
323
509
  }
510
+ return content;
324
511
  }
325
- const coerced = coerceInt64(schema, values);
326
- const children = schema.fields.map((f) => {
327
- let val = coerced[f.name];
328
- if (val instanceof Data) {
329
- return val;
512
+ async _readResponse(resp) {
513
+ let body = new Uint8Array(await resp.arrayBuffer());
514
+ if (resp.headers.get("Content-Encoding") === "zstd" && this._decompressFn) {
515
+ body = new Uint8Array(this._decompressFn(body));
330
516
  }
331
- const arr = vectorFromArray([val], f.type);
332
- return arr.data[0];
333
- });
334
- const structType = new Struct(schema.fields);
335
- const data = makeData({
336
- type: structType,
337
- length: 1,
338
- children,
339
- nullCount: 0
340
- });
341
- return new RecordBatch(schema, data, metadata);
342
- }
343
- function buildErrorBatch(schema, error, serverId, requestId) {
344
- const metadata = new Map;
345
- metadata.set(LOG_LEVEL_KEY, "EXCEPTION");
346
- metadata.set(LOG_MESSAGE_KEY, `${error.constructor.name}: ${error.message}`);
347
- const extra = {
348
- exception_type: error.constructor.name,
349
- exception_message: error.message,
350
- traceback: error.stack ?? ""
351
- };
352
- metadata.set(LOG_EXTRA_KEY, JSON.stringify(extra));
353
- metadata.set(SERVER_ID_KEY, serverId);
354
- if (requestId !== null) {
355
- metadata.set(REQUEST_ID_KEY, requestId);
356
- }
357
- return buildEmptyBatch(schema, metadata);
358
- }
359
- function buildLogBatch(schema, level, message, extra, serverId, requestId) {
360
- const metadata = new Map;
361
- metadata.set(LOG_LEVEL_KEY, level);
362
- metadata.set(LOG_MESSAGE_KEY, message);
363
- if (extra) {
364
- metadata.set(LOG_EXTRA_KEY, JSON.stringify(extra));
517
+ return body;
365
518
  }
366
- if (serverId != null) {
367
- metadata.set(SERVER_ID_KEY, serverId);
519
+ async exchange(input) {
520
+ if (this._stateToken === null) {
521
+ throw new RpcError("ProtocolError", "Stream has finished — no state token available", "");
522
+ }
523
+ if (input.length === 0) {
524
+ const zeroSchema = this._inputSchema ?? this._outputSchema;
525
+ const emptyBatch = this._buildEmptyBatch(zeroSchema);
526
+ const metadata2 = new Map;
527
+ metadata2.set(STATE_KEY, this._stateToken);
528
+ const batchWithMeta = new RecordBatch3(zeroSchema, emptyBatch.data, metadata2);
529
+ return this._doExchange(zeroSchema, [batchWithMeta]);
530
+ }
531
+ const keys = Object.keys(input[0]);
532
+ const fields = keys.map((key) => {
533
+ let sample;
534
+ for (const row of input) {
535
+ if (row[key] != null) {
536
+ sample = row[key];
537
+ break;
538
+ }
539
+ }
540
+ const arrowType = inferArrowType(sample);
541
+ const nullable = input.some((row) => row[key] == null);
542
+ return new Field(key, arrowType, nullable);
543
+ });
544
+ const inputSchema = new Schema(fields);
545
+ const children = inputSchema.fields.map((f) => {
546
+ const values = input.map((row) => row[f.name]);
547
+ return vectorFromArray2(values, f.type).data[0];
548
+ });
549
+ const structType = new Struct3(inputSchema.fields);
550
+ const data = makeData3({
551
+ type: structType,
552
+ length: input.length,
553
+ children,
554
+ nullCount: 0
555
+ });
556
+ const metadata = new Map;
557
+ metadata.set(STATE_KEY, this._stateToken);
558
+ const batch = new RecordBatch3(inputSchema, data, metadata);
559
+ return this._doExchange(inputSchema, [batch]);
368
560
  }
369
- if (requestId != null) {
370
- metadata.set(REQUEST_ID_KEY, requestId);
561
+ async _doExchange(schema, batches) {
562
+ const body = serializeIpcStream(schema, batches);
563
+ const resp = await fetch(`${this._baseUrl}${this._prefix}/${this._method}/exchange`, {
564
+ method: "POST",
565
+ headers: this._buildHeaders(),
566
+ body: this._prepareBody(body)
567
+ });
568
+ const responseBody = await this._readResponse(resp);
569
+ const { batches: responseBatches } = await readResponseBatches(responseBody);
570
+ let resultRows = [];
571
+ for (const batch of responseBatches) {
572
+ if (batch.numRows === 0) {
573
+ dispatchLogOrError(batch, this._onLog);
574
+ const token2 = batch.metadata?.get(STATE_KEY);
575
+ if (token2) {
576
+ this._stateToken = token2;
577
+ }
578
+ continue;
579
+ }
580
+ const token = batch.metadata?.get(STATE_KEY);
581
+ if (token) {
582
+ this._stateToken = token;
583
+ }
584
+ resultRows = extractBatchRows(batch);
585
+ }
586
+ return resultRows;
371
587
  }
372
- return buildEmptyBatch(schema, metadata);
373
- }
374
- function buildEmptyBatch(schema, metadata) {
375
- const children = schema.fields.map((f) => {
376
- return makeData({ type: f.type, length: 0, nullCount: 0 });
377
- });
378
- if (schema.fields.length === 0) {
379
- const structType2 = new Struct(schema.fields);
380
- const data2 = makeData({
381
- type: structType2,
588
+ _buildEmptyBatch(schema) {
589
+ const children = schema.fields.map((f) => {
590
+ return makeData3({ type: f.type, length: 0, nullCount: 0 });
591
+ });
592
+ const structType = new Struct3(schema.fields);
593
+ const data = makeData3({
594
+ type: structType,
382
595
  length: 0,
383
- children: [],
596
+ children,
384
597
  nullCount: 0
385
598
  });
386
- return new RecordBatch(schema, data2, metadata);
599
+ return new RecordBatch3(schema, data);
387
600
  }
388
- const structType = new Struct(schema.fields);
389
- const data = makeData({
390
- type: structType,
391
- length: 0,
392
- children,
393
- nullCount: 0
394
- });
395
- return new RecordBatch(schema, data, metadata);
396
- }
397
-
398
- // src/dispatch/describe.ts
399
- import {
400
- Schema as Schema2,
401
- Field as Field2,
402
- RecordBatch as RecordBatch2,
403
- Utf8,
404
- Bool,
405
- Binary,
406
- vectorFromArray as vectorFromArray2,
407
- makeData as makeData2,
408
- Struct as Struct2
409
- } from "@query-farm/apache-arrow";
410
-
411
- // src/util/schema.ts
412
- import { RecordBatchStreamWriter as RecordBatchStreamWriter2 } from "@query-farm/apache-arrow";
413
- function serializeSchema(schema) {
414
- const writer = new RecordBatchStreamWriter2;
415
- writer.reset(undefined, schema);
416
- writer.close();
417
- return writer.toUint8Array(true);
418
- }
419
-
420
- // src/dispatch/describe.ts
421
- var DESCRIBE_SCHEMA = new Schema2([
422
- new Field2("name", new Utf8, false),
423
- new Field2("method_type", new Utf8, false),
424
- new Field2("doc", new Utf8, true),
425
- new Field2("has_return", new Bool, false),
426
- new Field2("params_schema_ipc", new Binary, false),
427
- new Field2("result_schema_ipc", new Binary, false),
428
- new Field2("param_types_json", new Utf8, true),
429
- new Field2("param_defaults_json", new Utf8, true),
430
- new Field2("has_header", new Bool, false),
431
- new Field2("header_schema_ipc", new Binary, true)
432
- ]);
433
- function buildDescribeBatch(protocolName, methods, serverId) {
434
- const sortedEntries = [...methods.entries()].sort(([a], [b]) => a.localeCompare(b));
435
- const names = [];
436
- const methodTypes = [];
437
- const docs = [];
438
- const hasReturns = [];
439
- const paramsSchemas = [];
440
- const resultSchemas = [];
441
- const paramTypesJsons = [];
442
- const paramDefaultsJsons = [];
443
- const hasHeaders = [];
444
- const headerSchemas = [];
445
- for (const [name, method] of sortedEntries) {
446
- names.push(name);
447
- methodTypes.push(method.type);
448
- docs.push(method.doc ?? null);
449
- const hasReturn = method.type === "unary" && method.resultSchema.fields.length > 0;
450
- hasReturns.push(hasReturn);
451
- paramsSchemas.push(serializeSchema(method.paramsSchema));
452
- resultSchemas.push(serializeSchema(method.resultSchema));
453
- if (method.paramTypes && Object.keys(method.paramTypes).length > 0) {
454
- paramTypesJsons.push(JSON.stringify(method.paramTypes));
455
- } else {
456
- paramTypesJsons.push(null);
601
+ async* [Symbol.asyncIterator]() {
602
+ for (const batch of this._pendingBatches) {
603
+ if (batch.numRows === 0) {
604
+ dispatchLogOrError(batch, this._onLog);
605
+ continue;
606
+ }
607
+ yield extractBatchRows(batch);
457
608
  }
458
- if (method.defaults && Object.keys(method.defaults).length > 0) {
459
- const safe = {};
460
- for (const [k, v] of Object.entries(method.defaults)) {
461
- if (v === null || typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
462
- safe[k] = v;
609
+ this._pendingBatches = [];
610
+ if (this._finished)
611
+ return;
612
+ if (this._stateToken === null)
613
+ return;
614
+ while (true) {
615
+ const responseBody = await this._sendContinuation(this._stateToken);
616
+ const { batches } = await readResponseBatches(responseBody);
617
+ let gotContinuation = false;
618
+ for (const batch of batches) {
619
+ if (batch.numRows === 0) {
620
+ const token = batch.metadata?.get(STATE_KEY);
621
+ if (token) {
622
+ this._stateToken = token;
623
+ gotContinuation = true;
624
+ continue;
625
+ }
626
+ dispatchLogOrError(batch, this._onLog);
627
+ continue;
463
628
  }
629
+ yield extractBatchRows(batch);
464
630
  }
465
- paramDefaultsJsons.push(Object.keys(safe).length > 0 ? JSON.stringify(safe) : null);
466
- } else {
467
- paramDefaultsJsons.push(null);
631
+ if (!gotContinuation)
632
+ break;
468
633
  }
469
- hasHeaders.push(!!method.headerSchema);
470
- headerSchemas.push(method.headerSchema ? serializeSchema(method.headerSchema) : null);
471
634
  }
472
- const nameArr = vectorFromArray2(names, new Utf8);
473
- const methodTypeArr = vectorFromArray2(methodTypes, new Utf8);
474
- const docArr = vectorFromArray2(docs, new Utf8);
475
- const hasReturnArr = vectorFromArray2(hasReturns, new Bool);
476
- const paramsSchemaArr = vectorFromArray2(paramsSchemas, new Binary);
477
- const resultSchemaArr = vectorFromArray2(resultSchemas, new Binary);
478
- const paramTypesArr = vectorFromArray2(paramTypesJsons, new Utf8);
479
- const paramDefaultsArr = vectorFromArray2(paramDefaultsJsons, new Utf8);
480
- const hasHeaderArr = vectorFromArray2(hasHeaders, new Bool);
481
- const headerSchemaArr = vectorFromArray2(headerSchemas, new Binary);
482
- const children = [
483
- nameArr.data[0],
484
- methodTypeArr.data[0],
485
- docArr.data[0],
486
- hasReturnArr.data[0],
487
- paramsSchemaArr.data[0],
488
- resultSchemaArr.data[0],
489
- paramTypesArr.data[0],
490
- paramDefaultsArr.data[0],
491
- hasHeaderArr.data[0],
492
- headerSchemaArr.data[0]
493
- ];
494
- const structType = new Struct2(DESCRIBE_SCHEMA.fields);
495
- const data = makeData2({
496
- type: structType,
497
- length: sortedEntries.length,
498
- children,
499
- nullCount: 0
500
- });
501
- const metadata = new Map;
502
- metadata.set(PROTOCOL_NAME_KEY, protocolName);
503
- metadata.set(REQUEST_VERSION_KEY, REQUEST_VERSION);
504
- metadata.set(DESCRIBE_VERSION_KEY, DESCRIBE_VERSION);
505
- metadata.set(SERVER_ID_KEY, serverId);
506
- const batch = new RecordBatch2(DESCRIBE_SCHEMA, data, metadata);
507
- return { batch, metadata };
635
+ async _sendContinuation(token) {
636
+ const emptySchema = new Schema([]);
637
+ const metadata = new Map;
638
+ metadata.set(STATE_KEY, token);
639
+ const structType = new Struct3(emptySchema.fields);
640
+ const data = makeData3({
641
+ type: structType,
642
+ length: 1,
643
+ children: [],
644
+ nullCount: 0
645
+ });
646
+ const batch = new RecordBatch3(emptySchema, data, metadata);
647
+ const body = serializeIpcStream(emptySchema, [batch]);
648
+ const resp = await fetch(`${this._baseUrl}${this._prefix}/${this._method}/exchange`, {
649
+ method: "POST",
650
+ headers: this._buildHeaders(),
651
+ body: this._prepareBody(body)
652
+ });
653
+ return this._readResponse(resp);
654
+ }
655
+ close() {}
508
656
  }
509
657
 
510
- // src/types.ts
511
- import { RecordBatch as RecordBatch3, recordBatchFromArrays } from "@query-farm/apache-arrow";
512
- var MethodType;
513
- ((MethodType2) => {
514
- MethodType2["UNARY"] = "unary";
515
- MethodType2["STREAM"] = "stream";
516
- })(MethodType ||= {});
517
-
518
- class OutputCollector {
519
- _batches = [];
520
- _dataBatchIdx = null;
521
- _finished = false;
522
- _producerMode;
523
- _outputSchema;
524
- _serverId;
525
- _requestId;
526
- constructor(outputSchema, producerMode = true, serverId = "", requestId = null) {
527
- this._outputSchema = outputSchema;
528
- this._producerMode = producerMode;
529
- this._serverId = serverId;
530
- this._requestId = requestId;
531
- }
532
- get outputSchema() {
533
- return this._outputSchema;
534
- }
535
- get finished() {
536
- return this._finished;
537
- }
538
- get batches() {
539
- return this._batches;
540
- }
541
- emit(batchOrColumns, metadata) {
542
- let batch;
543
- if (batchOrColumns instanceof RecordBatch3) {
544
- batch = batchOrColumns;
545
- } else {
546
- const coerced = coerceInt64(this._outputSchema, batchOrColumns);
547
- batch = recordBatchFromArrays(coerced, this._outputSchema);
548
- }
549
- if (this._dataBatchIdx !== null) {
550
- throw new Error("Only one data batch may be emitted per call");
551
- }
552
- this._dataBatchIdx = this._batches.length;
553
- this._batches.push({ batch, metadata });
554
- }
555
- emitRow(values) {
556
- const columns = {};
557
- for (const [key, value] of Object.entries(values)) {
558
- columns[key] = [value];
559
- }
560
- this.emit(columns);
658
+ // src/client/connect.ts
659
+ function httpConnect(baseUrl, options) {
660
+ const prefix = (options?.prefix ?? "/vgi").replace(/\/+$/, "");
661
+ const onLog = options?.onLog;
662
+ const compressionLevel = options?.compressionLevel;
663
+ let methodCache = null;
664
+ let compressFn;
665
+ let decompressFn;
666
+ let compressionLoaded = false;
667
+ async function ensureCompression() {
668
+ if (compressionLoaded || compressionLevel == null)
669
+ return;
670
+ compressionLoaded = true;
671
+ try {
672
+ const mod = await Promise.resolve().then(() => (init_zstd(), exports_zstd));
673
+ compressFn = mod.zstdCompress;
674
+ decompressFn = mod.zstdDecompress;
675
+ } catch {}
561
676
  }
562
- finish() {
563
- if (!this._producerMode) {
564
- throw new Error("finish() is not allowed on exchange streams; " + "exchange streams must emit exactly one data batch per call");
677
+ function buildHeaders() {
678
+ const headers = {
679
+ "Content-Type": ARROW_CONTENT_TYPE
680
+ };
681
+ if (compressionLevel != null) {
682
+ headers["Content-Encoding"] = "zstd";
683
+ headers["Accept-Encoding"] = "zstd";
565
684
  }
566
- this._finished = true;
567
- }
568
- clientLog(level, message, extra) {
569
- const batch = buildLogBatch(this._outputSchema, level, message, extra, this._serverId, this._requestId);
570
- this._batches.push({ batch });
571
- }
572
- }
573
-
574
- // src/dispatch/unary.ts
575
- async function dispatchUnary(method, params, writer, serverId, requestId) {
576
- const schema = method.resultSchema;
577
- const out = new OutputCollector(schema, true, serverId, requestId);
578
- try {
579
- const result = await method.handler(params, out);
580
- const resultBatch = buildResultBatch(schema, result, serverId, requestId);
581
- const batches = [...out.batches.map((b) => b.batch), resultBatch];
582
- writer.writeStream(schema, batches);
583
- } catch (error) {
584
- const batch = buildErrorBatch(schema, error, serverId, requestId);
585
- writer.writeStream(schema, [batch]);
685
+ return headers;
586
686
  }
587
- }
588
-
589
- // src/dispatch/stream.ts
590
- import { Schema as Schema3 } from "@query-farm/apache-arrow";
591
- var EMPTY_SCHEMA = new Schema3([]);
592
- async function dispatchStream(method, params, writer, reader, serverId, requestId) {
593
- const isProducer = !!method.producerFn;
594
- let state;
595
- try {
596
- if (isProducer) {
597
- state = await method.producerInit(params);
598
- } else {
599
- state = await method.exchangeInit(params);
600
- }
601
- } catch (error) {
602
- const errSchema = method.headerSchema ?? EMPTY_SCHEMA;
603
- const errBatch = buildErrorBatch(errSchema, error, serverId, requestId);
604
- writer.writeStream(errSchema, [errBatch]);
605
- const inputSchema2 = await reader.openNextStream();
606
- if (inputSchema2) {
607
- while (await reader.readNextBatch() !== null) {}
687
+ function prepareBody(content) {
688
+ if (compressionLevel != null && compressFn) {
689
+ return compressFn(content, compressionLevel);
608
690
  }
609
- return;
691
+ return content;
610
692
  }
611
- const outputSchema = state?.__outputSchema ?? method.outputSchema;
612
- const effectiveProducer = state?.__isProducer ?? isProducer;
613
- if (method.headerSchema && method.headerInit) {
614
- try {
615
- const headerOut = new OutputCollector(method.headerSchema, true, serverId, requestId);
616
- const headerValues = method.headerInit(params, state, headerOut);
617
- const headerBatch = buildResultBatch(method.headerSchema, headerValues, serverId, requestId);
618
- const headerBatches = [
619
- ...headerOut.batches.map((b) => b.batch),
620
- headerBatch
621
- ];
622
- writer.writeStream(method.headerSchema, headerBatches);
623
- } catch (error) {
624
- const errBatch = buildErrorBatch(method.headerSchema, error, serverId, requestId);
625
- writer.writeStream(method.headerSchema, [errBatch]);
626
- const inputSchema2 = await reader.openNextStream();
627
- if (inputSchema2) {
628
- while (await reader.readNextBatch() !== null) {}
629
- }
630
- return;
693
+ async function readResponse(resp) {
694
+ let body = new Uint8Array(await resp.arrayBuffer());
695
+ if (resp.headers.get("Content-Encoding") === "zstd" && decompressFn) {
696
+ body = new Uint8Array(decompressFn(body));
631
697
  }
698
+ return body;
632
699
  }
633
- const inputSchema = await reader.openNextStream();
634
- if (!inputSchema) {
635
- const errBatch = buildErrorBatch(outputSchema, new Error("Expected input stream but got EOF"), serverId, requestId);
636
- writer.writeStream(outputSchema, [errBatch]);
637
- return;
700
+ async function ensureMethodCache() {
701
+ if (methodCache)
702
+ return methodCache;
703
+ const desc = await httpIntrospect(baseUrl, { prefix });
704
+ methodCache = new Map(desc.methods.map((m) => [m.name, m]));
705
+ return methodCache;
638
706
  }
639
- const stream = writer.openStream(outputSchema);
640
- try {
641
- while (true) {
642
- const inputBatch = await reader.readNextBatch();
643
- if (!inputBatch)
644
- break;
645
- const out = new OutputCollector(outputSchema, effectiveProducer, serverId, requestId);
646
- if (isProducer) {
647
- await method.producerFn(state, out);
648
- } else {
649
- await method.exchangeFn(state, inputBatch, out);
650
- }
651
- for (const emitted of out.batches) {
652
- stream.write(emitted.batch);
653
- }
654
- if (out.finished) {
655
- break;
707
+ return {
708
+ async call(method, params) {
709
+ await ensureCompression();
710
+ const methods = await ensureMethodCache();
711
+ const info = methods.get(method);
712
+ if (!info) {
713
+ throw new Error(`Unknown method: '${method}'`);
656
714
  }
657
- }
658
- } catch (error) {
659
- stream.write(buildErrorBatch(outputSchema, error, serverId, requestId));
660
- }
661
- stream.close();
662
- try {
663
- while (await reader.readNextBatch() !== null) {}
664
- } catch {}
665
- }
666
-
667
- // src/server.ts
668
- var EMPTY_SCHEMA2 = new Schema4([]);
669
-
670
- class VgiRpcServer {
671
- protocol;
672
- enableDescribe;
673
- serverId;
674
- describeBatch = null;
675
- constructor(protocol, options) {
676
- this.protocol = protocol;
677
- this.enableDescribe = options?.enableDescribe ?? true;
678
- this.serverId = options?.serverId ?? crypto.randomUUID().replace(/-/g, "").slice(0, 12);
679
- if (this.enableDescribe) {
680
- const { batch } = buildDescribeBatch(protocol.name, protocol.getMethods(), this.serverId);
681
- this.describeBatch = batch;
682
- }
683
- }
684
- async run() {
685
- const stdin = process.stdin;
686
- if (process.stdin.isTTY || process.stdout.isTTY) {
687
- process.stderr.write("WARNING: This process communicates via Arrow IPC on stdin/stdout " + `and is not intended to be run interactively.
688
- ` + "It should be launched as a subprocess by an RPC client " + `(e.g. vgi_rpc.connect()).
689
- `);
690
- }
691
- const reader = await IpcStreamReader.create(stdin);
692
- const writer = new IpcStreamWriter;
693
- try {
694
- while (true) {
695
- await this.serveOne(reader, writer);
715
+ const fullParams = { ...info.defaults ?? {}, ...params ?? {} };
716
+ const body = buildRequestIpc(info.paramsSchema, fullParams, method);
717
+ const resp = await fetch(`${baseUrl}${prefix}/${method}`, {
718
+ method: "POST",
719
+ headers: buildHeaders(),
720
+ body: prepareBody(body)
721
+ });
722
+ const responseBody = await readResponse(resp);
723
+ const { batches } = await readResponseBatches(responseBody);
724
+ let resultBatch = null;
725
+ for (const batch of batches) {
726
+ if (batch.numRows === 0) {
727
+ dispatchLogOrError(batch, onLog);
728
+ continue;
729
+ }
730
+ resultBatch = batch;
696
731
  }
697
- } catch (e) {
698
- if (e.message?.includes("closed") || e.message?.includes("Expected Schema Message") || e.message?.includes("null or length 0") || e.code === "EPIPE" || e.code === "ERR_STREAM_PREMATURE_CLOSE" || e.code === "ERR_STREAM_DESTROYED" || e instanceof Error && e.message.includes("EOF")) {
699
- return;
732
+ if (!resultBatch) {
733
+ return null;
700
734
  }
701
- throw e;
702
- } finally {
703
- await reader.cancel();
704
- }
705
- }
706
- async serveOne(reader, writer) {
707
- const stream = await reader.readStream();
708
- if (!stream) {
709
- throw new Error("EOF");
710
- }
711
- const { schema, batches } = stream;
712
- if (batches.length === 0) {
713
- const err = new RpcError("ProtocolError", "Request stream contains no batches", "");
714
- const errBatch = buildErrorBatch(EMPTY_SCHEMA2, err, this.serverId, null);
715
- writer.writeStream(EMPTY_SCHEMA2, [errBatch]);
716
- return;
717
- }
718
- const batch = batches[0];
719
- let methodName;
720
- let params;
721
- let requestId;
722
- try {
723
- const parsed = parseRequest(schema, batch);
724
- methodName = parsed.methodName;
725
- params = parsed.params;
726
- requestId = parsed.requestId;
727
- } catch (e) {
728
- const errBatch = buildErrorBatch(EMPTY_SCHEMA2, e, this.serverId, null);
729
- writer.writeStream(EMPTY_SCHEMA2, [errBatch]);
730
- if (e instanceof VersionError || e instanceof RpcError) {
731
- return;
735
+ const rows = extractBatchRows(resultBatch);
736
+ if (rows.length === 0)
737
+ return null;
738
+ const result = rows[0];
739
+ if (info.resultSchema.fields.length === 0)
740
+ return null;
741
+ return result;
742
+ },
743
+ async stream(method, params) {
744
+ await ensureCompression();
745
+ const methods = await ensureMethodCache();
746
+ const info = methods.get(method);
747
+ if (!info) {
748
+ throw new Error(`Unknown method: '${method}'`);
732
749
  }
733
- throw e;
734
- }
735
- if (methodName === DESCRIBE_METHOD_NAME && this.describeBatch) {
736
- writer.writeStream(this.describeBatch.schema, [this.describeBatch]);
737
- return;
738
- }
739
- const methods = this.protocol.getMethods();
740
- const method = methods.get(methodName);
741
- if (!method) {
742
- const available = [...methods.keys()].sort();
743
- const err = new Error(`Unknown method: '${methodName}'. Available methods: [${available.join(", ")}]`);
744
- const errBatch = buildErrorBatch(EMPTY_SCHEMA2, err, this.serverId, requestId);
745
- writer.writeStream(EMPTY_SCHEMA2, [errBatch]);
746
- return;
747
- }
748
- if (method.type === "unary" /* UNARY */) {
749
- await dispatchUnary(method, params, writer, this.serverId, requestId);
750
- } else {
751
- await dispatchStream(method, params, writer, reader, this.serverId, requestId);
752
- }
753
- }
754
- }
755
- // src/protocol.ts
756
- import { Schema as Schema6 } from "@query-farm/apache-arrow";
757
-
758
- // src/schema.ts
759
- import {
760
- Schema as Schema5,
761
- Field as Field3,
762
- DataType as DataType3,
763
- Utf8 as Utf82,
764
- Binary as Binary2,
765
- Int64,
766
- Int32,
767
- Int16,
768
- Float64,
769
- Float32,
770
- Bool as Bool2
771
- } from "@query-farm/apache-arrow";
772
- var str = new Utf82;
773
- var bytes = new Binary2;
774
- var int = new Int64;
775
- var int32 = new Int32;
776
- var float = new Float64;
777
- var float32 = new Float32;
778
- var bool = new Bool2;
779
- function toSchema(spec) {
780
- if (spec instanceof Schema5)
781
- return spec;
782
- const fields = [];
783
- for (const [name, value] of Object.entries(spec)) {
784
- if (value instanceof Field3) {
785
- fields.push(value);
786
- } else if (value instanceof DataType3) {
787
- fields.push(new Field3(name, value, false));
788
- } else {
789
- throw new TypeError(`Invalid schema value for "${name}": expected DataType or Field, got ${typeof value}`);
790
- }
791
- }
792
- return new Schema5(fields);
793
- }
794
- var TYPE_MAP = [
795
- [Utf82, "str"],
796
- [Binary2, "bytes"],
797
- [Bool2, "bool"],
798
- [Float64, "float"],
799
- [Float32, "float"],
800
- [Int64, "int"],
801
- [Int32, "int"],
802
- [Int16, "int"]
803
- ];
804
- function inferParamTypes(spec) {
805
- const schema = toSchema(spec);
806
- if (schema.fields.length === 0)
807
- return;
808
- const result = {};
809
- for (const field of schema.fields) {
810
- let mapped;
811
- for (const [ctor, name] of TYPE_MAP) {
812
- if (field.type instanceof ctor) {
813
- mapped = name;
814
- break;
750
+ const fullParams = { ...info.defaults ?? {}, ...params ?? {} };
751
+ const body = buildRequestIpc(info.paramsSchema, fullParams, method);
752
+ const resp = await fetch(`${baseUrl}${prefix}/${method}/init`, {
753
+ method: "POST",
754
+ headers: buildHeaders(),
755
+ body: prepareBody(body)
756
+ });
757
+ const responseBody = await readResponse(resp);
758
+ let header = null;
759
+ let stateToken = null;
760
+ const pendingBatches = [];
761
+ let finished = false;
762
+ let streamSchema = null;
763
+ if (info.headerSchema) {
764
+ const reader = await readSequentialStreams(responseBody);
765
+ const headerStream = await reader.readStream();
766
+ if (headerStream) {
767
+ for (const batch of headerStream.batches) {
768
+ if (batch.numRows === 0) {
769
+ dispatchLogOrError(batch, onLog);
770
+ continue;
771
+ }
772
+ const rows = extractBatchRows(batch);
773
+ if (rows.length > 0) {
774
+ header = rows[0];
775
+ }
776
+ }
777
+ }
778
+ const dataStream = await reader.readStream();
779
+ if (dataStream) {
780
+ streamSchema = dataStream.schema;
781
+ }
782
+ const headerErrorBatches = [];
783
+ if (dataStream) {
784
+ for (const batch of dataStream.batches) {
785
+ if (batch.numRows === 0) {
786
+ const token = batch.metadata?.get(STATE_KEY);
787
+ if (token) {
788
+ stateToken = token;
789
+ continue;
790
+ }
791
+ const level = batch.metadata?.get(LOG_LEVEL_KEY);
792
+ if (level === "EXCEPTION") {
793
+ headerErrorBatches.push(batch);
794
+ continue;
795
+ }
796
+ dispatchLogOrError(batch, onLog);
797
+ continue;
798
+ }
799
+ pendingBatches.push(batch);
800
+ }
801
+ }
802
+ if (headerErrorBatches.length > 0) {
803
+ if (pendingBatches.length > 0 || stateToken !== null) {
804
+ pendingBatches.push(...headerErrorBatches);
805
+ } else {
806
+ for (const batch of headerErrorBatches) {
807
+ dispatchLogOrError(batch, onLog);
808
+ }
809
+ }
810
+ }
811
+ if (!dataStream && !stateToken) {
812
+ finished = true;
813
+ }
814
+ } else {
815
+ const { schema: responseSchema, batches } = await readResponseBatches(responseBody);
816
+ streamSchema = responseSchema;
817
+ const errorBatches = [];
818
+ for (const batch of batches) {
819
+ if (batch.numRows === 0) {
820
+ const token = batch.metadata?.get(STATE_KEY);
821
+ if (token) {
822
+ stateToken = token;
823
+ continue;
824
+ }
825
+ const level = batch.metadata?.get(LOG_LEVEL_KEY);
826
+ if (level === "EXCEPTION") {
827
+ errorBatches.push(batch);
828
+ continue;
829
+ }
830
+ dispatchLogOrError(batch, onLog);
831
+ continue;
832
+ }
833
+ pendingBatches.push(batch);
834
+ }
835
+ if (errorBatches.length > 0) {
836
+ if (pendingBatches.length > 0 || stateToken !== null) {
837
+ pendingBatches.push(...errorBatches);
838
+ } else {
839
+ for (const batch of errorBatches) {
840
+ dispatchLogOrError(batch, onLog);
841
+ }
842
+ }
843
+ }
815
844
  }
816
- }
817
- if (!mapped)
818
- return;
819
- result[field.name] = mapped;
820
- }
821
- return result;
822
- }
823
-
824
- // src/protocol.ts
825
- var EMPTY_SCHEMA3 = new Schema6([]);
826
-
827
- class Protocol {
828
- name;
829
- _methods = new Map;
830
- constructor(name) {
831
- this.name = name;
832
- }
833
- unary(name, config) {
834
- const params = toSchema(config.params);
835
- this._methods.set(name, {
836
- name,
837
- type: "unary" /* UNARY */,
838
- paramsSchema: params,
839
- resultSchema: toSchema(config.result),
840
- handler: config.handler,
841
- doc: config.doc,
842
- defaults: config.defaults,
843
- paramTypes: config.paramTypes ?? inferParamTypes(params)
844
- });
845
- return this;
846
- }
847
- producer(name, config) {
848
- const params = toSchema(config.params);
849
- this._methods.set(name, {
850
- name,
851
- type: "stream" /* STREAM */,
852
- paramsSchema: params,
853
- resultSchema: EMPTY_SCHEMA3,
854
- outputSchema: toSchema(config.outputSchema),
855
- inputSchema: EMPTY_SCHEMA3,
856
- producerInit: config.init,
857
- producerFn: config.produce,
858
- headerSchema: config.headerSchema ? toSchema(config.headerSchema) : undefined,
859
- headerInit: config.headerInit,
860
- doc: config.doc,
861
- defaults: config.defaults,
862
- paramTypes: config.paramTypes ?? inferParamTypes(params)
863
- });
864
- return this;
865
- }
866
- exchange(name, config) {
867
- const params = toSchema(config.params);
868
- this._methods.set(name, {
869
- name,
870
- type: "stream" /* STREAM */,
871
- paramsSchema: params,
872
- resultSchema: EMPTY_SCHEMA3,
873
- inputSchema: toSchema(config.inputSchema),
874
- outputSchema: toSchema(config.outputSchema),
875
- exchangeInit: config.init,
876
- exchangeFn: config.exchange,
877
- headerSchema: config.headerSchema ? toSchema(config.headerSchema) : undefined,
878
- headerInit: config.headerInit,
879
- doc: config.doc,
880
- defaults: config.defaults,
881
- paramTypes: config.paramTypes ?? inferParamTypes(params)
882
- });
883
- return this;
884
- }
885
- getMethods() {
886
- return new Map(this._methods);
887
- }
845
+ if (pendingBatches.length === 0 && stateToken === null) {
846
+ finished = true;
847
+ }
848
+ const outputSchema = (streamSchema && streamSchema.fields.length > 0 ? streamSchema : null) ?? (pendingBatches.length > 0 ? pendingBatches[0].schema : null) ?? info.outputSchema ?? info.resultSchema;
849
+ return new HttpStreamSession({
850
+ baseUrl,
851
+ prefix,
852
+ method,
853
+ stateToken,
854
+ outputSchema,
855
+ inputSchema: info.inputSchema,
856
+ onLog,
857
+ pendingBatches,
858
+ finished,
859
+ header,
860
+ compressionLevel,
861
+ compressFn,
862
+ decompressFn
863
+ });
864
+ },
865
+ async describe() {
866
+ return httpIntrospect(baseUrl, { prefix });
867
+ },
868
+ close() {}
869
+ };
888
870
  }
889
- // src/http/handler.ts
890
- import { Schema as Schema9 } from "@query-farm/apache-arrow";
891
- import { randomBytes } from "node:crypto";
892
-
893
- // src/http/types.ts
894
- var jsonStateSerializer = {
895
- serialize(state) {
896
- return new TextEncoder().encode(JSON.stringify(state, (_key, value) => typeof value === "bigint" ? `__bigint__:${value}` : value));
897
- },
898
- deserialize(bytes2) {
899
- return JSON.parse(new TextDecoder().decode(bytes2), (_key, value) => typeof value === "string" && value.startsWith("__bigint__:") ? BigInt(value.slice(11)) : value);
900
- }
901
- };
902
-
903
- // src/http/common.ts
871
+ // src/client/pipe.ts
904
872
  import {
905
- RecordBatchStreamWriter as RecordBatchStreamWriter3,
906
- RecordBatchReader as RecordBatchReader2,
873
+ Field as Field2,
874
+ makeData as makeData4,
907
875
  RecordBatch as RecordBatch4,
908
- Struct as Struct3,
909
- makeData as makeData3
876
+ RecordBatchStreamWriter as RecordBatchStreamWriter2,
877
+ Schema as Schema2,
878
+ Struct as Struct4,
879
+ vectorFromArray as vectorFromArray3
910
880
  } from "@query-farm/apache-arrow";
911
- var ARROW_CONTENT_TYPE = "application/vnd.apache.arrow.stream";
912
-
913
- class HttpRpcError extends Error {
914
- statusCode;
915
- constructor(message, statusCode) {
916
- super(message);
917
- this.statusCode = statusCode;
918
- this.name = "HttpRpcError";
919
- }
920
- }
921
- function conformBatchToSchema(batch, schema) {
922
- if (batch.numRows === 0)
923
- return batch;
924
- const children = schema.fields.map((f, i) => batch.data.children[i].clone(f.type));
925
- const structType = new Struct3(schema.fields);
926
- const data = makeData3({
927
- type: structType,
928
- length: batch.numRows,
929
- children,
930
- nullCount: batch.data.nullCount,
931
- nullBitmap: batch.data.nullBitmap
932
- });
933
- return new RecordBatch4(schema, data, batch.metadata);
934
- }
935
- function serializeIpcStream(schema, batches) {
936
- const writer = new RecordBatchStreamWriter3;
937
- writer.reset(undefined, schema);
938
- for (const batch of batches) {
939
- writer.write(conformBatchToSchema(batch, schema));
940
- }
941
- writer.close();
942
- return writer.toUint8Array(true);
943
- }
944
- function arrowResponse(body, status = 200, extraHeaders) {
945
- const headers = extraHeaders ?? new Headers;
946
- headers.set("Content-Type", ARROW_CONTENT_TYPE);
947
- return new Response(body, { status, headers });
948
- }
949
- async function readRequestFromBody(body) {
950
- const reader = await RecordBatchReader2.from(body);
951
- await reader.open();
952
- const schema = reader.schema;
953
- if (!schema) {
954
- throw new HttpRpcError("Empty IPC stream: no schema", 400);
955
- }
956
- const batches = reader.readAll();
957
- if (batches.length === 0) {
958
- throw new HttpRpcError("IPC stream contains no batches", 400);
959
- }
960
- return { schema, batch: batches[0] };
961
- }
962
-
963
- // src/http/dispatch.ts
964
- import { Schema as Schema8, RecordBatch as RecordBatch5, RecordBatchReader as RecordBatchReader3 } from "@query-farm/apache-arrow";
965
-
966
- // src/http/token.ts
967
- import { createHmac, timingSafeEqual } from "node:crypto";
968
- var TOKEN_VERSION = 2;
969
- var HMAC_LEN = 32;
970
- var MIN_TOKEN_LEN = 1 + 8 + 12 + HMAC_LEN;
971
- function packStateToken(stateBytes, schemaBytes, inputSchemaBytes, signingKey, createdAt) {
972
- const now = createdAt ?? Math.floor(Date.now() / 1000);
973
- const payloadLen = 1 + 8 + 4 + stateBytes.length + 4 + schemaBytes.length + 4 + inputSchemaBytes.length;
974
- const buf = Buffer.alloc(payloadLen);
975
- let offset = 0;
976
- buf.writeUInt8(TOKEN_VERSION, offset);
977
- offset += 1;
978
- buf.writeBigUInt64LE(BigInt(now), offset);
979
- offset += 8;
980
- buf.writeUInt32LE(stateBytes.length, offset);
981
- offset += 4;
982
- buf.set(stateBytes, offset);
983
- offset += stateBytes.length;
984
- buf.writeUInt32LE(schemaBytes.length, offset);
985
- offset += 4;
986
- buf.set(schemaBytes, offset);
987
- offset += schemaBytes.length;
988
- buf.writeUInt32LE(inputSchemaBytes.length, offset);
989
- offset += 4;
990
- buf.set(inputSchemaBytes, offset);
991
- offset += inputSchemaBytes.length;
992
- const mac = createHmac("sha256", signingKey).update(buf).digest();
993
- const token = Buffer.concat([buf, mac]);
994
- return token.toString("base64");
995
- }
996
- function unpackStateToken(tokenBase64, signingKey, tokenTtl) {
997
- const token = Buffer.from(tokenBase64, "base64");
998
- if (token.length < MIN_TOKEN_LEN) {
999
- throw new Error("State token too short");
1000
- }
1001
- const payload = token.subarray(0, token.length - HMAC_LEN);
1002
- const receivedMac = token.subarray(token.length - HMAC_LEN);
1003
- const expectedMac = createHmac("sha256", signingKey).update(payload).digest();
1004
- if (!timingSafeEqual(receivedMac, expectedMac)) {
1005
- throw new Error("State token HMAC verification failed");
1006
- }
1007
- let offset = 0;
1008
- const version = payload.readUInt8(offset);
1009
- offset += 1;
1010
- if (version !== TOKEN_VERSION) {
1011
- throw new Error(`Unsupported state token version: ${version}`);
1012
- }
1013
- const createdAt = Number(payload.readBigUInt64LE(offset));
1014
- offset += 8;
1015
- if (tokenTtl > 0) {
1016
- const now = Math.floor(Date.now() / 1000);
1017
- if (now - createdAt > tokenTtl) {
1018
- throw new Error("State token expired");
1019
- }
881
+ class PipeIncrementalWriter {
882
+ writer;
883
+ writeFn;
884
+ closed = false;
885
+ constructor(writeFn, schema) {
886
+ this.writeFn = writeFn;
887
+ this.writer = new RecordBatchStreamWriter2;
888
+ this.writer.reset(undefined, schema);
889
+ this.drain();
1020
890
  }
1021
- const stateLen = payload.readUInt32LE(offset);
1022
- offset += 4;
1023
- if (offset + stateLen > payload.length) {
1024
- throw new Error("State token truncated (state)");
891
+ write(batch) {
892
+ if (this.closed)
893
+ throw new Error("PipeIncrementalWriter already closed");
894
+ this.writer._writeRecordBatch(batch);
895
+ this.drain();
1025
896
  }
1026
- const stateBytes = payload.slice(offset, offset + stateLen);
1027
- offset += stateLen;
1028
- const schemaLen = payload.readUInt32LE(offset);
1029
- offset += 4;
1030
- if (offset + schemaLen > payload.length) {
1031
- throw new Error("State token truncated (schema)");
897
+ close() {
898
+ if (this.closed)
899
+ return;
900
+ this.closed = true;
901
+ const eos = new Uint8Array(new Int32Array([-1, 0]).buffer);
902
+ this.writeFn(eos);
1032
903
  }
1033
- const schemaBytes = payload.slice(offset, offset + schemaLen);
1034
- offset += schemaLen;
1035
- const inputSchemaLen = payload.readUInt32LE(offset);
1036
- offset += 4;
1037
- if (offset + inputSchemaLen > payload.length) {
1038
- throw new Error("State token truncated (input schema)");
904
+ drain() {
905
+ const values = this.writer._sink._values;
906
+ for (const chunk of values) {
907
+ this.writeFn(chunk);
908
+ }
909
+ values.length = 0;
1039
910
  }
1040
- const inputSchemaBytes = payload.slice(offset, offset + inputSchemaLen);
1041
- return { stateBytes, schemaBytes, inputSchemaBytes, createdAt };
1042
911
  }
1043
912
 
1044
- // src/http/dispatch.ts
1045
- async function deserializeSchema(bytes2) {
1046
- const reader = await RecordBatchReader3.from(bytes2);
1047
- await reader.open();
1048
- return reader.schema;
1049
- }
1050
- var EMPTY_SCHEMA4 = new Schema8([]);
1051
- function httpDispatchDescribe(protocolName, methods, serverId) {
1052
- const { batch } = buildDescribeBatch(protocolName, methods, serverId);
1053
- const body = serializeIpcStream(DESCRIBE_SCHEMA, [batch]);
1054
- return arrowResponse(body);
1055
- }
1056
- async function httpDispatchUnary(method, body, ctx) {
1057
- const schema = method.resultSchema;
1058
- const { schema: reqSchema, batch: reqBatch } = await readRequestFromBody(body);
1059
- const parsed = parseRequest(reqSchema, reqBatch);
1060
- if (parsed.methodName !== method.name) {
1061
- throw new HttpRpcError(`Method name in request '${parsed.methodName}' does not match URL '${method.name}'`, 400);
913
+ class PipeStreamSession {
914
+ _reader;
915
+ _writeFn;
916
+ _onLog;
917
+ _header;
918
+ _inputWriter = null;
919
+ _inputSchema = null;
920
+ _outputStreamOpened = false;
921
+ _closed = false;
922
+ _outputSchema;
923
+ _releaseBusy;
924
+ _setDrainPromise;
925
+ constructor(opts) {
926
+ this._reader = opts.reader;
927
+ this._writeFn = opts.writeFn;
928
+ this._onLog = opts.onLog;
929
+ this._header = opts.header;
930
+ this._outputSchema = opts.outputSchema;
931
+ this._releaseBusy = opts.releaseBusy;
932
+ this._setDrainPromise = opts.setDrainPromise;
1062
933
  }
1063
- const out = new OutputCollector(schema, true, ctx.serverId, parsed.requestId);
1064
- try {
1065
- const result = await method.handler(parsed.params, out);
1066
- const resultBatch = buildResultBatch(schema, result, ctx.serverId, parsed.requestId);
1067
- const batches = [...out.batches.map((b) => b.batch), resultBatch];
1068
- return arrowResponse(serializeIpcStream(schema, batches));
1069
- } catch (error) {
1070
- const errBatch = buildErrorBatch(schema, error, ctx.serverId, parsed.requestId);
1071
- return arrowResponse(serializeIpcStream(schema, [errBatch]), 500);
934
+ get header() {
935
+ return this._header;
1072
936
  }
1073
- }
1074
- async function httpDispatchStreamInit(method, body, ctx) {
1075
- const isProducer = !!method.producerFn;
1076
- const outputSchema = method.outputSchema;
1077
- const inputSchema = method.inputSchema ?? EMPTY_SCHEMA4;
1078
- const { schema: reqSchema, batch: reqBatch } = await readRequestFromBody(body);
1079
- const parsed = parseRequest(reqSchema, reqBatch);
1080
- if (parsed.methodName !== method.name) {
1081
- throw new HttpRpcError(`Method name in request '${parsed.methodName}' does not match URL '${method.name}'`, 400);
937
+ async _readOutputBatch() {
938
+ while (true) {
939
+ const batch = await this._reader.readNextBatch();
940
+ if (batch === null)
941
+ return null;
942
+ if (batch.numRows === 0) {
943
+ if (dispatchLogOrError(batch, this._onLog)) {
944
+ continue;
945
+ }
946
+ }
947
+ return batch;
948
+ }
1082
949
  }
1083
- let state;
1084
- try {
1085
- if (isProducer) {
1086
- state = await method.producerInit(parsed.params);
950
+ async _ensureOutputStream() {
951
+ if (this._outputStreamOpened)
952
+ return;
953
+ this._outputStreamOpened = true;
954
+ const schema = await this._reader.openNextStream();
955
+ if (!schema) {
956
+ throw new RpcError("ProtocolError", "Expected output stream but got EOF", "");
957
+ }
958
+ }
959
+ async exchange(input) {
960
+ if (this._closed) {
961
+ throw new RpcError("ProtocolError", "Stream session is closed", "");
962
+ }
963
+ let inputSchema;
964
+ let batch;
965
+ if (input.length === 0) {
966
+ inputSchema = this._inputSchema ?? this._outputSchema;
967
+ const children = inputSchema.fields.map((f) => {
968
+ return makeData4({ type: f.type, length: 0, nullCount: 0 });
969
+ });
970
+ const structType = new Struct4(inputSchema.fields);
971
+ const data = makeData4({
972
+ type: structType,
973
+ length: 0,
974
+ children,
975
+ nullCount: 0
976
+ });
977
+ batch = new RecordBatch4(inputSchema, data);
1087
978
  } else {
1088
- state = await method.exchangeInit(parsed.params);
979
+ const keys = Object.keys(input[0]);
980
+ const fields = keys.map((key) => {
981
+ let sample;
982
+ for (const row of input) {
983
+ if (row[key] != null) {
984
+ sample = row[key];
985
+ break;
986
+ }
987
+ }
988
+ const arrowType = inferArrowType(sample);
989
+ return new Field2(key, arrowType, true);
990
+ });
991
+ inputSchema = new Schema2(fields);
992
+ if (this._inputSchema) {
993
+ const cached = this._inputSchema;
994
+ if (cached.fields.length !== inputSchema.fields.length || cached.fields.some((f, i) => f.name !== inputSchema.fields[i].name)) {
995
+ throw new RpcError("ProtocolError", `Exchange input schema changed: expected [${cached.fields.map((f) => f.name).join(", ")}] ` + `but got [${inputSchema.fields.map((f) => f.name).join(", ")}]`, "");
996
+ }
997
+ } else {
998
+ this._inputSchema = inputSchema;
999
+ }
1000
+ const children = inputSchema.fields.map((f) => {
1001
+ const values = input.map((row) => row[f.name]);
1002
+ return vectorFromArray3(values, f.type).data[0];
1003
+ });
1004
+ const structType = new Struct4(inputSchema.fields);
1005
+ const data = makeData4({
1006
+ type: structType,
1007
+ length: input.length,
1008
+ children,
1009
+ nullCount: 0
1010
+ });
1011
+ batch = new RecordBatch4(inputSchema, data);
1012
+ }
1013
+ if (!this._inputWriter) {
1014
+ this._inputWriter = new PipeIncrementalWriter(this._writeFn, inputSchema);
1015
+ }
1016
+ this._inputWriter.write(batch);
1017
+ await this._ensureOutputStream();
1018
+ try {
1019
+ const outputBatch = await this._readOutputBatch();
1020
+ if (outputBatch === null) {
1021
+ return [];
1022
+ }
1023
+ return extractBatchRows(outputBatch);
1024
+ } catch (e) {
1025
+ await this._cleanup();
1026
+ throw e;
1089
1027
  }
1090
- } catch (error) {
1091
- const errSchema = method.headerSchema ?? EMPTY_SCHEMA4;
1092
- const errBatch = buildErrorBatch(errSchema, error, ctx.serverId, parsed.requestId);
1093
- return arrowResponse(serializeIpcStream(errSchema, [errBatch]), 500);
1094
1028
  }
1095
- const resolvedOutputSchema = state?.__outputSchema ?? outputSchema;
1096
- const effectiveProducer = state?.__isProducer ?? isProducer;
1097
- let headerBytes = null;
1098
- if (method.headerSchema && method.headerInit) {
1029
+ async _cleanup() {
1030
+ if (this._closed)
1031
+ return;
1032
+ this._closed = true;
1033
+ if (this._inputWriter) {
1034
+ this._inputWriter.close();
1035
+ this._inputWriter = null;
1036
+ }
1099
1037
  try {
1100
- const headerOut = new OutputCollector(method.headerSchema, true, ctx.serverId, parsed.requestId);
1101
- const headerValues = method.headerInit(parsed.params, state, headerOut);
1102
- const headerBatch = buildResultBatch(method.headerSchema, headerValues, ctx.serverId, parsed.requestId);
1103
- const headerBatches = [
1104
- ...headerOut.batches.map((b) => b.batch),
1105
- headerBatch
1106
- ];
1107
- headerBytes = serializeIpcStream(method.headerSchema, headerBatches);
1108
- } catch (error) {
1109
- const errBatch = buildErrorBatch(method.headerSchema, error, ctx.serverId, parsed.requestId);
1110
- return arrowResponse(serializeIpcStream(method.headerSchema, [errBatch]), 500);
1038
+ if (this._outputStreamOpened) {
1039
+ while (await this._reader.readNextBatch() !== null) {}
1040
+ }
1041
+ } catch {}
1042
+ this._releaseBusy();
1043
+ }
1044
+ async* [Symbol.asyncIterator]() {
1045
+ if (this._closed)
1046
+ return;
1047
+ try {
1048
+ const tickSchema = new Schema2([]);
1049
+ this._inputWriter = new PipeIncrementalWriter(this._writeFn, tickSchema);
1050
+ const structType = new Struct4(tickSchema.fields);
1051
+ const tickData = makeData4({
1052
+ type: structType,
1053
+ length: 0,
1054
+ children: [],
1055
+ nullCount: 0
1056
+ });
1057
+ const tickBatch = new RecordBatch4(tickSchema, tickData);
1058
+ while (true) {
1059
+ this._inputWriter.write(tickBatch);
1060
+ await this._ensureOutputStream();
1061
+ const outputBatch = await this._readOutputBatch();
1062
+ if (outputBatch === null) {
1063
+ break;
1064
+ }
1065
+ yield extractBatchRows(outputBatch);
1066
+ }
1067
+ } finally {
1068
+ if (this._inputWriter) {
1069
+ this._inputWriter.close();
1070
+ this._inputWriter = null;
1071
+ }
1072
+ try {
1073
+ if (this._outputStreamOpened) {
1074
+ while (await this._reader.readNextBatch() !== null) {}
1075
+ }
1076
+ } catch {}
1077
+ this._closed = true;
1078
+ this._releaseBusy();
1111
1079
  }
1112
1080
  }
1113
- if (effectiveProducer) {
1114
- return produceStreamResponse(method, state, resolvedOutputSchema, inputSchema, ctx, parsed.requestId, headerBytes);
1115
- } else {
1116
- const stateBytes = ctx.stateSerializer.serialize(state);
1117
- const schemaBytes = serializeSchema(resolvedOutputSchema);
1118
- const inputSchemaBytes = serializeSchema(inputSchema);
1119
- const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.signingKey);
1120
- const tokenMeta = new Map;
1121
- tokenMeta.set(STATE_KEY, token);
1122
- const tokenBatch = buildEmptyBatch(resolvedOutputSchema, tokenMeta);
1123
- const tokenStreamBytes = serializeIpcStream(resolvedOutputSchema, [tokenBatch]);
1124
- let responseBody;
1125
- if (headerBytes) {
1126
- responseBody = concatBytes(headerBytes, tokenStreamBytes);
1081
+ close() {
1082
+ if (this._closed)
1083
+ return;
1084
+ this._closed = true;
1085
+ if (this._inputWriter) {
1086
+ this._inputWriter.close();
1087
+ this._inputWriter = null;
1127
1088
  } else {
1128
- responseBody = tokenStreamBytes;
1089
+ const emptySchema = new Schema2([]);
1090
+ const ipc = serializeIpcStream(emptySchema, []);
1091
+ this._writeFn(ipc);
1129
1092
  }
1130
- return arrowResponse(responseBody);
1093
+ const drainPromise = (async () => {
1094
+ try {
1095
+ if (!this._outputStreamOpened) {
1096
+ const schema = await this._reader.openNextStream();
1097
+ if (schema) {
1098
+ while (await this._reader.readNextBatch() !== null) {}
1099
+ }
1100
+ } else {
1101
+ while (await this._reader.readNextBatch() !== null) {}
1102
+ }
1103
+ } catch {} finally {
1104
+ this._releaseBusy();
1105
+ }
1106
+ })();
1107
+ this._setDrainPromise(drainPromise);
1131
1108
  }
1132
1109
  }
1133
- async function httpDispatchStreamExchange(method, body, ctx) {
1134
- const isProducer = !!method.producerFn;
1135
- const { batch: reqBatch } = await readRequestFromBody(body);
1136
- const tokenBase64 = reqBatch.metadata?.get(STATE_KEY);
1137
- if (!tokenBase64) {
1138
- throw new HttpRpcError("Missing state token in exchange request", 400);
1139
- }
1140
- let unpacked;
1141
- try {
1142
- unpacked = unpackStateToken(tokenBase64, ctx.signingKey, ctx.tokenTtl);
1143
- } catch (error) {
1144
- throw new HttpRpcError(`Invalid state token: ${error.message}`, 400);
1110
+ function pipeConnect(readable, writable, options) {
1111
+ const onLog = options?.onLog;
1112
+ let reader = null;
1113
+ let readerPromise = null;
1114
+ let methodCache = null;
1115
+ let protocolName = "";
1116
+ let _busy = false;
1117
+ let _drainPromise = null;
1118
+ let closed = false;
1119
+ const writeFn = (bytes) => {
1120
+ writable.write(bytes);
1121
+ writable.flush?.();
1122
+ };
1123
+ async function ensureReader() {
1124
+ if (reader)
1125
+ return reader;
1126
+ if (!readerPromise) {
1127
+ readerPromise = IpcStreamReader.create(readable);
1128
+ }
1129
+ reader = await readerPromise;
1130
+ return reader;
1145
1131
  }
1146
- let state;
1147
- try {
1148
- state = ctx.stateSerializer.deserialize(unpacked.stateBytes);
1149
- } catch (error) {
1150
- console.error(`[httpDispatchStreamExchange] state deserialize error:`, error.message);
1151
- throw new HttpRpcError(`State deserialization failed: ${error.message}`, 500);
1132
+ async function acquireBusy() {
1133
+ if (_drainPromise) {
1134
+ await _drainPromise;
1135
+ _drainPromise = null;
1136
+ }
1137
+ if (_busy) {
1138
+ throw new Error("Pipe transport is busy — another call or stream is in progress. " + "Pipe connections are single-threaded; wait for the current operation to complete.");
1139
+ }
1140
+ _busy = true;
1152
1141
  }
1153
- let outputSchema;
1154
- if (unpacked.schemaBytes.length > 0) {
1155
- outputSchema = await deserializeSchema(unpacked.schemaBytes);
1156
- } else {
1157
- outputSchema = state?.__outputSchema ?? method.outputSchema;
1142
+ function releaseBusy() {
1143
+ _busy = false;
1158
1144
  }
1159
- let inputSchema;
1160
- if (unpacked.inputSchemaBytes.length > 0) {
1161
- inputSchema = await deserializeSchema(unpacked.inputSchemaBytes);
1162
- } else {
1163
- inputSchema = method.inputSchema ?? EMPTY_SCHEMA4;
1145
+ function setDrainPromise(p) {
1146
+ _drainPromise = p;
1164
1147
  }
1165
- const effectiveProducer = state?.__isProducer ?? isProducer;
1166
- if (process.env.VGI_DISPATCH_DEBUG)
1167
- console.error(`[httpDispatchStreamExchange] method=${method.name} effectiveProducer=${effectiveProducer} stateKeys=${Object.keys(state || {})}`);
1168
- if (effectiveProducer) {
1169
- return produceStreamResponse(method, state, outputSchema, inputSchema, ctx, null, null);
1170
- } else {
1171
- const out = new OutputCollector(outputSchema, effectiveProducer, ctx.serverId, null);
1148
+ async function ensureMethodCache() {
1149
+ if (methodCache)
1150
+ return methodCache;
1151
+ await acquireBusy();
1172
1152
  try {
1173
- if (method.exchangeFn) {
1174
- await method.exchangeFn(state, reqBatch, out);
1175
- } else {
1176
- await method.producerFn(state, out);
1153
+ const emptySchema = new Schema2([]);
1154
+ const body = buildRequestIpc(emptySchema, {}, DESCRIBE_METHOD_NAME);
1155
+ writeFn(body);
1156
+ const r = await ensureReader();
1157
+ const response = await r.readStream();
1158
+ if (!response) {
1159
+ throw new Error("EOF reading __describe__ response");
1177
1160
  }
1178
- } catch (error) {
1179
- if (process.env.VGI_DISPATCH_DEBUG)
1180
- console.error(`[httpDispatchStreamExchange] exchange handler error:`, error.message, error.stack?.split(`
1181
- `).slice(0, 5).join(`
1182
- `));
1183
- const errBatch = buildErrorBatch(outputSchema, error, ctx.serverId, null);
1184
- return arrowResponse(serializeIpcStream(outputSchema, [errBatch]), 500);
1161
+ const desc = await parseDescribeResponse(response.batches, onLog);
1162
+ protocolName = desc.protocolName;
1163
+ methodCache = new Map(desc.methods.map((m) => [m.name, m]));
1164
+ return methodCache;
1165
+ } finally {
1166
+ releaseBusy();
1185
1167
  }
1186
- const batches = [];
1187
- if (out.finished) {
1188
- for (const emitted of out.batches) {
1189
- batches.push(emitted.batch);
1190
- }
1191
- } else {
1192
- const stateBytes = ctx.stateSerializer.serialize(state);
1193
- const schemaBytes = serializeSchema(outputSchema);
1194
- const inputSchemaBytes = serializeSchema(inputSchema);
1195
- const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.signingKey);
1196
- for (const emitted of out.batches) {
1197
- const batch = emitted.batch;
1198
- if (batch.numRows > 0) {
1199
- const mergedMeta = new Map(batch.metadata ?? []);
1200
- mergedMeta.set(STATE_KEY, token);
1201
- batches.push(new RecordBatch5(batch.schema, batch.data, mergedMeta));
1202
- } else {
1203
- batches.push(batch);
1168
+ }
1169
+ return {
1170
+ async call(method, params) {
1171
+ const methods = await ensureMethodCache();
1172
+ await acquireBusy();
1173
+ try {
1174
+ const info = methods.get(method);
1175
+ if (!info) {
1176
+ throw new Error(`Unknown method: '${method}'`);
1177
+ }
1178
+ const r = await ensureReader();
1179
+ const fullParams = { ...info.defaults ?? {}, ...params ?? {} };
1180
+ const body = buildRequestIpc(info.paramsSchema, fullParams, method);
1181
+ writeFn(body);
1182
+ const response = await r.readStream();
1183
+ if (!response) {
1184
+ throw new Error("EOF reading response");
1185
+ }
1186
+ let resultBatch = null;
1187
+ for (const batch of response.batches) {
1188
+ if (batch.numRows === 0) {
1189
+ dispatchLogOrError(batch, onLog);
1190
+ continue;
1191
+ }
1192
+ resultBatch = batch;
1193
+ }
1194
+ if (!resultBatch) {
1195
+ return null;
1204
1196
  }
1197
+ const rows = extractBatchRows(resultBatch);
1198
+ if (rows.length === 0)
1199
+ return null;
1200
+ if (info.resultSchema.fields.length === 0)
1201
+ return null;
1202
+ return rows[0];
1203
+ } finally {
1204
+ releaseBusy();
1205
1205
  }
1206
- if (!batches.some((b) => b.metadata?.get(STATE_KEY))) {
1207
- const tokenMeta = new Map;
1208
- tokenMeta.set(STATE_KEY, token);
1209
- batches.push(buildEmptyBatch(outputSchema, tokenMeta));
1206
+ },
1207
+ async stream(method, params) {
1208
+ const methods = await ensureMethodCache();
1209
+ await acquireBusy();
1210
+ try {
1211
+ const info = methods.get(method);
1212
+ if (!info) {
1213
+ throw new Error(`Unknown method: '${method}'`);
1214
+ }
1215
+ const r = await ensureReader();
1216
+ const fullParams = { ...info.defaults ?? {}, ...params ?? {} };
1217
+ const body = buildRequestIpc(info.paramsSchema, fullParams, method);
1218
+ writeFn(body);
1219
+ let header = null;
1220
+ if (info.headerSchema) {
1221
+ const headerStream = await r.readStream();
1222
+ if (headerStream) {
1223
+ for (const batch of headerStream.batches) {
1224
+ if (batch.numRows === 0) {
1225
+ dispatchLogOrError(batch, onLog);
1226
+ continue;
1227
+ }
1228
+ const rows = extractBatchRows(batch);
1229
+ if (rows.length > 0) {
1230
+ header = rows[0];
1231
+ }
1232
+ }
1233
+ }
1234
+ }
1235
+ const outputSchema = info.outputSchema ?? info.resultSchema;
1236
+ return new PipeStreamSession({
1237
+ reader: r,
1238
+ writeFn,
1239
+ onLog,
1240
+ header,
1241
+ outputSchema,
1242
+ releaseBusy,
1243
+ setDrainPromise
1244
+ });
1245
+ } catch (e) {
1246
+ try {
1247
+ const r = await ensureReader();
1248
+ const emptySchema = new Schema2([]);
1249
+ const ipc = serializeIpcStream(emptySchema, []);
1250
+ writeFn(ipc);
1251
+ const outStream = await r.readStream();
1252
+ } catch {}
1253
+ releaseBusy();
1254
+ throw e;
1210
1255
  }
1256
+ },
1257
+ async describe() {
1258
+ const methods = await ensureMethodCache();
1259
+ return {
1260
+ protocolName,
1261
+ methods: [...methods.values()]
1262
+ };
1263
+ },
1264
+ close() {
1265
+ if (closed)
1266
+ return;
1267
+ closed = true;
1268
+ writable.end();
1211
1269
  }
1212
- return arrowResponse(serializeIpcStream(outputSchema, batches));
1213
- }
1270
+ };
1214
1271
  }
1215
- async function produceStreamResponse(method, state, outputSchema, inputSchema, ctx, requestId, headerBytes) {
1216
- const allBatches = [];
1217
- const maxBytes = ctx.maxStreamResponseBytes;
1218
- let estimatedBytes = 0;
1219
- while (true) {
1220
- const out = new OutputCollector(outputSchema, true, ctx.serverId, requestId);
1221
- try {
1222
- if (method.producerFn) {
1223
- await method.producerFn(state, out);
1224
- } else {
1225
- const tickBatch = buildEmptyBatch(inputSchema);
1226
- await method.exchangeFn(state, tickBatch, out);
1227
- }
1228
- } catch (error) {
1229
- if (process.env.VGI_DISPATCH_DEBUG)
1230
- console.error(`[produceStreamResponse] error:`, error.message, error.stack?.split(`
1231
- `).slice(0, 3).join(`
1232
- `));
1233
- allBatches.push(buildErrorBatch(outputSchema, error, ctx.serverId, requestId));
1234
- break;
1272
+ function subprocessConnect(cmd, options) {
1273
+ const proc = Bun.spawn(cmd, {
1274
+ stdin: "pipe",
1275
+ stdout: "pipe",
1276
+ stderr: options?.stderr ?? "ignore",
1277
+ cwd: options?.cwd,
1278
+ env: options?.env ? { ...process.env, ...options.env } : undefined
1279
+ });
1280
+ const stdout = proc.stdout;
1281
+ const writable = {
1282
+ write(data) {
1283
+ proc.stdin.write(data);
1284
+ },
1285
+ flush() {
1286
+ proc.stdin.flush();
1287
+ },
1288
+ end() {
1289
+ proc.stdin.end();
1235
1290
  }
1236
- for (const emitted of out.batches) {
1237
- allBatches.push(emitted.batch);
1238
- if (maxBytes != null) {
1239
- estimatedBytes += emitted.batch.data.byteLength;
1240
- }
1291
+ };
1292
+ const client = pipeConnect(stdout, writable, {
1293
+ onLog: options?.onLog
1294
+ });
1295
+ const originalClose = client.close;
1296
+ client.close = () => {
1297
+ originalClose.call(client);
1298
+ try {
1299
+ proc.kill();
1300
+ } catch {}
1301
+ };
1302
+ return client;
1303
+ }
1304
+ // src/http/handler.ts
1305
+ import { randomBytes } from "node:crypto";
1306
+ import { Schema as Schema5 } from "@query-farm/apache-arrow";
1307
+
1308
+ // src/types.ts
1309
+ import { RecordBatch as RecordBatch6, recordBatchFromArrays } from "@query-farm/apache-arrow";
1310
+
1311
+ // src/wire/response.ts
1312
+ import {
1313
+ Data,
1314
+ DataType as DataType2,
1315
+ makeData as makeData5,
1316
+ RecordBatch as RecordBatch5,
1317
+ Struct as Struct5,
1318
+ vectorFromArray as vectorFromArray4
1319
+ } from "@query-farm/apache-arrow";
1320
+ function coerceInt64(schema, values) {
1321
+ const result = { ...values };
1322
+ for (const field of schema.fields) {
1323
+ const val = result[field.name];
1324
+ if (val === undefined)
1325
+ continue;
1326
+ if (!DataType2.isInt(field.type) || field.type.bitWidth !== 64)
1327
+ continue;
1328
+ if (Array.isArray(val)) {
1329
+ result[field.name] = val.map((v) => typeof v === "number" ? BigInt(v) : v);
1330
+ } else if (typeof val === "number") {
1331
+ result[field.name] = BigInt(val);
1241
1332
  }
1242
- if (out.finished) {
1243
- break;
1333
+ }
1334
+ return result;
1335
+ }
1336
+ function buildResultBatch(schema, values, serverId, requestId) {
1337
+ const metadata = new Map;
1338
+ metadata.set(SERVER_ID_KEY, serverId);
1339
+ if (requestId !== null) {
1340
+ metadata.set(REQUEST_ID_KEY, requestId);
1341
+ }
1342
+ if (schema.fields.length === 0) {
1343
+ return buildEmptyBatch(schema, metadata);
1344
+ }
1345
+ for (const field of schema.fields) {
1346
+ if (values[field.name] === undefined && !field.nullable) {
1347
+ const got = Object.keys(values);
1348
+ throw new TypeError(`Handler result missing required field '${field.name}'. Got keys: [${got.join(", ")}]`);
1244
1349
  }
1245
- if (maxBytes != null && estimatedBytes >= maxBytes) {
1246
- const stateBytes = ctx.stateSerializer.serialize(state);
1247
- const schemaBytes = serializeSchema(outputSchema);
1248
- const inputSchemaBytes = serializeSchema(inputSchema);
1249
- const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.signingKey);
1250
- const tokenMeta = new Map;
1251
- tokenMeta.set(STATE_KEY, token);
1252
- allBatches.push(buildEmptyBatch(outputSchema, tokenMeta));
1253
- break;
1350
+ }
1351
+ const coerced = coerceInt64(schema, values);
1352
+ const children = schema.fields.map((f) => {
1353
+ const val = coerced[f.name];
1354
+ if (val instanceof Data) {
1355
+ return val;
1254
1356
  }
1357
+ const arr = vectorFromArray4([val], f.type);
1358
+ return arr.data[0];
1359
+ });
1360
+ const structType = new Struct5(schema.fields);
1361
+ const data = makeData5({
1362
+ type: structType,
1363
+ length: 1,
1364
+ children,
1365
+ nullCount: 0
1366
+ });
1367
+ return new RecordBatch5(schema, data, metadata);
1368
+ }
1369
+ function buildErrorBatch(schema, error, serverId, requestId) {
1370
+ const metadata = new Map;
1371
+ metadata.set(LOG_LEVEL_KEY, "EXCEPTION");
1372
+ metadata.set(LOG_MESSAGE_KEY, `${error.constructor.name}: ${error.message}`);
1373
+ const extra = {
1374
+ exception_type: error.constructor.name,
1375
+ exception_message: error.message,
1376
+ traceback: error.stack ?? ""
1377
+ };
1378
+ metadata.set(LOG_EXTRA_KEY, JSON.stringify(extra));
1379
+ metadata.set(SERVER_ID_KEY, serverId);
1380
+ if (requestId !== null) {
1381
+ metadata.set(REQUEST_ID_KEY, requestId);
1255
1382
  }
1256
- const dataBytes = serializeIpcStream(outputSchema, allBatches);
1257
- let responseBody;
1258
- if (headerBytes) {
1259
- responseBody = concatBytes(headerBytes, dataBytes);
1260
- } else {
1261
- responseBody = dataBytes;
1383
+ return buildEmptyBatch(schema, metadata);
1384
+ }
1385
+ function buildLogBatch(schema, level, message, extra, serverId, requestId) {
1386
+ const metadata = new Map;
1387
+ metadata.set(LOG_LEVEL_KEY, level);
1388
+ metadata.set(LOG_MESSAGE_KEY, message);
1389
+ if (extra) {
1390
+ metadata.set(LOG_EXTRA_KEY, JSON.stringify(extra));
1262
1391
  }
1263
- return arrowResponse(responseBody);
1392
+ if (serverId != null) {
1393
+ metadata.set(SERVER_ID_KEY, serverId);
1394
+ }
1395
+ if (requestId != null) {
1396
+ metadata.set(REQUEST_ID_KEY, requestId);
1397
+ }
1398
+ return buildEmptyBatch(schema, metadata);
1264
1399
  }
1265
- function concatBytes(...arrays) {
1266
- const totalLen = arrays.reduce((sum, a) => sum + a.length, 0);
1267
- const result = new Uint8Array(totalLen);
1268
- let offset = 0;
1269
- for (const arr of arrays) {
1270
- result.set(arr, offset);
1271
- offset += arr.length;
1400
+ function buildEmptyBatch(schema, metadata) {
1401
+ const children = schema.fields.map((f) => {
1402
+ return makeData5({ type: f.type, length: 0, nullCount: 0 });
1403
+ });
1404
+ if (schema.fields.length === 0) {
1405
+ const structType2 = new Struct5(schema.fields);
1406
+ const data2 = makeData5({
1407
+ type: structType2,
1408
+ length: 0,
1409
+ children: [],
1410
+ nullCount: 0
1411
+ });
1412
+ return new RecordBatch5(schema, data2, metadata);
1272
1413
  }
1273
- return result;
1414
+ const structType = new Struct5(schema.fields);
1415
+ const data = makeData5({
1416
+ type: structType,
1417
+ length: 0,
1418
+ children,
1419
+ nullCount: 0
1420
+ });
1421
+ return new RecordBatch5(schema, data, metadata);
1274
1422
  }
1275
1423
 
1276
- // src/http/handler.ts
1277
- init_zstd();
1278
- var EMPTY_SCHEMA5 = new Schema9([]);
1279
- function createHttpHandler(protocol, options) {
1280
- const prefix = (options?.prefix ?? "/vgi").replace(/\/+$/, "");
1281
- const signingKey = options?.signingKey ?? randomBytes(32);
1282
- const tokenTtl = options?.tokenTtl ?? 3600;
1283
- const corsOrigins = options?.corsOrigins;
1284
- const maxRequestBytes = options?.maxRequestBytes;
1285
- const maxStreamResponseBytes = options?.maxStreamResponseBytes;
1286
- const serverId = options?.serverId ?? crypto.randomUUID().replace(/-/g, "").slice(0, 12);
1287
- const methods = protocol.getMethods();
1288
- const compressionLevel = options?.compressionLevel;
1289
- const stateSerializer = options?.stateSerializer ?? jsonStateSerializer;
1290
- const ctx = {
1291
- signingKey,
1292
- tokenTtl,
1293
- serverId,
1294
- maxStreamResponseBytes,
1295
- stateSerializer
1296
- };
1297
- function addCorsHeaders(headers) {
1298
- if (corsOrigins) {
1299
- headers.set("Access-Control-Allow-Origin", corsOrigins);
1300
- headers.set("Access-Control-Allow-Methods", "POST, OPTIONS");
1301
- headers.set("Access-Control-Allow-Headers", "Content-Type");
1302
- }
1424
+ // src/types.ts
1425
+ var MethodType;
1426
+ ((MethodType2) => {
1427
+ MethodType2["UNARY"] = "unary";
1428
+ MethodType2["STREAM"] = "stream";
1429
+ })(MethodType ||= {});
1430
+
1431
+ class OutputCollector {
1432
+ _batches = [];
1433
+ _dataBatchIdx = null;
1434
+ _finished = false;
1435
+ _producerMode;
1436
+ _outputSchema;
1437
+ _serverId;
1438
+ _requestId;
1439
+ constructor(outputSchema, producerMode = true, serverId = "", requestId = null) {
1440
+ this._outputSchema = outputSchema;
1441
+ this._producerMode = producerMode;
1442
+ this._serverId = serverId;
1443
+ this._requestId = requestId;
1303
1444
  }
1304
- async function compressIfAccepted(response, clientAcceptsZstd) {
1305
- if (compressionLevel == null || !clientAcceptsZstd)
1306
- return response;
1307
- const responseBody = new Uint8Array(await response.arrayBuffer());
1308
- const compressed = zstdCompress(responseBody, compressionLevel);
1309
- const headers = new Headers(response.headers);
1310
- headers.set("Content-Encoding", "zstd");
1311
- return new Response(compressed, {
1312
- status: response.status,
1313
- headers
1314
- });
1445
+ get outputSchema() {
1446
+ return this._outputSchema;
1315
1447
  }
1316
- function makeErrorResponse(error, statusCode, schema = EMPTY_SCHEMA5) {
1317
- const errBatch = buildErrorBatch(schema, error, serverId, null);
1318
- const body = serializeIpcStream(schema, [errBatch]);
1319
- const resp = arrowResponse(body, statusCode);
1320
- addCorsHeaders(resp.headers);
1321
- return resp;
1448
+ get finished() {
1449
+ return this._finished;
1322
1450
  }
1323
- return async function handler(request) {
1324
- const url = new URL(request.url);
1325
- const path = url.pathname;
1326
- if (request.method === "OPTIONS") {
1327
- if (path === `${prefix}/__capabilities__`) {
1328
- const headers = new Headers;
1329
- addCorsHeaders(headers);
1330
- if (maxRequestBytes != null) {
1331
- headers.set("VGI-Max-Request-Bytes", String(maxRequestBytes));
1332
- }
1333
- return new Response(null, { status: 204, headers });
1334
- }
1335
- if (corsOrigins) {
1336
- const headers = new Headers;
1337
- addCorsHeaders(headers);
1338
- return new Response(null, { status: 204, headers });
1339
- }
1340
- return new Response(null, { status: 405 });
1341
- }
1342
- if (request.method !== "POST") {
1343
- return new Response("Method Not Allowed", { status: 405 });
1344
- }
1345
- const contentType = request.headers.get("Content-Type");
1346
- if (!contentType || !contentType.includes(ARROW_CONTENT_TYPE)) {
1347
- return new Response(`Unsupported Media Type: expected ${ARROW_CONTENT_TYPE}`, { status: 415 });
1348
- }
1349
- if (maxRequestBytes != null) {
1350
- const contentLength = request.headers.get("Content-Length");
1351
- if (contentLength && parseInt(contentLength) > maxRequestBytes) {
1352
- return new Response("Request body too large", { status: 413 });
1353
- }
1354
- }
1355
- const clientAcceptsZstd = (request.headers.get("Accept-Encoding") ?? "").includes("zstd");
1356
- let body = new Uint8Array(await request.arrayBuffer());
1357
- const contentEncoding = request.headers.get("Content-Encoding");
1358
- if (contentEncoding === "zstd") {
1359
- body = zstdDecompress(body);
1360
- }
1361
- if (path === `${prefix}/${DESCRIBE_METHOD_NAME}`) {
1362
- try {
1363
- const response = httpDispatchDescribe(protocol.name, methods, serverId);
1364
- addCorsHeaders(response.headers);
1365
- return compressIfAccepted(response, clientAcceptsZstd);
1366
- } catch (error) {
1367
- return compressIfAccepted(makeErrorResponse(error, 500), clientAcceptsZstd);
1368
- }
1369
- }
1370
- if (!path.startsWith(prefix + "/")) {
1371
- return new Response("Not Found", { status: 404 });
1372
- }
1373
- const subPath = path.slice(prefix.length + 1);
1374
- let methodName;
1375
- let action;
1376
- if (subPath.endsWith("/init")) {
1377
- methodName = subPath.slice(0, -5);
1378
- action = "init";
1379
- } else if (subPath.endsWith("/exchange")) {
1380
- methodName = subPath.slice(0, -9);
1381
- action = "exchange";
1451
+ get batches() {
1452
+ return this._batches;
1453
+ }
1454
+ emit(batchOrColumns, metadata) {
1455
+ let batch;
1456
+ if (batchOrColumns instanceof RecordBatch6) {
1457
+ batch = batchOrColumns;
1382
1458
  } else {
1383
- methodName = subPath;
1384
- action = "call";
1459
+ const coerced = coerceInt64(this._outputSchema, batchOrColumns);
1460
+ batch = recordBatchFromArrays(coerced, this._outputSchema);
1461
+ }
1462
+ if (this._dataBatchIdx !== null) {
1463
+ throw new Error("Only one data batch may be emitted per call");
1385
1464
  }
1386
- const method = methods.get(methodName);
1387
- if (!method) {
1388
- const available = [...methods.keys()].sort();
1389
- const err = new Error(`Unknown method: '${methodName}'. Available methods: [${available.join(", ")}]`);
1390
- return compressIfAccepted(makeErrorResponse(err, 404), clientAcceptsZstd);
1465
+ this._dataBatchIdx = this._batches.length;
1466
+ this._batches.push({ batch, metadata });
1467
+ }
1468
+ emitRow(values) {
1469
+ const columns = {};
1470
+ for (const [key, value] of Object.entries(values)) {
1471
+ columns[key] = [value];
1391
1472
  }
1392
- try {
1393
- let response;
1394
- if (action === "call") {
1395
- if (method.type !== "unary" /* UNARY */) {
1396
- throw new HttpRpcError(`Method '${methodName}' is a stream method. Use /init and /exchange endpoints.`, 400);
1397
- }
1398
- response = await httpDispatchUnary(method, body, ctx);
1399
- } else if (action === "init") {
1400
- if (method.type !== "stream" /* STREAM */) {
1401
- throw new HttpRpcError(`Method '${methodName}' is a unary method. Use POST ${prefix}/${methodName} instead.`, 400);
1402
- }
1403
- response = await httpDispatchStreamInit(method, body, ctx);
1404
- } else {
1405
- if (method.type !== "stream" /* STREAM */) {
1406
- throw new HttpRpcError(`Method '${methodName}' is a unary method. Use POST ${prefix}/${methodName} instead.`, 400);
1407
- }
1408
- response = await httpDispatchStreamExchange(method, body, ctx);
1409
- }
1410
- addCorsHeaders(response.headers);
1411
- return compressIfAccepted(response, clientAcceptsZstd);
1412
- } catch (error) {
1413
- if (error instanceof HttpRpcError) {
1414
- return compressIfAccepted(makeErrorResponse(error, error.statusCode), clientAcceptsZstd);
1415
- }
1416
- return compressIfAccepted(makeErrorResponse(error, 500), clientAcceptsZstd);
1473
+ this.emit(columns);
1474
+ }
1475
+ finish() {
1476
+ if (!this._producerMode) {
1477
+ throw new Error("finish() is not allowed on exchange streams; " + "exchange streams must emit exactly one data batch per call");
1417
1478
  }
1418
- };
1479
+ this._finished = true;
1480
+ }
1481
+ clientLog(level, message, extra) {
1482
+ const batch = buildLogBatch(this._outputSchema, level, message, extra, this._serverId, this._requestId);
1483
+ this._batches.push({ batch });
1484
+ }
1419
1485
  }
1420
- // src/client/ipc.ts
1486
+
1487
+ // src/http/handler.ts
1488
+ init_zstd();
1489
+
1490
+ // src/http/dispatch.ts
1491
+ import { RecordBatch as RecordBatch8, RecordBatchReader as RecordBatchReader5, Schema as Schema4 } from "@query-farm/apache-arrow";
1492
+
1493
+ // src/dispatch/describe.ts
1421
1494
  import {
1422
- RecordBatch as RecordBatch6,
1423
- RecordBatchReader as RecordBatchReader4,
1424
- DataType as DataType4,
1425
- Float64 as Float642,
1426
- Int64 as Int642,
1427
- Utf8 as Utf83,
1428
- Bool as Bool3,
1429
- Binary as Binary3,
1430
- vectorFromArray as vectorFromArray3,
1431
- makeData as makeData4,
1432
- Struct as Struct4
1495
+ Binary as Binary2,
1496
+ Bool as Bool2,
1497
+ Field as Field3,
1498
+ makeData as makeData6,
1499
+ RecordBatch as RecordBatch7,
1500
+ Schema as Schema3,
1501
+ Struct as Struct6,
1502
+ Utf8 as Utf82,
1503
+ vectorFromArray as vectorFromArray5
1433
1504
  } from "@query-farm/apache-arrow";
1434
- function inferArrowType(value) {
1435
- if (typeof value === "string")
1436
- return new Utf83;
1437
- if (typeof value === "boolean")
1438
- return new Bool3;
1439
- if (typeof value === "bigint")
1440
- return new Int642;
1441
- if (typeof value === "number")
1442
- return new Float642;
1443
- if (value instanceof Uint8Array)
1444
- return new Binary3;
1445
- return new Utf83;
1505
+
1506
+ // src/util/schema.ts
1507
+ import { RecordBatchStreamWriter as RecordBatchStreamWriter3 } from "@query-farm/apache-arrow";
1508
+ function serializeSchema(schema) {
1509
+ const writer = new RecordBatchStreamWriter3;
1510
+ writer.reset(undefined, schema);
1511
+ writer.close();
1512
+ return writer.toUint8Array(true);
1446
1513
  }
1447
- function coerceForArrow(type, value) {
1448
- if (value == null)
1449
- return value;
1450
- if (DataType4.isInt(type) && type.bitWidth === 64) {
1451
- if (typeof value === "number")
1452
- return BigInt(value);
1453
- return value;
1454
- }
1455
- if (DataType4.isMap(type)) {
1456
- if (value instanceof Map) {
1457
- const entriesField = type.children[0];
1458
- const valueType = entriesField.type.children[1].type;
1459
- const coerced = new Map;
1460
- for (const [k, v] of value) {
1461
- coerced.set(k, coerceForArrow(valueType, v));
1514
+
1515
+ // src/dispatch/describe.ts
1516
+ var DESCRIBE_SCHEMA = new Schema3([
1517
+ new Field3("name", new Utf82, false),
1518
+ new Field3("method_type", new Utf82, false),
1519
+ new Field3("doc", new Utf82, true),
1520
+ new Field3("has_return", new Bool2, false),
1521
+ new Field3("params_schema_ipc", new Binary2, false),
1522
+ new Field3("result_schema_ipc", new Binary2, false),
1523
+ new Field3("param_types_json", new Utf82, true),
1524
+ new Field3("param_defaults_json", new Utf82, true),
1525
+ new Field3("has_header", new Bool2, false),
1526
+ new Field3("header_schema_ipc", new Binary2, true)
1527
+ ]);
1528
+ function buildDescribeBatch(protocolName, methods, serverId) {
1529
+ const sortedEntries = [...methods.entries()].sort(([a], [b]) => a.localeCompare(b));
1530
+ const names = [];
1531
+ const methodTypes = [];
1532
+ const docs = [];
1533
+ const hasReturns = [];
1534
+ const paramsSchemas = [];
1535
+ const resultSchemas = [];
1536
+ const paramTypesJsons = [];
1537
+ const paramDefaultsJsons = [];
1538
+ const hasHeaders = [];
1539
+ const headerSchemas = [];
1540
+ for (const [name, method] of sortedEntries) {
1541
+ names.push(name);
1542
+ methodTypes.push(method.type);
1543
+ docs.push(method.doc ?? null);
1544
+ const hasReturn = method.type === "unary" && method.resultSchema.fields.length > 0;
1545
+ hasReturns.push(hasReturn);
1546
+ paramsSchemas.push(serializeSchema(method.paramsSchema));
1547
+ resultSchemas.push(serializeSchema(method.resultSchema));
1548
+ if (method.paramTypes && Object.keys(method.paramTypes).length > 0) {
1549
+ paramTypesJsons.push(JSON.stringify(method.paramTypes));
1550
+ } else {
1551
+ paramTypesJsons.push(null);
1552
+ }
1553
+ if (method.defaults && Object.keys(method.defaults).length > 0) {
1554
+ const safe = {};
1555
+ for (const [k, v] of Object.entries(method.defaults)) {
1556
+ if (v === null || typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
1557
+ safe[k] = v;
1558
+ }
1462
1559
  }
1463
- return coerced;
1560
+ paramDefaultsJsons.push(Object.keys(safe).length > 0 ? JSON.stringify(safe) : null);
1561
+ } else {
1562
+ paramDefaultsJsons.push(null);
1464
1563
  }
1465
- return value;
1564
+ hasHeaders.push(!!method.headerSchema);
1565
+ headerSchemas.push(method.headerSchema ? serializeSchema(method.headerSchema) : null);
1466
1566
  }
1467
- if (DataType4.isList(type)) {
1468
- if (Array.isArray(value)) {
1469
- const elemType = type.children[0].type;
1470
- return value.map((v) => coerceForArrow(elemType, v));
1567
+ const nameArr = vectorFromArray5(names, new Utf82);
1568
+ const methodTypeArr = vectorFromArray5(methodTypes, new Utf82);
1569
+ const docArr = vectorFromArray5(docs, new Utf82);
1570
+ const hasReturnArr = vectorFromArray5(hasReturns, new Bool2);
1571
+ const paramsSchemaArr = vectorFromArray5(paramsSchemas, new Binary2);
1572
+ const resultSchemaArr = vectorFromArray5(resultSchemas, new Binary2);
1573
+ const paramTypesArr = vectorFromArray5(paramTypesJsons, new Utf82);
1574
+ const paramDefaultsArr = vectorFromArray5(paramDefaultsJsons, new Utf82);
1575
+ const hasHeaderArr = vectorFromArray5(hasHeaders, new Bool2);
1576
+ const headerSchemaArr = vectorFromArray5(headerSchemas, new Binary2);
1577
+ const children = [
1578
+ nameArr.data[0],
1579
+ methodTypeArr.data[0],
1580
+ docArr.data[0],
1581
+ hasReturnArr.data[0],
1582
+ paramsSchemaArr.data[0],
1583
+ resultSchemaArr.data[0],
1584
+ paramTypesArr.data[0],
1585
+ paramDefaultsArr.data[0],
1586
+ hasHeaderArr.data[0],
1587
+ headerSchemaArr.data[0]
1588
+ ];
1589
+ const structType = new Struct6(DESCRIBE_SCHEMA.fields);
1590
+ const data = makeData6({
1591
+ type: structType,
1592
+ length: sortedEntries.length,
1593
+ children,
1594
+ nullCount: 0
1595
+ });
1596
+ const metadata = new Map;
1597
+ metadata.set(PROTOCOL_NAME_KEY, protocolName);
1598
+ metadata.set(REQUEST_VERSION_KEY, REQUEST_VERSION);
1599
+ metadata.set(DESCRIBE_VERSION_KEY, DESCRIBE_VERSION);
1600
+ metadata.set(SERVER_ID_KEY, serverId);
1601
+ const batch = new RecordBatch7(DESCRIBE_SCHEMA, data, metadata);
1602
+ return { batch, metadata };
1603
+ }
1604
+
1605
+ // src/wire/request.ts
1606
+ import { DataType as DataType3 } from "@query-farm/apache-arrow";
1607
+ function parseRequest(schema, batch) {
1608
+ const metadata = batch.metadata ?? new Map;
1609
+ const methodName = metadata.get(RPC_METHOD_KEY);
1610
+ if (methodName === undefined) {
1611
+ throw new RpcError("ProtocolError", "Missing 'vgi_rpc.method' in request batch custom_metadata. " + "Each request batch must carry a 'vgi_rpc.method' key in its Arrow IPC custom_metadata " + "with the method name as a UTF-8 string.", "");
1612
+ }
1613
+ const version = metadata.get(REQUEST_VERSION_KEY);
1614
+ if (version === undefined) {
1615
+ throw new VersionError("Missing 'vgi_rpc.request_version' in request batch custom_metadata. " + `Set the 'vgi_rpc.request_version' custom_metadata value to '${REQUEST_VERSION}'.`);
1616
+ }
1617
+ if (version !== REQUEST_VERSION) {
1618
+ throw new VersionError(`Unsupported request version '${version}', expected '${REQUEST_VERSION}'. ` + `Set the 'vgi_rpc.request_version' custom_metadata value to '${REQUEST_VERSION}'.`);
1619
+ }
1620
+ const requestId = metadata.get(REQUEST_ID_KEY) ?? null;
1621
+ const params = {};
1622
+ if (schema.fields.length > 0 && batch.numRows !== 1) {
1623
+ throw new RpcError("ProtocolError", `Expected 1 row in request batch, got ${batch.numRows}. ` + "Each parameter is a column (not a row). The batch should have exactly 1 row.", "");
1624
+ }
1625
+ for (let i = 0;i < schema.fields.length; i++) {
1626
+ const field = schema.fields[i];
1627
+ if (DataType3.isMap(field.type)) {
1628
+ params[field.name] = batch.getChildAt(i).data[0];
1629
+ continue;
1471
1630
  }
1472
- return value;
1631
+ let value = batch.getChildAt(i)?.get(0);
1632
+ if (typeof value === "bigint") {
1633
+ if (value >= BigInt(Number.MIN_SAFE_INTEGER) && value <= BigInt(Number.MAX_SAFE_INTEGER)) {
1634
+ value = Number(value);
1635
+ }
1636
+ }
1637
+ params[field.name] = value;
1473
1638
  }
1474
- return value;
1639
+ return {
1640
+ methodName,
1641
+ requestVersion: version,
1642
+ requestId,
1643
+ schema,
1644
+ params,
1645
+ rawMetadata: metadata
1646
+ };
1475
1647
  }
1476
- function buildRequestIpc(schema, params, method) {
1477
- const metadata = new Map;
1478
- metadata.set(RPC_METHOD_KEY, method);
1479
- metadata.set(REQUEST_VERSION_KEY, REQUEST_VERSION);
1480
- if (schema.fields.length === 0) {
1481
- const structType2 = new Struct4(schema.fields);
1482
- const data2 = makeData4({
1483
- type: structType2,
1484
- length: 1,
1485
- children: [],
1486
- nullCount: 0
1487
- });
1488
- const batch2 = new RecordBatch6(schema, data2, metadata);
1489
- return serializeIpcStream(schema, [batch2]);
1648
+
1649
+ // src/http/token.ts
1650
+ import { createHmac, timingSafeEqual } from "node:crypto";
1651
+ var TOKEN_VERSION = 2;
1652
+ var HMAC_LEN = 32;
1653
+ var MIN_TOKEN_LEN = 1 + 8 + 12 + HMAC_LEN;
1654
+ function packStateToken(stateBytes, schemaBytes, inputSchemaBytes, signingKey, createdAt) {
1655
+ const now = createdAt ?? Math.floor(Date.now() / 1000);
1656
+ const payloadLen = 1 + 8 + 4 + stateBytes.length + 4 + schemaBytes.length + 4 + inputSchemaBytes.length;
1657
+ const buf = Buffer.alloc(payloadLen);
1658
+ let offset = 0;
1659
+ buf.writeUInt8(TOKEN_VERSION, offset);
1660
+ offset += 1;
1661
+ buf.writeBigUInt64LE(BigInt(now), offset);
1662
+ offset += 8;
1663
+ buf.writeUInt32LE(stateBytes.length, offset);
1664
+ offset += 4;
1665
+ buf.set(stateBytes, offset);
1666
+ offset += stateBytes.length;
1667
+ buf.writeUInt32LE(schemaBytes.length, offset);
1668
+ offset += 4;
1669
+ buf.set(schemaBytes, offset);
1670
+ offset += schemaBytes.length;
1671
+ buf.writeUInt32LE(inputSchemaBytes.length, offset);
1672
+ offset += 4;
1673
+ buf.set(inputSchemaBytes, offset);
1674
+ offset += inputSchemaBytes.length;
1675
+ const mac = createHmac("sha256", signingKey).update(buf).digest();
1676
+ const token = Buffer.concat([buf, mac]);
1677
+ return token.toString("base64");
1678
+ }
1679
+ function unpackStateToken(tokenBase64, signingKey, tokenTtl) {
1680
+ const token = Buffer.from(tokenBase64, "base64");
1681
+ if (token.length < MIN_TOKEN_LEN) {
1682
+ throw new Error("State token too short");
1683
+ }
1684
+ const payload = token.subarray(0, token.length - HMAC_LEN);
1685
+ const receivedMac = token.subarray(token.length - HMAC_LEN);
1686
+ const expectedMac = createHmac("sha256", signingKey).update(payload).digest();
1687
+ if (!timingSafeEqual(receivedMac, expectedMac)) {
1688
+ throw new Error("State token HMAC verification failed");
1490
1689
  }
1491
- const children = schema.fields.map((f) => {
1492
- const val = coerceForArrow(f.type, params[f.name]);
1493
- return vectorFromArray3([val], f.type).data[0];
1494
- });
1495
- const structType = new Struct4(schema.fields);
1496
- const data = makeData4({
1497
- type: structType,
1498
- length: 1,
1499
- children,
1500
- nullCount: 0
1501
- });
1502
- const batch = new RecordBatch6(schema, data, metadata);
1503
- return serializeIpcStream(schema, [batch]);
1504
- }
1505
- async function readResponseBatches(body) {
1506
- const reader = await RecordBatchReader4.from(body);
1507
- await reader.open();
1508
- const schema = reader.schema;
1509
- if (!schema) {
1510
- throw new RpcError("ProtocolError", "Empty IPC stream: no schema", "");
1690
+ let offset = 0;
1691
+ const version = payload.readUInt8(offset);
1692
+ offset += 1;
1693
+ if (version !== TOKEN_VERSION) {
1694
+ throw new Error(`Unsupported state token version: ${version}`);
1511
1695
  }
1512
- const batches = reader.readAll();
1513
- return { schema, batches };
1514
- }
1515
- function dispatchLogOrError(batch, onLog) {
1516
- const meta = batch.metadata;
1517
- if (!meta)
1518
- return false;
1519
- const level = meta.get(LOG_LEVEL_KEY);
1520
- if (!level)
1521
- return false;
1522
- const message = meta.get(LOG_MESSAGE_KEY) ?? "";
1523
- if (level === "EXCEPTION") {
1524
- const extraStr = meta.get(LOG_EXTRA_KEY);
1525
- let errorType = "RpcError";
1526
- let errorMessage = message;
1527
- let traceback = "";
1528
- if (extraStr) {
1529
- try {
1530
- const extra = JSON.parse(extraStr);
1531
- errorType = extra.exception_type ?? "RpcError";
1532
- errorMessage = extra.exception_message ?? message;
1533
- traceback = extra.traceback ?? "";
1534
- } catch {}
1696
+ const createdAt = Number(payload.readBigUInt64LE(offset));
1697
+ offset += 8;
1698
+ if (tokenTtl > 0) {
1699
+ const now = Math.floor(Date.now() / 1000);
1700
+ if (now - createdAt > tokenTtl) {
1701
+ throw new Error("State token expired");
1535
1702
  }
1536
- throw new RpcError(errorType, errorMessage, traceback);
1537
1703
  }
1538
- if (onLog) {
1539
- const extraStr = meta.get(LOG_EXTRA_KEY);
1540
- let extra;
1541
- if (extraStr) {
1542
- try {
1543
- extra = JSON.parse(extraStr);
1544
- } catch {}
1545
- }
1546
- onLog({ level, message, extra });
1704
+ const stateLen = payload.readUInt32LE(offset);
1705
+ offset += 4;
1706
+ if (offset + stateLen > payload.length) {
1707
+ throw new Error("State token truncated (state)");
1547
1708
  }
1548
- return true;
1549
- }
1550
- function extractBatchRows(batch) {
1551
- const rows = [];
1552
- for (let r = 0;r < batch.numRows; r++) {
1553
- const row = {};
1554
- for (let i = 0;i < batch.schema.fields.length; i++) {
1555
- const field = batch.schema.fields[i];
1556
- let value = batch.getChildAt(i)?.get(r);
1557
- if (typeof value === "bigint") {
1558
- if (value >= BigInt(Number.MIN_SAFE_INTEGER) && value <= BigInt(Number.MAX_SAFE_INTEGER)) {
1559
- value = Number(value);
1560
- }
1561
- }
1562
- row[field.name] = value;
1563
- }
1564
- rows.push(row);
1709
+ const stateBytes = payload.slice(offset, offset + stateLen);
1710
+ offset += stateLen;
1711
+ const schemaLen = payload.readUInt32LE(offset);
1712
+ offset += 4;
1713
+ if (offset + schemaLen > payload.length) {
1714
+ throw new Error("State token truncated (schema)");
1565
1715
  }
1566
- return rows;
1567
- }
1568
- async function readSequentialStreams(body) {
1569
- const stream = new ReadableStream({
1570
- start(controller) {
1571
- controller.enqueue(body);
1572
- controller.close();
1573
- }
1574
- });
1575
- return IpcStreamReader.create(stream);
1716
+ const schemaBytes = payload.slice(offset, offset + schemaLen);
1717
+ offset += schemaLen;
1718
+ const inputSchemaLen = payload.readUInt32LE(offset);
1719
+ offset += 4;
1720
+ if (offset + inputSchemaLen > payload.length) {
1721
+ throw new Error("State token truncated (input schema)");
1722
+ }
1723
+ const inputSchemaBytes = payload.slice(offset, offset + inputSchemaLen);
1724
+ return { stateBytes, schemaBytes, inputSchemaBytes, createdAt };
1576
1725
  }
1577
1726
 
1578
- // src/client/introspect.ts
1579
- import { RecordBatchReader as RecordBatchReader5 } from "@query-farm/apache-arrow";
1580
- import { Schema as ArrowSchema } from "@query-farm/apache-arrow";
1581
- async function deserializeSchema2(bytes2) {
1582
- const reader = await RecordBatchReader5.from(bytes2);
1727
+ // src/http/dispatch.ts
1728
+ async function deserializeSchema2(bytes) {
1729
+ const reader = await RecordBatchReader5.from(bytes);
1583
1730
  await reader.open();
1584
1731
  return reader.schema;
1585
1732
  }
1586
- async function parseDescribeResponse(batches, onLog) {
1587
- let dataBatch = null;
1588
- for (const batch of batches) {
1589
- if (batch.numRows === 0) {
1590
- dispatchLogOrError(batch, onLog);
1591
- continue;
1592
- }
1593
- dataBatch = batch;
1594
- }
1595
- if (!dataBatch) {
1596
- throw new Error("Empty __describe__ response");
1733
+ var EMPTY_SCHEMA = new Schema4([]);
1734
+ function httpDispatchDescribe(protocolName, methods, serverId) {
1735
+ const { batch } = buildDescribeBatch(protocolName, methods, serverId);
1736
+ const body = serializeIpcStream(DESCRIBE_SCHEMA, [batch]);
1737
+ return arrowResponse(body);
1738
+ }
1739
+ async function httpDispatchUnary(method, body, ctx) {
1740
+ const schema = method.resultSchema;
1741
+ const { schema: reqSchema, batch: reqBatch } = await readRequestFromBody(body);
1742
+ const parsed = parseRequest(reqSchema, reqBatch);
1743
+ if (parsed.methodName !== method.name) {
1744
+ throw new HttpRpcError(`Method name in request '${parsed.methodName}' does not match URL '${method.name}'`, 400);
1597
1745
  }
1598
- const meta = dataBatch.metadata;
1599
- const protocolName = meta?.get(PROTOCOL_NAME_KEY) ?? "";
1600
- const methods = [];
1601
- for (let i = 0;i < dataBatch.numRows; i++) {
1602
- const name = dataBatch.getChildAt(0).get(i);
1603
- const methodType = dataBatch.getChildAt(1).get(i);
1604
- const doc = dataBatch.getChildAt(2)?.get(i);
1605
- const hasReturn = dataBatch.getChildAt(3).get(i);
1606
- const paramsIpc = dataBatch.getChildAt(4).get(i);
1607
- const resultIpc = dataBatch.getChildAt(5).get(i);
1608
- const paramTypesJson = dataBatch.getChildAt(6)?.get(i);
1609
- const paramDefaultsJson = dataBatch.getChildAt(7)?.get(i);
1610
- const hasHeader = dataBatch.getChildAt(8).get(i);
1611
- const headerIpc = dataBatch.getChildAt(9)?.get(i);
1612
- const paramsSchema = await deserializeSchema2(paramsIpc);
1613
- const resultSchema = await deserializeSchema2(resultIpc);
1614
- let paramTypes;
1615
- if (paramTypesJson) {
1616
- try {
1617
- paramTypes = JSON.parse(paramTypesJson);
1618
- } catch {}
1619
- }
1620
- let defaults;
1621
- if (paramDefaultsJson) {
1622
- try {
1623
- defaults = JSON.parse(paramDefaultsJson);
1624
- } catch {}
1625
- }
1626
- const info = {
1627
- name,
1628
- type: methodType,
1629
- paramsSchema,
1630
- resultSchema,
1631
- doc: doc ?? undefined,
1632
- paramTypes,
1633
- defaults
1634
- };
1635
- if (methodType === "stream") {
1636
- info.outputSchema = resultSchema;
1637
- }
1638
- if (hasHeader && headerIpc) {
1639
- info.headerSchema = await deserializeSchema2(headerIpc);
1640
- }
1641
- methods.push(info);
1746
+ const out = new OutputCollector(schema, true, ctx.serverId, parsed.requestId);
1747
+ try {
1748
+ const result = await method.handler(parsed.params, out);
1749
+ const resultBatch = buildResultBatch(schema, result, ctx.serverId, parsed.requestId);
1750
+ const batches = [...out.batches.map((b) => b.batch), resultBatch];
1751
+ return arrowResponse(serializeIpcStream(schema, batches));
1752
+ } catch (error) {
1753
+ const errBatch = buildErrorBatch(schema, error, ctx.serverId, parsed.requestId);
1754
+ return arrowResponse(serializeIpcStream(schema, [errBatch]), 500);
1642
1755
  }
1643
- return { protocolName, methods };
1644
- }
1645
- async function httpIntrospect(baseUrl, options) {
1646
- const prefix = options?.prefix ?? "/vgi";
1647
- const emptySchema = new ArrowSchema([]);
1648
- const body = buildRequestIpc(emptySchema, {}, DESCRIBE_METHOD_NAME);
1649
- const response = await fetch(`${baseUrl}${prefix}/${DESCRIBE_METHOD_NAME}`, {
1650
- method: "POST",
1651
- headers: { "Content-Type": ARROW_CONTENT_TYPE },
1652
- body
1653
- });
1654
- const responseBody = new Uint8Array(await response.arrayBuffer());
1655
- const { batches } = await readResponseBatches(responseBody);
1656
- return parseDescribeResponse(batches);
1657
1756
  }
1658
-
1659
- // src/client/stream.ts
1660
- import {
1661
- RecordBatch as RecordBatch8,
1662
- Schema as Schema11,
1663
- Field as Field4,
1664
- makeData as makeData5,
1665
- Struct as Struct5,
1666
- vectorFromArray as vectorFromArray4
1667
- } from "@query-farm/apache-arrow";
1668
- class HttpStreamSession {
1669
- _baseUrl;
1670
- _prefix;
1671
- _method;
1672
- _stateToken;
1673
- _outputSchema;
1674
- _inputSchema;
1675
- _onLog;
1676
- _pendingBatches;
1677
- _finished;
1678
- _header;
1679
- _compressionLevel;
1680
- _compressFn;
1681
- _decompressFn;
1682
- constructor(opts) {
1683
- this._baseUrl = opts.baseUrl;
1684
- this._prefix = opts.prefix;
1685
- this._method = opts.method;
1686
- this._stateToken = opts.stateToken;
1687
- this._outputSchema = opts.outputSchema;
1688
- this._inputSchema = opts.inputSchema;
1689
- this._onLog = opts.onLog;
1690
- this._pendingBatches = opts.pendingBatches;
1691
- this._finished = opts.finished;
1692
- this._header = opts.header;
1693
- this._compressionLevel = opts.compressionLevel;
1694
- this._compressFn = opts.compressFn;
1695
- this._decompressFn = opts.decompressFn;
1696
- }
1697
- get header() {
1698
- return this._header;
1757
+ async function httpDispatchStreamInit(method, body, ctx) {
1758
+ const isProducer = !!method.producerFn;
1759
+ const outputSchema = method.outputSchema;
1760
+ const inputSchema = method.inputSchema ?? EMPTY_SCHEMA;
1761
+ const { schema: reqSchema, batch: reqBatch } = await readRequestFromBody(body);
1762
+ const parsed = parseRequest(reqSchema, reqBatch);
1763
+ if (parsed.methodName !== method.name) {
1764
+ throw new HttpRpcError(`Method name in request '${parsed.methodName}' does not match URL '${method.name}'`, 400);
1699
1765
  }
1700
- _buildHeaders() {
1701
- const headers = {
1702
- "Content-Type": ARROW_CONTENT_TYPE
1703
- };
1704
- if (this._compressionLevel != null) {
1705
- headers["Content-Encoding"] = "zstd";
1706
- headers["Accept-Encoding"] = "zstd";
1766
+ let state;
1767
+ try {
1768
+ if (isProducer) {
1769
+ state = await method.producerInit(parsed.params);
1770
+ } else {
1771
+ state = await method.exchangeInit(parsed.params);
1707
1772
  }
1708
- return headers;
1773
+ } catch (error) {
1774
+ const errSchema = method.headerSchema ?? EMPTY_SCHEMA;
1775
+ const errBatch = buildErrorBatch(errSchema, error, ctx.serverId, parsed.requestId);
1776
+ return arrowResponse(serializeIpcStream(errSchema, [errBatch]), 500);
1709
1777
  }
1710
- _prepareBody(content) {
1711
- if (this._compressionLevel != null && this._compressFn) {
1712
- return this._compressFn(content, this._compressionLevel);
1778
+ const resolvedOutputSchema = state?.__outputSchema ?? outputSchema;
1779
+ const effectiveProducer = state?.__isProducer ?? isProducer;
1780
+ let headerBytes = null;
1781
+ if (method.headerSchema && method.headerInit) {
1782
+ try {
1783
+ const headerOut = new OutputCollector(method.headerSchema, true, ctx.serverId, parsed.requestId);
1784
+ const headerValues = method.headerInit(parsed.params, state, headerOut);
1785
+ const headerBatch = buildResultBatch(method.headerSchema, headerValues, ctx.serverId, parsed.requestId);
1786
+ const headerBatches = [...headerOut.batches.map((b) => b.batch), headerBatch];
1787
+ headerBytes = serializeIpcStream(method.headerSchema, headerBatches);
1788
+ } catch (error) {
1789
+ const errBatch = buildErrorBatch(method.headerSchema, error, ctx.serverId, parsed.requestId);
1790
+ return arrowResponse(serializeIpcStream(method.headerSchema, [errBatch]), 500);
1713
1791
  }
1714
- return content;
1715
1792
  }
1716
- async _readResponse(resp) {
1717
- let body = new Uint8Array(await resp.arrayBuffer());
1718
- if (resp.headers.get("Content-Encoding") === "zstd" && this._decompressFn) {
1719
- body = new Uint8Array(this._decompressFn(body));
1793
+ if (effectiveProducer) {
1794
+ return produceStreamResponse(method, state, resolvedOutputSchema, inputSchema, ctx, parsed.requestId, headerBytes);
1795
+ } else {
1796
+ const stateBytes = ctx.stateSerializer.serialize(state);
1797
+ const schemaBytes = serializeSchema(resolvedOutputSchema);
1798
+ const inputSchemaBytes = serializeSchema(inputSchema);
1799
+ const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.signingKey);
1800
+ const tokenMeta = new Map;
1801
+ tokenMeta.set(STATE_KEY, token);
1802
+ const tokenBatch = buildEmptyBatch(resolvedOutputSchema, tokenMeta);
1803
+ const tokenStreamBytes = serializeIpcStream(resolvedOutputSchema, [tokenBatch]);
1804
+ let responseBody;
1805
+ if (headerBytes) {
1806
+ responseBody = concatBytes(headerBytes, tokenStreamBytes);
1807
+ } else {
1808
+ responseBody = tokenStreamBytes;
1720
1809
  }
1721
- return body;
1810
+ return arrowResponse(responseBody);
1722
1811
  }
1723
- async exchange(input) {
1724
- if (this._stateToken === null) {
1725
- throw new RpcError("ProtocolError", "Stream has finished — no state token available", "");
1726
- }
1727
- if (input.length === 0) {
1728
- const zeroSchema = this._inputSchema ?? this._outputSchema;
1729
- const emptyBatch = this._buildEmptyBatch(zeroSchema);
1730
- const metadata2 = new Map;
1731
- metadata2.set(STATE_KEY, this._stateToken);
1732
- const batchWithMeta = new RecordBatch8(zeroSchema, emptyBatch.data, metadata2);
1733
- return this._doExchange(zeroSchema, [batchWithMeta]);
1812
+ }
1813
+ async function httpDispatchStreamExchange(method, body, ctx) {
1814
+ const isProducer = !!method.producerFn;
1815
+ const { batch: reqBatch } = await readRequestFromBody(body);
1816
+ const tokenBase64 = reqBatch.metadata?.get(STATE_KEY);
1817
+ if (!tokenBase64) {
1818
+ throw new HttpRpcError("Missing state token in exchange request", 400);
1819
+ }
1820
+ let unpacked;
1821
+ try {
1822
+ unpacked = unpackStateToken(tokenBase64, ctx.signingKey, ctx.tokenTtl);
1823
+ } catch (error) {
1824
+ throw new HttpRpcError(`Invalid state token: ${error.message}`, 400);
1825
+ }
1826
+ let state;
1827
+ try {
1828
+ state = ctx.stateSerializer.deserialize(unpacked.stateBytes);
1829
+ } catch (error) {
1830
+ console.error(`[httpDispatchStreamExchange] state deserialize error:`, error.message);
1831
+ throw new HttpRpcError(`State deserialization failed: ${error.message}`, 500);
1832
+ }
1833
+ let outputSchema;
1834
+ if (unpacked.schemaBytes.length > 0) {
1835
+ outputSchema = await deserializeSchema2(unpacked.schemaBytes);
1836
+ } else {
1837
+ outputSchema = state?.__outputSchema ?? method.outputSchema;
1838
+ }
1839
+ let inputSchema;
1840
+ if (unpacked.inputSchemaBytes.length > 0) {
1841
+ inputSchema = await deserializeSchema2(unpacked.inputSchemaBytes);
1842
+ } else {
1843
+ inputSchema = method.inputSchema ?? EMPTY_SCHEMA;
1844
+ }
1845
+ const effectiveProducer = state?.__isProducer ?? isProducer;
1846
+ if (process.env.VGI_DISPATCH_DEBUG)
1847
+ console.error(`[httpDispatchStreamExchange] method=${method.name} effectiveProducer=${effectiveProducer} stateKeys=${Object.keys(state || {})}`);
1848
+ if (effectiveProducer) {
1849
+ return produceStreamResponse(method, state, outputSchema, inputSchema, ctx, null, null);
1850
+ } else {
1851
+ const out = new OutputCollector(outputSchema, effectiveProducer, ctx.serverId, null);
1852
+ try {
1853
+ if (method.exchangeFn) {
1854
+ await method.exchangeFn(state, reqBatch, out);
1855
+ } else {
1856
+ await method.producerFn(state, out);
1857
+ }
1858
+ } catch (error) {
1859
+ if (process.env.VGI_DISPATCH_DEBUG)
1860
+ console.error(`[httpDispatchStreamExchange] exchange handler error:`, error.message, error.stack?.split(`
1861
+ `).slice(0, 5).join(`
1862
+ `));
1863
+ const errBatch = buildErrorBatch(outputSchema, error, ctx.serverId, null);
1864
+ return arrowResponse(serializeIpcStream(outputSchema, [errBatch]), 500);
1734
1865
  }
1735
- const keys = Object.keys(input[0]);
1736
- const fields = keys.map((key) => {
1737
- let sample = undefined;
1738
- for (const row of input) {
1739
- if (row[key] != null) {
1740
- sample = row[key];
1741
- break;
1742
- }
1866
+ const batches = [];
1867
+ if (out.finished) {
1868
+ for (const emitted of out.batches) {
1869
+ batches.push(emitted.batch);
1743
1870
  }
1744
- const arrowType = inferArrowType(sample);
1745
- const nullable = input.some((row) => row[key] == null);
1746
- return new Field4(key, arrowType, nullable);
1747
- });
1748
- const inputSchema = new Schema11(fields);
1749
- const children = inputSchema.fields.map((f) => {
1750
- const values = input.map((row) => row[f.name]);
1751
- return vectorFromArray4(values, f.type).data[0];
1752
- });
1753
- const structType = new Struct5(inputSchema.fields);
1754
- const data = makeData5({
1755
- type: structType,
1756
- length: input.length,
1757
- children,
1758
- nullCount: 0
1759
- });
1760
- const metadata = new Map;
1761
- metadata.set(STATE_KEY, this._stateToken);
1762
- const batch = new RecordBatch8(inputSchema, data, metadata);
1763
- return this._doExchange(inputSchema, [batch]);
1764
- }
1765
- async _doExchange(schema, batches) {
1766
- const body = serializeIpcStream(schema, batches);
1767
- const resp = await fetch(`${this._baseUrl}${this._prefix}/${this._method}/exchange`, {
1768
- method: "POST",
1769
- headers: this._buildHeaders(),
1770
- body: this._prepareBody(body)
1771
- });
1772
- const responseBody = await this._readResponse(resp);
1773
- const { batches: responseBatches } = await readResponseBatches(responseBody);
1774
- let resultRows = [];
1775
- for (const batch of responseBatches) {
1776
- if (batch.numRows === 0) {
1777
- dispatchLogOrError(batch, this._onLog);
1778
- const token2 = batch.metadata?.get(STATE_KEY);
1779
- if (token2) {
1780
- this._stateToken = token2;
1871
+ } else {
1872
+ const stateBytes = ctx.stateSerializer.serialize(state);
1873
+ const schemaBytes = serializeSchema(outputSchema);
1874
+ const inputSchemaBytes = serializeSchema(inputSchema);
1875
+ const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.signingKey);
1876
+ for (const emitted of out.batches) {
1877
+ const batch = emitted.batch;
1878
+ if (batch.numRows > 0) {
1879
+ const mergedMeta = new Map(batch.metadata ?? []);
1880
+ mergedMeta.set(STATE_KEY, token);
1881
+ batches.push(new RecordBatch8(batch.schema, batch.data, mergedMeta));
1882
+ } else {
1883
+ batches.push(batch);
1781
1884
  }
1782
- continue;
1783
1885
  }
1784
- const token = batch.metadata?.get(STATE_KEY);
1785
- if (token) {
1786
- this._stateToken = token;
1886
+ if (!batches.some((b) => b.metadata?.get(STATE_KEY))) {
1887
+ const tokenMeta = new Map;
1888
+ tokenMeta.set(STATE_KEY, token);
1889
+ batches.push(buildEmptyBatch(outputSchema, tokenMeta));
1787
1890
  }
1788
- resultRows = extractBatchRows(batch);
1789
1891
  }
1790
- return resultRows;
1791
- }
1792
- _buildEmptyBatch(schema) {
1793
- const children = schema.fields.map((f) => {
1794
- return makeData5({ type: f.type, length: 0, nullCount: 0 });
1795
- });
1796
- const structType = new Struct5(schema.fields);
1797
- const data = makeData5({
1798
- type: structType,
1799
- length: 0,
1800
- children,
1801
- nullCount: 0
1802
- });
1803
- return new RecordBatch8(schema, data);
1892
+ return arrowResponse(serializeIpcStream(outputSchema, batches));
1804
1893
  }
1805
- async* [Symbol.asyncIterator]() {
1806
- for (const batch of this._pendingBatches) {
1807
- if (batch.numRows === 0) {
1808
- dispatchLogOrError(batch, this._onLog);
1809
- continue;
1894
+ }
1895
+ async function produceStreamResponse(method, state, outputSchema, inputSchema, ctx, requestId, headerBytes) {
1896
+ const allBatches = [];
1897
+ const maxBytes = ctx.maxStreamResponseBytes;
1898
+ let estimatedBytes = 0;
1899
+ while (true) {
1900
+ const out = new OutputCollector(outputSchema, true, ctx.serverId, requestId);
1901
+ try {
1902
+ if (method.producerFn) {
1903
+ await method.producerFn(state, out);
1904
+ } else {
1905
+ const tickBatch = buildEmptyBatch(inputSchema);
1906
+ await method.exchangeFn(state, tickBatch, out);
1810
1907
  }
1811
- yield extractBatchRows(batch);
1908
+ } catch (error) {
1909
+ if (process.env.VGI_DISPATCH_DEBUG)
1910
+ console.error(`[produceStreamResponse] error:`, error.message, error.stack?.split(`
1911
+ `).slice(0, 3).join(`
1912
+ `));
1913
+ allBatches.push(buildErrorBatch(outputSchema, error, ctx.serverId, requestId));
1914
+ break;
1812
1915
  }
1813
- this._pendingBatches = [];
1814
- if (this._finished)
1815
- return;
1816
- if (this._stateToken === null)
1817
- return;
1818
- while (true) {
1819
- const responseBody = await this._sendContinuation(this._stateToken);
1820
- const { batches } = await readResponseBatches(responseBody);
1821
- let gotContinuation = false;
1822
- for (const batch of batches) {
1823
- if (batch.numRows === 0) {
1824
- const token = batch.metadata?.get(STATE_KEY);
1825
- if (token) {
1826
- this._stateToken = token;
1827
- gotContinuation = true;
1828
- continue;
1829
- }
1830
- dispatchLogOrError(batch, this._onLog);
1831
- continue;
1832
- }
1833
- yield extractBatchRows(batch);
1916
+ for (const emitted of out.batches) {
1917
+ allBatches.push(emitted.batch);
1918
+ if (maxBytes != null) {
1919
+ estimatedBytes += emitted.batch.data.byteLength;
1834
1920
  }
1835
- if (!gotContinuation)
1836
- break;
1921
+ }
1922
+ if (out.finished) {
1923
+ break;
1924
+ }
1925
+ if (maxBytes != null && estimatedBytes >= maxBytes) {
1926
+ const stateBytes = ctx.stateSerializer.serialize(state);
1927
+ const schemaBytes = serializeSchema(outputSchema);
1928
+ const inputSchemaBytes = serializeSchema(inputSchema);
1929
+ const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.signingKey);
1930
+ const tokenMeta = new Map;
1931
+ tokenMeta.set(STATE_KEY, token);
1932
+ allBatches.push(buildEmptyBatch(outputSchema, tokenMeta));
1933
+ break;
1837
1934
  }
1838
1935
  }
1839
- async _sendContinuation(token) {
1840
- const emptySchema = new Schema11([]);
1841
- const metadata = new Map;
1842
- metadata.set(STATE_KEY, token);
1843
- const structType = new Struct5(emptySchema.fields);
1844
- const data = makeData5({
1845
- type: structType,
1846
- length: 1,
1847
- children: [],
1848
- nullCount: 0
1849
- });
1850
- const batch = new RecordBatch8(emptySchema, data, metadata);
1851
- const body = serializeIpcStream(emptySchema, [batch]);
1852
- const resp = await fetch(`${this._baseUrl}${this._prefix}/${this._method}/exchange`, {
1853
- method: "POST",
1854
- headers: this._buildHeaders(),
1855
- body: this._prepareBody(body)
1856
- });
1857
- return this._readResponse(resp);
1936
+ const dataBytes = serializeIpcStream(outputSchema, allBatches);
1937
+ let responseBody;
1938
+ if (headerBytes) {
1939
+ responseBody = concatBytes(headerBytes, dataBytes);
1940
+ } else {
1941
+ responseBody = dataBytes;
1858
1942
  }
1859
- close() {}
1943
+ return arrowResponse(responseBody);
1944
+ }
1945
+ function concatBytes(...arrays) {
1946
+ const totalLen = arrays.reduce((sum, a) => sum + a.length, 0);
1947
+ const result = new Uint8Array(totalLen);
1948
+ let offset = 0;
1949
+ for (const arr of arrays) {
1950
+ result.set(arr, offset);
1951
+ offset += arr.length;
1952
+ }
1953
+ return result;
1860
1954
  }
1861
1955
 
1862
- // src/client/connect.ts
1863
- function httpConnect(baseUrl, options) {
1956
+ // src/http/types.ts
1957
+ var jsonStateSerializer = {
1958
+ serialize(state) {
1959
+ return new TextEncoder().encode(JSON.stringify(state, (_key, value) => typeof value === "bigint" ? `__bigint__:${value}` : value));
1960
+ },
1961
+ deserialize(bytes) {
1962
+ return JSON.parse(new TextDecoder().decode(bytes), (_key, value) => typeof value === "string" && value.startsWith("__bigint__:") ? BigInt(value.slice(11)) : value);
1963
+ }
1964
+ };
1965
+
1966
+ // src/http/handler.ts
1967
+ var EMPTY_SCHEMA2 = new Schema5([]);
1968
+ function createHttpHandler(protocol, options) {
1864
1969
  const prefix = (options?.prefix ?? "/vgi").replace(/\/+$/, "");
1865
- const onLog = options?.onLog;
1970
+ const signingKey = options?.signingKey ?? randomBytes(32);
1971
+ const tokenTtl = options?.tokenTtl ?? 3600;
1972
+ const corsOrigins = options?.corsOrigins;
1973
+ const maxRequestBytes = options?.maxRequestBytes;
1974
+ const maxStreamResponseBytes = options?.maxStreamResponseBytes;
1975
+ const serverId = options?.serverId ?? crypto.randomUUID().replace(/-/g, "").slice(0, 12);
1976
+ const methods = protocol.getMethods();
1866
1977
  const compressionLevel = options?.compressionLevel;
1867
- let methodCache = null;
1868
- let compressFn;
1869
- let decompressFn;
1870
- let compressionLoaded = false;
1871
- async function ensureCompression() {
1872
- if (compressionLoaded || compressionLevel == null)
1873
- return;
1874
- compressionLoaded = true;
1875
- try {
1876
- const mod = await Promise.resolve().then(() => (init_zstd(), exports_zstd));
1877
- compressFn = mod.zstdCompress;
1878
- decompressFn = mod.zstdDecompress;
1879
- } catch {}
1880
- }
1881
- function buildHeaders() {
1882
- const headers = {
1883
- "Content-Type": ARROW_CONTENT_TYPE
1884
- };
1885
- if (compressionLevel != null) {
1886
- headers["Content-Encoding"] = "zstd";
1887
- headers["Accept-Encoding"] = "zstd";
1888
- }
1889
- return headers;
1890
- }
1891
- function prepareBody(content) {
1892
- if (compressionLevel != null && compressFn) {
1893
- return compressFn(content, compressionLevel);
1978
+ const stateSerializer = options?.stateSerializer ?? jsonStateSerializer;
1979
+ const ctx = {
1980
+ signingKey,
1981
+ tokenTtl,
1982
+ serverId,
1983
+ maxStreamResponseBytes,
1984
+ stateSerializer
1985
+ };
1986
+ function addCorsHeaders(headers) {
1987
+ if (corsOrigins) {
1988
+ headers.set("Access-Control-Allow-Origin", corsOrigins);
1989
+ headers.set("Access-Control-Allow-Methods", "POST, OPTIONS");
1990
+ headers.set("Access-Control-Allow-Headers", "Content-Type");
1894
1991
  }
1895
- return content;
1896
1992
  }
1897
- async function readResponse(resp) {
1898
- let body = new Uint8Array(await resp.arrayBuffer());
1899
- if (resp.headers.get("Content-Encoding") === "zstd" && decompressFn) {
1900
- body = new Uint8Array(decompressFn(body));
1901
- }
1902
- return body;
1993
+ async function compressIfAccepted(response, clientAcceptsZstd) {
1994
+ if (compressionLevel == null || !clientAcceptsZstd)
1995
+ return response;
1996
+ const responseBody = new Uint8Array(await response.arrayBuffer());
1997
+ const compressed = zstdCompress(responseBody, compressionLevel);
1998
+ const headers = new Headers(response.headers);
1999
+ headers.set("Content-Encoding", "zstd");
2000
+ return new Response(compressed, {
2001
+ status: response.status,
2002
+ headers
2003
+ });
1903
2004
  }
1904
- async function ensureMethodCache() {
1905
- if (methodCache)
1906
- return methodCache;
1907
- const desc = await httpIntrospect(baseUrl, { prefix });
1908
- methodCache = new Map(desc.methods.map((m) => [m.name, m]));
1909
- return methodCache;
2005
+ function makeErrorResponse(error, statusCode, schema = EMPTY_SCHEMA2) {
2006
+ const errBatch = buildErrorBatch(schema, error, serverId, null);
2007
+ const body = serializeIpcStream(schema, [errBatch]);
2008
+ const resp = arrowResponse(body, statusCode);
2009
+ addCorsHeaders(resp.headers);
2010
+ return resp;
1910
2011
  }
1911
- return {
1912
- async call(method, params) {
1913
- await ensureCompression();
1914
- const methods = await ensureMethodCache();
1915
- const info = methods.get(method);
1916
- if (!info) {
1917
- throw new Error(`Unknown method: '${method}'`);
1918
- }
1919
- const fullParams = { ...info.defaults ?? {}, ...params ?? {} };
1920
- const body = buildRequestIpc(info.paramsSchema, fullParams, method);
1921
- const resp = await fetch(`${baseUrl}${prefix}/${method}`, {
1922
- method: "POST",
1923
- headers: buildHeaders(),
1924
- body: prepareBody(body)
1925
- });
1926
- const responseBody = await readResponse(resp);
1927
- const { batches } = await readResponseBatches(responseBody);
1928
- let resultBatch = null;
1929
- for (const batch of batches) {
1930
- if (batch.numRows === 0) {
1931
- dispatchLogOrError(batch, onLog);
1932
- continue;
2012
+ return async function handler(request) {
2013
+ const url = new URL(request.url);
2014
+ const path = url.pathname;
2015
+ if (request.method === "OPTIONS") {
2016
+ if (path === `${prefix}/__capabilities__`) {
2017
+ const headers = new Headers;
2018
+ addCorsHeaders(headers);
2019
+ if (maxRequestBytes != null) {
2020
+ headers.set("VGI-Max-Request-Bytes", String(maxRequestBytes));
1933
2021
  }
1934
- resultBatch = batch;
2022
+ return new Response(null, { status: 204, headers });
1935
2023
  }
1936
- if (!resultBatch) {
1937
- return null;
2024
+ if (corsOrigins) {
2025
+ const headers = new Headers;
2026
+ addCorsHeaders(headers);
2027
+ return new Response(null, { status: 204, headers });
1938
2028
  }
1939
- const rows = extractBatchRows(resultBatch);
1940
- if (rows.length === 0)
1941
- return null;
1942
- const result = rows[0];
1943
- if (info.resultSchema.fields.length === 0)
1944
- return null;
1945
- return result;
1946
- },
1947
- async stream(method, params) {
1948
- await ensureCompression();
1949
- const methods = await ensureMethodCache();
1950
- const info = methods.get(method);
1951
- if (!info) {
1952
- throw new Error(`Unknown method: '${method}'`);
2029
+ return new Response(null, { status: 405 });
2030
+ }
2031
+ if (request.method !== "POST") {
2032
+ return new Response("Method Not Allowed", { status: 405 });
2033
+ }
2034
+ const contentType = request.headers.get("Content-Type");
2035
+ if (!contentType || !contentType.includes(ARROW_CONTENT_TYPE)) {
2036
+ return new Response(`Unsupported Media Type: expected ${ARROW_CONTENT_TYPE}`, { status: 415 });
2037
+ }
2038
+ if (maxRequestBytes != null) {
2039
+ const contentLength = request.headers.get("Content-Length");
2040
+ if (contentLength && parseInt(contentLength, 10) > maxRequestBytes) {
2041
+ return new Response("Request body too large", { status: 413 });
1953
2042
  }
1954
- const fullParams = { ...info.defaults ?? {}, ...params ?? {} };
1955
- const body = buildRequestIpc(info.paramsSchema, fullParams, method);
1956
- const resp = await fetch(`${baseUrl}${prefix}/${method}/init`, {
1957
- method: "POST",
1958
- headers: buildHeaders(),
1959
- body: prepareBody(body)
1960
- });
1961
- const responseBody = await readResponse(resp);
1962
- let header = null;
1963
- let stateToken = null;
1964
- const pendingBatches = [];
1965
- let finished = false;
1966
- let streamSchema = null;
1967
- if (info.headerSchema) {
1968
- const reader = await readSequentialStreams(responseBody);
1969
- const headerStream = await reader.readStream();
1970
- if (headerStream) {
1971
- for (const batch of headerStream.batches) {
1972
- if (batch.numRows === 0) {
1973
- dispatchLogOrError(batch, onLog);
1974
- continue;
1975
- }
1976
- const rows = extractBatchRows(batch);
1977
- if (rows.length > 0) {
1978
- header = rows[0];
1979
- }
1980
- }
1981
- }
1982
- const dataStream = await reader.readStream();
1983
- if (dataStream) {
1984
- streamSchema = dataStream.schema;
1985
- }
1986
- const headerErrorBatches = [];
1987
- if (dataStream) {
1988
- for (const batch of dataStream.batches) {
1989
- if (batch.numRows === 0) {
1990
- const token = batch.metadata?.get(STATE_KEY);
1991
- if (token) {
1992
- stateToken = token;
1993
- continue;
1994
- }
1995
- const level = batch.metadata?.get(LOG_LEVEL_KEY);
1996
- if (level === "EXCEPTION") {
1997
- headerErrorBatches.push(batch);
1998
- continue;
1999
- }
2000
- dispatchLogOrError(batch, onLog);
2001
- continue;
2002
- }
2003
- pendingBatches.push(batch);
2004
- }
2005
- }
2006
- if (headerErrorBatches.length > 0) {
2007
- if (pendingBatches.length > 0 || stateToken !== null) {
2008
- pendingBatches.push(...headerErrorBatches);
2009
- } else {
2010
- for (const batch of headerErrorBatches) {
2011
- dispatchLogOrError(batch, onLog);
2012
- }
2013
- }
2043
+ }
2044
+ const clientAcceptsZstd = (request.headers.get("Accept-Encoding") ?? "").includes("zstd");
2045
+ let body = new Uint8Array(await request.arrayBuffer());
2046
+ const contentEncoding = request.headers.get("Content-Encoding");
2047
+ if (contentEncoding === "zstd") {
2048
+ body = zstdDecompress(body);
2049
+ }
2050
+ if (path === `${prefix}/${DESCRIBE_METHOD_NAME}`) {
2051
+ try {
2052
+ const response = httpDispatchDescribe(protocol.name, methods, serverId);
2053
+ addCorsHeaders(response.headers);
2054
+ return compressIfAccepted(response, clientAcceptsZstd);
2055
+ } catch (error) {
2056
+ return compressIfAccepted(makeErrorResponse(error, 500), clientAcceptsZstd);
2057
+ }
2058
+ }
2059
+ if (!path.startsWith(`${prefix}/`)) {
2060
+ return new Response("Not Found", { status: 404 });
2061
+ }
2062
+ const subPath = path.slice(prefix.length + 1);
2063
+ let methodName;
2064
+ let action;
2065
+ if (subPath.endsWith("/init")) {
2066
+ methodName = subPath.slice(0, -5);
2067
+ action = "init";
2068
+ } else if (subPath.endsWith("/exchange")) {
2069
+ methodName = subPath.slice(0, -9);
2070
+ action = "exchange";
2071
+ } else {
2072
+ methodName = subPath;
2073
+ action = "call";
2074
+ }
2075
+ const method = methods.get(methodName);
2076
+ if (!method) {
2077
+ const available = [...methods.keys()].sort();
2078
+ const err = new Error(`Unknown method: '${methodName}'. Available methods: [${available.join(", ")}]`);
2079
+ return compressIfAccepted(makeErrorResponse(err, 404), clientAcceptsZstd);
2080
+ }
2081
+ try {
2082
+ let response;
2083
+ if (action === "call") {
2084
+ if (method.type !== "unary" /* UNARY */) {
2085
+ throw new HttpRpcError(`Method '${methodName}' is a stream method. Use /init and /exchange endpoints.`, 400);
2014
2086
  }
2015
- if (!dataStream && !stateToken) {
2016
- finished = true;
2087
+ response = await httpDispatchUnary(method, body, ctx);
2088
+ } else if (action === "init") {
2089
+ if (method.type !== "stream" /* STREAM */) {
2090
+ throw new HttpRpcError(`Method '${methodName}' is a unary method. Use POST ${prefix}/${methodName} instead.`, 400);
2017
2091
  }
2092
+ response = await httpDispatchStreamInit(method, body, ctx);
2018
2093
  } else {
2019
- const { schema: responseSchema, batches } = await readResponseBatches(responseBody);
2020
- streamSchema = responseSchema;
2021
- const errorBatches = [];
2022
- for (const batch of batches) {
2023
- if (batch.numRows === 0) {
2024
- const token = batch.metadata?.get(STATE_KEY);
2025
- if (token) {
2026
- stateToken = token;
2027
- continue;
2028
- }
2029
- const level = batch.metadata?.get(LOG_LEVEL_KEY);
2030
- if (level === "EXCEPTION") {
2031
- errorBatches.push(batch);
2032
- continue;
2033
- }
2034
- dispatchLogOrError(batch, onLog);
2035
- continue;
2036
- }
2037
- pendingBatches.push(batch);
2038
- }
2039
- if (errorBatches.length > 0) {
2040
- if (pendingBatches.length > 0 || stateToken !== null) {
2041
- pendingBatches.push(...errorBatches);
2042
- } else {
2043
- for (const batch of errorBatches) {
2044
- dispatchLogOrError(batch, onLog);
2045
- }
2046
- }
2094
+ if (method.type !== "stream" /* STREAM */) {
2095
+ throw new HttpRpcError(`Method '${methodName}' is a unary method. Use POST ${prefix}/${methodName} instead.`, 400);
2047
2096
  }
2097
+ response = await httpDispatchStreamExchange(method, body, ctx);
2048
2098
  }
2049
- if (pendingBatches.length === 0 && stateToken === null) {
2050
- finished = true;
2099
+ addCorsHeaders(response.headers);
2100
+ return compressIfAccepted(response, clientAcceptsZstd);
2101
+ } catch (error) {
2102
+ if (error instanceof HttpRpcError) {
2103
+ return compressIfAccepted(makeErrorResponse(error, error.statusCode), clientAcceptsZstd);
2051
2104
  }
2052
- const outputSchema = (streamSchema && streamSchema.fields.length > 0 ? streamSchema : null) ?? (pendingBatches.length > 0 ? pendingBatches[0].schema : null) ?? info.outputSchema ?? info.resultSchema;
2053
- return new HttpStreamSession({
2054
- baseUrl,
2055
- prefix,
2056
- method,
2057
- stateToken,
2058
- outputSchema,
2059
- inputSchema: info.inputSchema,
2060
- onLog,
2061
- pendingBatches,
2062
- finished,
2063
- header,
2064
- compressionLevel,
2065
- compressFn,
2066
- decompressFn
2067
- });
2068
- },
2069
- async describe() {
2070
- return httpIntrospect(baseUrl, { prefix });
2071
- },
2072
- close() {}
2105
+ return compressIfAccepted(makeErrorResponse(error, 500), clientAcceptsZstd);
2106
+ }
2073
2107
  };
2074
2108
  }
2075
- // src/client/pipe.ts
2109
+ // src/protocol.ts
2110
+ import { Schema as Schema7 } from "@query-farm/apache-arrow";
2111
+
2112
+ // src/schema.ts
2076
2113
  import {
2077
- RecordBatch as RecordBatch9,
2078
- RecordBatchStreamWriter as RecordBatchStreamWriter4,
2079
- Schema as Schema12,
2080
- Field as Field5,
2081
- Struct as Struct6,
2082
- makeData as makeData6,
2083
- vectorFromArray as vectorFromArray5
2114
+ Binary as Binary3,
2115
+ Bool as Bool3,
2116
+ DataType as DataType4,
2117
+ Field as Field4,
2118
+ Float32,
2119
+ Float64 as Float642,
2120
+ Int16,
2121
+ Int32,
2122
+ Int64 as Int642,
2123
+ Schema as Schema6,
2124
+ Utf8 as Utf83
2084
2125
  } from "@query-farm/apache-arrow";
2085
- class PipeIncrementalWriter {
2086
- writer;
2087
- writeFn;
2088
- closed = false;
2089
- constructor(writeFn, schema) {
2090
- this.writeFn = writeFn;
2091
- this.writer = new RecordBatchStreamWriter4;
2092
- this.writer.reset(undefined, schema);
2093
- this.drain();
2094
- }
2095
- write(batch) {
2096
- if (this.closed)
2097
- throw new Error("PipeIncrementalWriter already closed");
2098
- this.writer._writeRecordBatch(batch);
2099
- this.drain();
2100
- }
2101
- close() {
2102
- if (this.closed)
2103
- return;
2104
- this.closed = true;
2105
- const eos = new Uint8Array(new Int32Array([-1, 0]).buffer);
2106
- this.writeFn(eos);
2126
+ var str = new Utf83;
2127
+ var bytes = new Binary3;
2128
+ var int = new Int642;
2129
+ var int32 = new Int32;
2130
+ var float = new Float642;
2131
+ var float32 = new Float32;
2132
+ var bool = new Bool3;
2133
+ function toSchema(spec) {
2134
+ if (spec instanceof Schema6)
2135
+ return spec;
2136
+ const fields = [];
2137
+ for (const [name, value] of Object.entries(spec)) {
2138
+ if (value instanceof Field4) {
2139
+ fields.push(value);
2140
+ } else if (value instanceof DataType4) {
2141
+ fields.push(new Field4(name, value, false));
2142
+ } else {
2143
+ throw new TypeError(`Invalid schema value for "${name}": expected DataType or Field, got ${typeof value}`);
2144
+ }
2107
2145
  }
2108
- drain() {
2109
- const values = this.writer._sink._values;
2110
- for (const chunk of values) {
2111
- this.writeFn(chunk);
2146
+ return new Schema6(fields);
2147
+ }
2148
+ var TYPE_MAP = [
2149
+ [Utf83, "str"],
2150
+ [Binary3, "bytes"],
2151
+ [Bool3, "bool"],
2152
+ [Float642, "float"],
2153
+ [Float32, "float"],
2154
+ [Int642, "int"],
2155
+ [Int32, "int"],
2156
+ [Int16, "int"]
2157
+ ];
2158
+ function inferParamTypes(spec) {
2159
+ const schema = toSchema(spec);
2160
+ if (schema.fields.length === 0)
2161
+ return;
2162
+ const result = {};
2163
+ for (const field of schema.fields) {
2164
+ let mapped;
2165
+ for (const [ctor, name] of TYPE_MAP) {
2166
+ if (field.type instanceof ctor) {
2167
+ mapped = name;
2168
+ break;
2169
+ }
2112
2170
  }
2113
- values.length = 0;
2171
+ if (!mapped)
2172
+ return;
2173
+ result[field.name] = mapped;
2114
2174
  }
2175
+ return result;
2115
2176
  }
2116
2177
 
2117
- class PipeStreamSession {
2118
- _reader;
2119
- _writeFn;
2120
- _onLog;
2121
- _header;
2122
- _inputWriter = null;
2123
- _inputSchema = null;
2124
- _outputStreamOpened = false;
2125
- _closed = false;
2126
- _outputSchema;
2127
- _releaseBusy;
2128
- _setDrainPromise;
2129
- constructor(opts) {
2130
- this._reader = opts.reader;
2131
- this._writeFn = opts.writeFn;
2132
- this._onLog = opts.onLog;
2133
- this._header = opts.header;
2134
- this._outputSchema = opts.outputSchema;
2135
- this._releaseBusy = opts.releaseBusy;
2136
- this._setDrainPromise = opts.setDrainPromise;
2178
+ // src/protocol.ts
2179
+ var EMPTY_SCHEMA3 = new Schema7([]);
2180
+
2181
+ class Protocol {
2182
+ name;
2183
+ _methods = new Map;
2184
+ constructor(name) {
2185
+ this.name = name;
2137
2186
  }
2138
- get header() {
2139
- return this._header;
2187
+ unary(name, config) {
2188
+ const params = toSchema(config.params);
2189
+ this._methods.set(name, {
2190
+ name,
2191
+ type: "unary" /* UNARY */,
2192
+ paramsSchema: params,
2193
+ resultSchema: toSchema(config.result),
2194
+ handler: config.handler,
2195
+ doc: config.doc,
2196
+ defaults: config.defaults,
2197
+ paramTypes: config.paramTypes ?? inferParamTypes(params)
2198
+ });
2199
+ return this;
2140
2200
  }
2141
- async _readOutputBatch() {
2142
- while (true) {
2143
- const batch = await this._reader.readNextBatch();
2144
- if (batch === null)
2145
- return null;
2146
- if (batch.numRows === 0) {
2147
- if (dispatchLogOrError(batch, this._onLog)) {
2148
- continue;
2149
- }
2150
- }
2151
- return batch;
2201
+ producer(name, config) {
2202
+ const params = toSchema(config.params);
2203
+ this._methods.set(name, {
2204
+ name,
2205
+ type: "stream" /* STREAM */,
2206
+ paramsSchema: params,
2207
+ resultSchema: EMPTY_SCHEMA3,
2208
+ outputSchema: toSchema(config.outputSchema),
2209
+ inputSchema: EMPTY_SCHEMA3,
2210
+ producerInit: config.init,
2211
+ producerFn: config.produce,
2212
+ headerSchema: config.headerSchema ? toSchema(config.headerSchema) : undefined,
2213
+ headerInit: config.headerInit,
2214
+ doc: config.doc,
2215
+ defaults: config.defaults,
2216
+ paramTypes: config.paramTypes ?? inferParamTypes(params)
2217
+ });
2218
+ return this;
2219
+ }
2220
+ exchange(name, config) {
2221
+ const params = toSchema(config.params);
2222
+ this._methods.set(name, {
2223
+ name,
2224
+ type: "stream" /* STREAM */,
2225
+ paramsSchema: params,
2226
+ resultSchema: EMPTY_SCHEMA3,
2227
+ inputSchema: toSchema(config.inputSchema),
2228
+ outputSchema: toSchema(config.outputSchema),
2229
+ exchangeInit: config.init,
2230
+ exchangeFn: config.exchange,
2231
+ headerSchema: config.headerSchema ? toSchema(config.headerSchema) : undefined,
2232
+ headerInit: config.headerInit,
2233
+ doc: config.doc,
2234
+ defaults: config.defaults,
2235
+ paramTypes: config.paramTypes ?? inferParamTypes(params)
2236
+ });
2237
+ return this;
2238
+ }
2239
+ getMethods() {
2240
+ return new Map(this._methods);
2241
+ }
2242
+ }
2243
+ // src/server.ts
2244
+ import { Schema as Schema9 } from "@query-farm/apache-arrow";
2245
+
2246
+ // src/dispatch/stream.ts
2247
+ import { Schema as Schema8 } from "@query-farm/apache-arrow";
2248
+ var EMPTY_SCHEMA4 = new Schema8([]);
2249
+ async function dispatchStream(method, params, writer, reader, serverId, requestId) {
2250
+ const isProducer = !!method.producerFn;
2251
+ let state;
2252
+ try {
2253
+ if (isProducer) {
2254
+ state = await method.producerInit(params);
2255
+ } else {
2256
+ state = await method.exchangeInit(params);
2257
+ }
2258
+ } catch (error) {
2259
+ const errSchema = method.headerSchema ?? EMPTY_SCHEMA4;
2260
+ const errBatch = buildErrorBatch(errSchema, error, serverId, requestId);
2261
+ writer.writeStream(errSchema, [errBatch]);
2262
+ const inputSchema2 = await reader.openNextStream();
2263
+ if (inputSchema2) {
2264
+ while (await reader.readNextBatch() !== null) {}
2152
2265
  }
2266
+ return;
2153
2267
  }
2154
- async _ensureOutputStream() {
2155
- if (this._outputStreamOpened)
2268
+ const outputSchema = state?.__outputSchema ?? method.outputSchema;
2269
+ const effectiveProducer = state?.__isProducer ?? isProducer;
2270
+ if (method.headerSchema && method.headerInit) {
2271
+ try {
2272
+ const headerOut = new OutputCollector(method.headerSchema, true, serverId, requestId);
2273
+ const headerValues = method.headerInit(params, state, headerOut);
2274
+ const headerBatch = buildResultBatch(method.headerSchema, headerValues, serverId, requestId);
2275
+ const headerBatches = [...headerOut.batches.map((b) => b.batch), headerBatch];
2276
+ writer.writeStream(method.headerSchema, headerBatches);
2277
+ } catch (error) {
2278
+ const errBatch = buildErrorBatch(method.headerSchema, error, serverId, requestId);
2279
+ writer.writeStream(method.headerSchema, [errBatch]);
2280
+ const inputSchema2 = await reader.openNextStream();
2281
+ if (inputSchema2) {
2282
+ while (await reader.readNextBatch() !== null) {}
2283
+ }
2156
2284
  return;
2157
- this._outputStreamOpened = true;
2158
- const schema = await this._reader.openNextStream();
2159
- if (!schema) {
2160
- throw new RpcError("ProtocolError", "Expected output stream but got EOF", "");
2161
2285
  }
2162
2286
  }
2163
- async exchange(input) {
2164
- if (this._closed) {
2165
- throw new RpcError("ProtocolError", "Stream session is closed", "");
2166
- }
2167
- let inputSchema;
2168
- let batch;
2169
- if (input.length === 0) {
2170
- inputSchema = this._inputSchema ?? this._outputSchema;
2171
- const children = inputSchema.fields.map((f) => {
2172
- return makeData6({ type: f.type, length: 0, nullCount: 0 });
2173
- });
2174
- const structType = new Struct6(inputSchema.fields);
2175
- const data = makeData6({
2176
- type: structType,
2177
- length: 0,
2178
- children,
2179
- nullCount: 0
2180
- });
2181
- batch = new RecordBatch9(inputSchema, data);
2182
- } else {
2183
- const keys = Object.keys(input[0]);
2184
- const fields = keys.map((key) => {
2185
- let sample = undefined;
2186
- for (const row of input) {
2187
- if (row[key] != null) {
2188
- sample = row[key];
2189
- break;
2190
- }
2191
- }
2192
- const arrowType = inferArrowType(sample);
2193
- return new Field5(key, arrowType, true);
2194
- });
2195
- inputSchema = new Schema12(fields);
2196
- if (this._inputSchema) {
2197
- const cached = this._inputSchema;
2198
- if (cached.fields.length !== inputSchema.fields.length || cached.fields.some((f, i) => f.name !== inputSchema.fields[i].name)) {
2199
- throw new RpcError("ProtocolError", `Exchange input schema changed: expected [${cached.fields.map((f) => f.name).join(", ")}] ` + `but got [${inputSchema.fields.map((f) => f.name).join(", ")}]`, "");
2200
- }
2287
+ const inputSchema = await reader.openNextStream();
2288
+ if (!inputSchema) {
2289
+ const errBatch = buildErrorBatch(outputSchema, new Error("Expected input stream but got EOF"), serverId, requestId);
2290
+ writer.writeStream(outputSchema, [errBatch]);
2291
+ return;
2292
+ }
2293
+ const stream = writer.openStream(outputSchema);
2294
+ try {
2295
+ while (true) {
2296
+ const inputBatch = await reader.readNextBatch();
2297
+ if (!inputBatch)
2298
+ break;
2299
+ const out = new OutputCollector(outputSchema, effectiveProducer, serverId, requestId);
2300
+ if (isProducer) {
2301
+ await method.producerFn(state, out);
2201
2302
  } else {
2202
- this._inputSchema = inputSchema;
2303
+ await method.exchangeFn(state, inputBatch, out);
2304
+ }
2305
+ for (const emitted of out.batches) {
2306
+ stream.write(emitted.batch);
2307
+ }
2308
+ if (out.finished) {
2309
+ break;
2203
2310
  }
2204
- const children = inputSchema.fields.map((f) => {
2205
- const values = input.map((row) => row[f.name]);
2206
- return vectorFromArray5(values, f.type).data[0];
2207
- });
2208
- const structType = new Struct6(inputSchema.fields);
2209
- const data = makeData6({
2210
- type: structType,
2211
- length: input.length,
2212
- children,
2213
- nullCount: 0
2214
- });
2215
- batch = new RecordBatch9(inputSchema, data);
2216
- }
2217
- if (!this._inputWriter) {
2218
- this._inputWriter = new PipeIncrementalWriter(this._writeFn, inputSchema);
2219
2311
  }
2220
- this._inputWriter.write(batch);
2221
- await this._ensureOutputStream();
2312
+ } catch (error) {
2313
+ stream.write(buildErrorBatch(outputSchema, error, serverId, requestId));
2314
+ }
2315
+ stream.close();
2316
+ try {
2317
+ while (await reader.readNextBatch() !== null) {}
2318
+ } catch {}
2319
+ }
2320
+
2321
+ // src/dispatch/unary.ts
2322
+ async function dispatchUnary(method, params, writer, serverId, requestId) {
2323
+ const schema = method.resultSchema;
2324
+ const out = new OutputCollector(schema, true, serverId, requestId);
2325
+ try {
2326
+ const result = await method.handler(params, out);
2327
+ const resultBatch = buildResultBatch(schema, result, serverId, requestId);
2328
+ const batches = [...out.batches.map((b) => b.batch), resultBatch];
2329
+ writer.writeStream(schema, batches);
2330
+ } catch (error) {
2331
+ const batch = buildErrorBatch(schema, error, serverId, requestId);
2332
+ writer.writeStream(schema, [batch]);
2333
+ }
2334
+ }
2335
+
2336
+ // src/wire/writer.ts
2337
+ import { writeSync } from "node:fs";
2338
+ import { RecordBatchStreamWriter as RecordBatchStreamWriter4 } from "@query-farm/apache-arrow";
2339
+ var STDOUT_FD = 1;
2340
+ function writeAll(fd, data) {
2341
+ let offset = 0;
2342
+ while (offset < data.length) {
2222
2343
  try {
2223
- const outputBatch = await this._readOutputBatch();
2224
- if (outputBatch === null) {
2225
- return [];
2226
- }
2227
- return extractBatchRows(outputBatch);
2344
+ const written = writeSync(fd, data, offset, data.length - offset);
2345
+ if (written <= 0)
2346
+ throw new Error(`writeSync returned ${written}`);
2347
+ offset += written;
2228
2348
  } catch (e) {
2229
- await this._cleanup();
2349
+ if (e.code === "EAGAIN") {
2350
+ Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 1);
2351
+ continue;
2352
+ }
2230
2353
  throw e;
2231
2354
  }
2232
2355
  }
2233
- async _cleanup() {
2234
- if (this._closed)
2235
- return;
2236
- this._closed = true;
2237
- if (this._inputWriter) {
2238
- this._inputWriter.close();
2239
- this._inputWriter = null;
2240
- }
2241
- try {
2242
- if (this._outputStreamOpened) {
2243
- while (await this._reader.readNextBatch() !== null) {}
2244
- }
2245
- } catch {}
2246
- this._releaseBusy();
2356
+ }
2357
+
2358
+ class IpcStreamWriter {
2359
+ fd;
2360
+ constructor(fd = STDOUT_FD) {
2361
+ this.fd = fd;
2247
2362
  }
2248
- async* [Symbol.asyncIterator]() {
2249
- if (this._closed)
2250
- return;
2251
- try {
2252
- const tickSchema = new Schema12([]);
2253
- this._inputWriter = new PipeIncrementalWriter(this._writeFn, tickSchema);
2254
- const structType = new Struct6(tickSchema.fields);
2255
- const tickData = makeData6({
2256
- type: structType,
2257
- length: 0,
2258
- children: [],
2259
- nullCount: 0
2260
- });
2261
- const tickBatch = new RecordBatch9(tickSchema, tickData);
2262
- while (true) {
2263
- this._inputWriter.write(tickBatch);
2264
- await this._ensureOutputStream();
2265
- const outputBatch = await this._readOutputBatch();
2266
- if (outputBatch === null) {
2267
- break;
2268
- }
2269
- yield extractBatchRows(outputBatch);
2270
- }
2271
- } finally {
2272
- if (this._inputWriter) {
2273
- this._inputWriter.close();
2274
- this._inputWriter = null;
2275
- }
2276
- try {
2277
- if (this._outputStreamOpened) {
2278
- while (await this._reader.readNextBatch() !== null) {}
2279
- }
2280
- } catch {}
2281
- this._closed = true;
2282
- this._releaseBusy();
2363
+ writeStream(schema, batches) {
2364
+ const writer = new RecordBatchStreamWriter4;
2365
+ writer.reset(undefined, schema);
2366
+ for (const batch of batches) {
2367
+ writer._writeRecordBatch(batch);
2283
2368
  }
2369
+ writer.close();
2370
+ const bytes2 = writer.toUint8Array(true);
2371
+ writeAll(this.fd, bytes2);
2372
+ }
2373
+ openStream(schema) {
2374
+ return new IncrementalStream(this.fd, schema);
2375
+ }
2376
+ }
2377
+
2378
// Incremental Arrow IPC stream writer bound to a raw file descriptor.
// Each batch is serialized the moment it arrives and flushed straight to the
// fd, so the peer can start consuming the stream before it is complete.
class IncrementalStream {
  writer;
  fd;
  closed = false;

  /**
   * @param fd     File descriptor the IPC bytes are written to.
   * @param schema Arrow schema shared by every batch on this stream.
   */
  constructor(fd, schema) {
    this.fd = fd;
    this.writer = new RecordBatchStreamWriter4();
    // Seeding the writer with the schema makes it emit the IPC stream
    // header first; drain immediately so the peer sees it right away.
    this.writer.reset(undefined, schema);
    this.drain();
  }

  /**
   * Serialize one record batch and flush it to the fd.
   * @throws {Error} if the stream has already been closed.
   */
  write(batch) {
    if (this.closed) {
      throw new Error("Stream already closed");
    }
    this.writer._writeRecordBatch(batch);
    this.drain();
  }

  /** Terminate the IPC stream with the end-of-stream marker. Idempotent. */
  close() {
    if (this.closed) {
      return;
    }
    this.closed = true;
    // Arrow IPC EOS marker: 0xFFFFFFFF continuation sentinel followed by a
    // zero metadata length. NOTE(review): Int32Array uses platform byte
    // order; Arrow expects little-endian, which matches every mainstream JS
    // runtime in practice — confirm if this ever targets BE hardware.
    const eosMarker = new Uint8Array(new Int32Array([-1, 0]).buffer);
    writeAll(this.fd, eosMarker);
  }

  /** Flush any byte chunks buffered in the writer's sink out to the fd. */
  drain() {
    // Reaches into the writer's private sink buffer; presumably stable for
    // the bundled apache-arrow version — verify when upgrading the library.
    const pending = this.writer._sink._values;
    for (const piece of pending) {
      writeAll(this.fd, piece);
    }
    pending.length = 0;
  }
}
2409
+
2410
// src/server.ts
var EMPTY_SCHEMA5 = new Schema9([]);

// Stdin/stdout RPC server: reads Arrow IPC request streams from stdin and
// writes response streams to stdout, one request at a time, until EOF.
class VgiRpcServer {
  protocol;
  enableDescribe;
  serverId;
  describeBatch = null;

  /**
   * @param protocol Protocol definition providing `name` and `getMethods()`.
   * @param options  Optional `{ enableDescribe, serverId }` overrides.
   */
  constructor(protocol, options) {
    this.protocol = protocol;
    this.enableDescribe = options?.enableDescribe ?? true;
    // Default server id: 12 hex chars derived from a random UUID.
    this.serverId =
      options?.serverId ?? crypto.randomUUID().replace(/-/g, "").slice(0, 12);
    if (this.enableDescribe) {
      // Pre-build the __describe__ response once; it never changes.
      const built = buildDescribeBatch(
        protocol.name,
        protocol.getMethods(),
        this.serverId
      );
      this.describeBatch = built.batch;
    }
  }

  /**
   * Main serve loop. Runs until the peer closes stdin, swallowing the
   * benign stream-teardown errors that signals a normal shutdown; anything
   * else is re-thrown. Always cancels the reader on the way out.
   */
  async run() {
    const stdin = process.stdin;
    if (process.stdin.isTTY || process.stdout.isTTY) {
      // Someone launched us in a terminal by hand — warn them on stderr.
      process.stderr.write(
        "WARNING: This process communicates via Arrow IPC on stdin/stdout " +
          "and is not intended to be run interactively.\n" +
          "It should be launched as a subprocess by an RPC client " +
          "(e.g. vgi_rpc.connect()).\n"
      );
    }
    const reader = await IpcStreamReader.create(stdin);
    const writer = new IpcStreamWriter();
    try {
      for (;;) {
        await this.serveOne(reader, writer);
      }
    } catch (e) {
      // Errors that simply mean "the client went away" end the loop quietly.
      const shutdownMessages = [
        "closed",
        "Expected Schema Message",
        "null or length 0",
      ];
      const shutdownCodes = [
        "EPIPE",
        "ERR_STREAM_PREMATURE_CLOSE",
        "ERR_STREAM_DESTROYED",
      ];
      const isNormalShutdown =
        shutdownMessages.some((m) => e.message?.includes(m)) ||
        shutdownCodes.includes(e.code) ||
        (e instanceof Error && e.message.includes("EOF"));
      if (isNormalShutdown) {
        return;
      }
      throw e;
    } finally {
      await reader.cancel();
    }
  }

  /**
   * Handle exactly one request stream: parse it, route it to the matching
   * method, and write the response (or an error batch) to `writer`.
   * @throws {Error} "EOF" when the input stream has ended.
   */
  async serveOne(reader, writer) {
    const requestStream = await reader.readStream();
    if (!requestStream) {
      throw new Error("EOF");
    }
    const { schema, batches } = requestStream;
    if (batches.length === 0) {
      const protoErr = new RpcError(
        "ProtocolError",
        "Request stream contains no batches",
        ""
      );
      writer.writeStream(EMPTY_SCHEMA5, [
        buildErrorBatch(EMPTY_SCHEMA5, protoErr, this.serverId, null),
      ]);
      return;
    }

    // Decode the request envelope from the first batch.
    let methodName;
    let params;
    let requestId;
    try {
      ({ methodName, params, requestId } = parseRequest(schema, batches[0]));
    } catch (e) {
      // Report the parse failure to the client, then decide whether it is
      // a recoverable protocol-level error or a genuine server bug.
      writer.writeStream(EMPTY_SCHEMA5, [
        buildErrorBatch(EMPTY_SCHEMA5, e, this.serverId, null),
      ]);
      if (e instanceof VersionError || e instanceof RpcError) {
        return;
      }
      throw e;
    }

    // Built-in introspection endpoint (when enabled at construction).
    if (methodName === DESCRIBE_METHOD_NAME && this.describeBatch) {
      writer.writeStream(this.describeBatch.schema, [this.describeBatch]);
      return;
    }

    const methods = this.protocol.getMethods();
    const method = methods.get(methodName);
    if (!method) {
      const available = [...methods.keys()].sort();
      const unknownErr = new Error(
        `Unknown method: '${methodName}'. Available methods: [${available.join(", ")}]`
      );
      writer.writeStream(EMPTY_SCHEMA5, [
        buildErrorBatch(EMPTY_SCHEMA5, unknownErr, this.serverId, requestId),
      ]);
      return;
    }

    if (method.type === "unary" /* UNARY */) {
      await dispatchUnary(method, params, writer, this.serverId, requestId);
    } else {
      await dispatchStream(
        method,
        params,
        writer,
        reader,
        this.serverId,
        requestId
      );
    }
  }
}
2508
2498
  export {
2509
2499
  unpackStateToken,
@@ -2547,4 +2537,4 @@ export {
2547
2537
  ARROW_CONTENT_TYPE
2548
2538
  };
2549
2539
 
2550
- //# debugId=90D671AA0075054164756E2164756E21
2540
+ //# debugId=E5E571F45BA75F8D64756E2164756E21