@query-farm/vgi-rpc 0.3.4 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/dist/auth.d.ts +13 -0
- package/dist/auth.d.ts.map +1 -0
- package/dist/client/connect.d.ts.map +1 -1
- package/dist/client/index.d.ts +2 -0
- package/dist/client/index.d.ts.map +1 -1
- package/dist/client/introspect.d.ts +1 -0
- package/dist/client/introspect.d.ts.map +1 -1
- package/dist/client/oauth.d.ts +62 -0
- package/dist/client/oauth.d.ts.map +1 -0
- package/dist/client/pipe.d.ts +3 -0
- package/dist/client/pipe.d.ts.map +1 -1
- package/dist/client/stream.d.ts +5 -0
- package/dist/client/stream.d.ts.map +1 -1
- package/dist/client/types.d.ts +6 -0
- package/dist/client/types.d.ts.map +1 -1
- package/dist/constants.d.ts +3 -1
- package/dist/constants.d.ts.map +1 -1
- package/dist/dispatch/describe.d.ts.map +1 -1
- package/dist/dispatch/stream.d.ts +2 -1
- package/dist/dispatch/stream.d.ts.map +1 -1
- package/dist/dispatch/unary.d.ts +2 -1
- package/dist/dispatch/unary.d.ts.map +1 -1
- package/dist/external.d.ts +45 -0
- package/dist/external.d.ts.map +1 -0
- package/dist/gcs.d.ts +38 -0
- package/dist/gcs.d.ts.map +1 -0
- package/dist/http/auth.d.ts +32 -0
- package/dist/http/auth.d.ts.map +1 -0
- package/dist/http/bearer.d.ts +34 -0
- package/dist/http/bearer.d.ts.map +1 -0
- package/dist/http/dispatch.d.ts +4 -0
- package/dist/http/dispatch.d.ts.map +1 -1
- package/dist/http/handler.d.ts.map +1 -1
- package/dist/http/index.d.ts +8 -0
- package/dist/http/index.d.ts.map +1 -1
- package/dist/http/jwt.d.ts +21 -0
- package/dist/http/jwt.d.ts.map +1 -0
- package/dist/http/mtls.d.ts +78 -0
- package/dist/http/mtls.d.ts.map +1 -0
- package/dist/http/pages.d.ts +9 -0
- package/dist/http/pages.d.ts.map +1 -0
- package/dist/http/types.d.ts +22 -1
- package/dist/http/types.d.ts.map +1 -1
- package/dist/index.d.ts +4 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2576 -317
- package/dist/index.js.map +27 -18
- package/dist/otel.d.ts +47 -0
- package/dist/otel.d.ts.map +1 -0
- package/dist/s3.d.ts +43 -0
- package/dist/s3.d.ts.map +1 -0
- package/dist/server.d.ts +6 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/types.d.ts +38 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/wire/response.d.ts.map +1 -1
- package/package.json +46 -2
- package/src/auth.ts +31 -0
- package/src/client/connect.ts +28 -6
- package/src/client/index.ts +11 -0
- package/src/client/introspect.ts +15 -3
- package/src/client/oauth.ts +167 -0
- package/src/client/pipe.ts +19 -4
- package/src/client/stream.ts +32 -7
- package/src/client/types.ts +6 -0
- package/src/constants.ts +4 -1
- package/src/dispatch/describe.ts +20 -0
- package/src/dispatch/stream.ts +18 -4
- package/src/dispatch/unary.ts +6 -1
- package/src/external.ts +209 -0
- package/src/gcs.ts +86 -0
- package/src/http/auth.ts +110 -0
- package/src/http/bearer.ts +107 -0
- package/src/http/dispatch.ts +32 -10
- package/src/http/handler.ts +120 -3
- package/src/http/index.ts +14 -0
- package/src/http/jwt.ts +80 -0
- package/src/http/mtls.ts +298 -0
- package/src/http/pages.ts +298 -0
- package/src/http/types.ts +23 -1
- package/src/index.ts +32 -0
- package/src/otel.ts +161 -0
- package/src/s3.ts +94 -0
- package/src/server.ts +42 -8
- package/src/types.ts +51 -3
- package/src/wire/response.ts +28 -14
package/src/client/pipe.ts
CHANGED
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
} from "@query-farm/apache-arrow";
|
|
13
13
|
import { DESCRIBE_METHOD_NAME } from "../constants.js";
|
|
14
14
|
import { RpcError } from "../errors.js";
|
|
15
|
+
import { type ExternalLocationConfig, isExternalLocationBatch, resolveExternalLocation } from "../external.js";
|
|
15
16
|
import { serializeIpcStream } from "../http/common.js";
|
|
16
17
|
import { IpcStreamReader } from "../wire/reader.js";
|
|
17
18
|
import type { RpcClient } from "./connect.js";
|
|
@@ -86,6 +87,7 @@ export class PipeStreamSession implements StreamSession {
|
|
|
86
87
|
private _outputSchema: Schema;
|
|
87
88
|
private _releaseBusy: () => void;
|
|
88
89
|
private _setDrainPromise: (p: Promise<void>) => void;
|
|
90
|
+
private _externalConfig?: ExternalLocationConfig;
|
|
89
91
|
|
|
90
92
|
constructor(opts: {
|
|
91
93
|
reader: IpcStreamReader;
|
|
@@ -95,6 +97,7 @@ export class PipeStreamSession implements StreamSession {
|
|
|
95
97
|
outputSchema: Schema;
|
|
96
98
|
releaseBusy: () => void;
|
|
97
99
|
setDrainPromise: (p: Promise<void>) => void;
|
|
100
|
+
externalConfig?: ExternalLocationConfig;
|
|
98
101
|
}) {
|
|
99
102
|
this._reader = opts.reader;
|
|
100
103
|
this._writeFn = opts.writeFn;
|
|
@@ -103,6 +106,7 @@ export class PipeStreamSession implements StreamSession {
|
|
|
103
106
|
this._outputSchema = opts.outputSchema;
|
|
104
107
|
this._releaseBusy = opts.releaseBusy;
|
|
105
108
|
this._setDrainPromise = opts.setDrainPromise;
|
|
109
|
+
this._externalConfig = opts.externalConfig;
|
|
106
110
|
}
|
|
107
111
|
|
|
108
112
|
get header(): Record<string, any> | null {
|
|
@@ -120,6 +124,10 @@ export class PipeStreamSession implements StreamSession {
|
|
|
120
124
|
if (batch === null) return null; // Server closed output stream
|
|
121
125
|
|
|
122
126
|
if (batch.numRows === 0) {
|
|
127
|
+
// Check for external location pointer batch
|
|
128
|
+
if (isExternalLocationBatch(batch)) {
|
|
129
|
+
return await resolveExternalLocation(batch, this._externalConfig);
|
|
130
|
+
}
|
|
123
131
|
// Check if it's a log/error batch. If so, dispatch and continue.
|
|
124
132
|
// Otherwise it's a zero-row data batch — return it.
|
|
125
133
|
if (dispatchLogOrError(batch, this._onLog)) {
|
|
@@ -375,6 +383,7 @@ export function pipeConnect(
|
|
|
375
383
|
options?: PipeConnectOptions,
|
|
376
384
|
): RpcClient {
|
|
377
385
|
const onLog = options?.onLog;
|
|
386
|
+
const externalConfig = options?.externalLocation;
|
|
378
387
|
|
|
379
388
|
let reader: IpcStreamReader | null = null;
|
|
380
389
|
let readerPromise: Promise<IpcStreamReader> | null = null;
|
|
@@ -483,12 +492,16 @@ export function pipeConnect(
|
|
|
483
492
|
throw new Error("EOF reading response");
|
|
484
493
|
}
|
|
485
494
|
|
|
486
|
-
// Process batches: dispatch logs, find result
|
|
495
|
+
// Process batches: dispatch logs, resolve external pointers, find result
|
|
487
496
|
let resultBatch: RecordBatch | null = null;
|
|
488
|
-
for (
|
|
497
|
+
for (let batch of response.batches) {
|
|
489
498
|
if (batch.numRows === 0) {
|
|
490
|
-
|
|
491
|
-
|
|
499
|
+
if (isExternalLocationBatch(batch)) {
|
|
500
|
+
batch = await resolveExternalLocation(batch, externalConfig);
|
|
501
|
+
} else {
|
|
502
|
+
dispatchLogOrError(batch, onLog);
|
|
503
|
+
continue;
|
|
504
|
+
}
|
|
492
505
|
}
|
|
493
506
|
resultBatch = batch;
|
|
494
507
|
}
|
|
@@ -557,6 +570,7 @@ export function pipeConnect(
|
|
|
557
570
|
outputSchema,
|
|
558
571
|
releaseBusy,
|
|
559
572
|
setDrainPromise,
|
|
573
|
+
externalConfig,
|
|
560
574
|
});
|
|
561
575
|
} catch (e) {
|
|
562
576
|
// Init error (e.g., server raised exception during init).
|
|
@@ -624,6 +638,7 @@ export function subprocessConnect(cmd: string[], options?: SubprocessConnectOpti
|
|
|
624
638
|
|
|
625
639
|
const client = pipeConnect(stdout, writable, {
|
|
626
640
|
onLog: options?.onLog,
|
|
641
|
+
externalLocation: options?.externalLocation,
|
|
627
642
|
});
|
|
628
643
|
|
|
629
644
|
// Wrap close to also kill the subprocess
|
package/src/client/stream.ts
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
import { Field, makeData, RecordBatch, Schema, Struct, vectorFromArray } from "@query-farm/apache-arrow";
|
|
5
5
|
import { STATE_KEY } from "../constants.js";
|
|
6
6
|
import { RpcError } from "../errors.js";
|
|
7
|
+
import { type ExternalLocationConfig, isExternalLocationBatch, resolveExternalLocation } from "../external.js";
|
|
7
8
|
import { ARROW_CONTENT_TYPE, serializeIpcStream } from "../http/common.js";
|
|
8
9
|
import { dispatchLogOrError, extractBatchRows, inferArrowType, readResponseBatches } from "./ipc.js";
|
|
9
10
|
import type { LogMessage, StreamSession } from "./types.js";
|
|
@@ -25,6 +26,8 @@ export class HttpStreamSession implements StreamSession {
|
|
|
25
26
|
private _compressionLevel?: number;
|
|
26
27
|
private _compressFn?: CompressFn;
|
|
27
28
|
private _decompressFn?: DecompressFn;
|
|
29
|
+
private _authorization?: string;
|
|
30
|
+
private _externalConfig?: ExternalLocationConfig;
|
|
28
31
|
|
|
29
32
|
constructor(opts: {
|
|
30
33
|
baseUrl: string;
|
|
@@ -40,6 +43,8 @@ export class HttpStreamSession implements StreamSession {
|
|
|
40
43
|
compressionLevel?: number;
|
|
41
44
|
compressFn?: CompressFn;
|
|
42
45
|
decompressFn?: DecompressFn;
|
|
46
|
+
authorization?: string;
|
|
47
|
+
externalConfig?: ExternalLocationConfig;
|
|
43
48
|
}) {
|
|
44
49
|
this._baseUrl = opts.baseUrl;
|
|
45
50
|
this._prefix = opts.prefix;
|
|
@@ -54,6 +59,8 @@ export class HttpStreamSession implements StreamSession {
|
|
|
54
59
|
this._compressionLevel = opts.compressionLevel;
|
|
55
60
|
this._compressFn = opts.compressFn;
|
|
56
61
|
this._decompressFn = opts.decompressFn;
|
|
62
|
+
this._authorization = opts.authorization;
|
|
63
|
+
this._externalConfig = opts.externalConfig;
|
|
57
64
|
}
|
|
58
65
|
|
|
59
66
|
get header(): Record<string, any> | null {
|
|
@@ -68,6 +75,9 @@ export class HttpStreamSession implements StreamSession {
|
|
|
68
75
|
headers["Content-Encoding"] = "zstd";
|
|
69
76
|
headers["Accept-Encoding"] = "zstd";
|
|
70
77
|
}
|
|
78
|
+
if (this._authorization) {
|
|
79
|
+
headers.Authorization = this._authorization;
|
|
80
|
+
}
|
|
71
81
|
return headers;
|
|
72
82
|
}
|
|
73
83
|
|
|
@@ -154,6 +164,9 @@ export class HttpStreamSession implements StreamSession {
|
|
|
154
164
|
headers: this._buildHeaders(),
|
|
155
165
|
body: this._prepareBody(body) as unknown as BodyInit,
|
|
156
166
|
});
|
|
167
|
+
if (resp.status === 401) {
|
|
168
|
+
throw new RpcError("AuthenticationError", "Authentication required", "");
|
|
169
|
+
}
|
|
157
170
|
|
|
158
171
|
const responseBody = await this._readResponse(resp);
|
|
159
172
|
const { batches: responseBatches } = await readResponseBatches(responseBody);
|
|
@@ -202,10 +215,14 @@ export class HttpStreamSession implements StreamSession {
|
|
|
202
215
|
*/
|
|
203
216
|
async *[Symbol.asyncIterator](): AsyncIterableIterator<Record<string, any>[]> {
|
|
204
217
|
// Yield pre-loaded batches from init
|
|
205
|
-
for (
|
|
218
|
+
for (let batch of this._pendingBatches) {
|
|
206
219
|
if (batch.numRows === 0) {
|
|
207
|
-
|
|
208
|
-
|
|
220
|
+
if (isExternalLocationBatch(batch)) {
|
|
221
|
+
batch = await resolveExternalLocation(batch, this._externalConfig);
|
|
222
|
+
} else {
|
|
223
|
+
dispatchLogOrError(batch, this._onLog);
|
|
224
|
+
continue;
|
|
225
|
+
}
|
|
209
226
|
}
|
|
210
227
|
yield extractBatchRows(batch);
|
|
211
228
|
}
|
|
@@ -220,7 +237,7 @@ export class HttpStreamSession implements StreamSession {
|
|
|
220
237
|
const { batches } = await readResponseBatches(responseBody);
|
|
221
238
|
|
|
222
239
|
let gotContinuation = false;
|
|
223
|
-
for (
|
|
240
|
+
for (let batch of batches) {
|
|
224
241
|
if (batch.numRows === 0) {
|
|
225
242
|
// Check for continuation token
|
|
226
243
|
const token = batch.metadata?.get(STATE_KEY);
|
|
@@ -229,9 +246,14 @@ export class HttpStreamSession implements StreamSession {
|
|
|
229
246
|
gotContinuation = true;
|
|
230
247
|
continue;
|
|
231
248
|
}
|
|
232
|
-
//
|
|
233
|
-
|
|
234
|
-
|
|
249
|
+
// Check for external location pointer
|
|
250
|
+
if (isExternalLocationBatch(batch)) {
|
|
251
|
+
batch = await resolveExternalLocation(batch, this._externalConfig);
|
|
252
|
+
} else {
|
|
253
|
+
// Log/error batch
|
|
254
|
+
dispatchLogOrError(batch, this._onLog);
|
|
255
|
+
continue;
|
|
256
|
+
}
|
|
235
257
|
}
|
|
236
258
|
|
|
237
259
|
yield extractBatchRows(batch);
|
|
@@ -261,6 +283,9 @@ export class HttpStreamSession implements StreamSession {
|
|
|
261
283
|
headers: this._buildHeaders(),
|
|
262
284
|
body: this._prepareBody(body) as unknown as BodyInit,
|
|
263
285
|
});
|
|
286
|
+
if (resp.status === 401) {
|
|
287
|
+
throw new RpcError("AuthenticationError", "Authentication required", "");
|
|
288
|
+
}
|
|
264
289
|
|
|
265
290
|
return this._readResponse(resp);
|
|
266
291
|
}
|
package/src/client/types.ts
CHANGED
|
@@ -5,6 +5,10 @@ export interface HttpConnectOptions {
|
|
|
5
5
|
prefix?: string;
|
|
6
6
|
onLog?: (msg: LogMessage) => void;
|
|
7
7
|
compressionLevel?: number;
|
|
8
|
+
/** Authorization header value (e.g. "Bearer <token>"). Sent with every request. */
|
|
9
|
+
authorization?: string;
|
|
10
|
+
/** External storage config for resolving externalized batches. */
|
|
11
|
+
externalLocation?: import("../external.js").ExternalLocationConfig;
|
|
8
12
|
}
|
|
9
13
|
|
|
10
14
|
export interface LogMessage {
|
|
@@ -22,6 +26,8 @@ export interface StreamSession {
|
|
|
22
26
|
|
|
23
27
|
export interface PipeConnectOptions {
|
|
24
28
|
onLog?: (msg: LogMessage) => void;
|
|
29
|
+
/** External storage config for resolving externalized batches. */
|
|
30
|
+
externalLocation?: import("../external.js").ExternalLocationConfig;
|
|
25
31
|
}
|
|
26
32
|
|
|
27
33
|
export interface SubprocessConnectOptions extends PipeConnectOptions {
|
package/src/constants.ts
CHANGED
|
@@ -15,8 +15,11 @@ export const REQUEST_ID_KEY = "vgi_rpc.request_id";
|
|
|
15
15
|
|
|
16
16
|
export const PROTOCOL_NAME_KEY = "vgi_rpc.protocol_name";
|
|
17
17
|
export const DESCRIBE_VERSION_KEY = "vgi_rpc.describe_version";
|
|
18
|
-
export const DESCRIBE_VERSION = "
|
|
18
|
+
export const DESCRIBE_VERSION = "3";
|
|
19
19
|
|
|
20
20
|
export const DESCRIBE_METHOD_NAME = "__describe__";
|
|
21
21
|
|
|
22
22
|
export const STATE_KEY = "vgi_rpc.stream_state#b64";
|
|
23
|
+
|
|
24
|
+
export const LOCATION_KEY = "vgi_rpc.location";
|
|
25
|
+
export const LOCATION_SHA256_KEY = "vgi_rpc.location.sha256";
|
package/src/dispatch/describe.ts
CHANGED
|
@@ -37,6 +37,8 @@ export const DESCRIBE_SCHEMA = new Schema([
|
|
|
37
37
|
new Field("param_defaults_json", new Utf8(), true),
|
|
38
38
|
new Field("has_header", new Bool(), false),
|
|
39
39
|
new Field("header_schema_ipc", new Binary(), true),
|
|
40
|
+
new Field("is_exchange", new Bool(), true),
|
|
41
|
+
new Field("param_docs_json", new Utf8(), true),
|
|
40
42
|
]);
|
|
41
43
|
|
|
42
44
|
/**
|
|
@@ -60,6 +62,8 @@ export function buildDescribeBatch(
|
|
|
60
62
|
const paramDefaultsJsons: (string | null)[] = [];
|
|
61
63
|
const hasHeaders: boolean[] = [];
|
|
62
64
|
const headerSchemas: (Uint8Array | null)[] = [];
|
|
65
|
+
const isExchanges: (boolean | null)[] = [];
|
|
66
|
+
const paramDocsJsons: (string | null)[] = [];
|
|
63
67
|
|
|
64
68
|
for (const [name, method] of sortedEntries) {
|
|
65
69
|
names.push(name);
|
|
@@ -95,6 +99,18 @@ export function buildDescribeBatch(
|
|
|
95
99
|
|
|
96
100
|
hasHeaders.push(!!method.headerSchema);
|
|
97
101
|
headerSchemas.push(method.headerSchema ? serializeSchema(method.headerSchema) : null);
|
|
102
|
+
|
|
103
|
+
// is_exchange: true for exchange, false for producer, null for unary
|
|
104
|
+
if (method.exchangeFn) {
|
|
105
|
+
isExchanges.push(true);
|
|
106
|
+
} else if (method.producerFn) {
|
|
107
|
+
isExchanges.push(false);
|
|
108
|
+
} else {
|
|
109
|
+
isExchanges.push(null);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// param_docs_json: no docstring source in TypeScript, always null
|
|
113
|
+
paramDocsJsons.push(null);
|
|
98
114
|
}
|
|
99
115
|
|
|
100
116
|
// Build the batch using vectorFromArray for each column
|
|
@@ -108,6 +124,8 @@ export function buildDescribeBatch(
|
|
|
108
124
|
const paramDefaultsArr = vectorFromArray(paramDefaultsJsons, new Utf8());
|
|
109
125
|
const hasHeaderArr = vectorFromArray(hasHeaders, new Bool());
|
|
110
126
|
const headerSchemaArr = vectorFromArray(headerSchemas, new Binary());
|
|
127
|
+
const isExchangeArr = vectorFromArray(isExchanges, new Bool());
|
|
128
|
+
const paramDocsArr = vectorFromArray(paramDocsJsons, new Utf8());
|
|
111
129
|
|
|
112
130
|
const children = [
|
|
113
131
|
nameArr.data[0],
|
|
@@ -120,6 +138,8 @@ export function buildDescribeBatch(
|
|
|
120
138
|
paramDefaultsArr.data[0],
|
|
121
139
|
hasHeaderArr.data[0],
|
|
122
140
|
headerSchemaArr.data[0],
|
|
141
|
+
isExchangeArr.data[0],
|
|
142
|
+
paramDocsArr.data[0],
|
|
123
143
|
];
|
|
124
144
|
|
|
125
145
|
const structType = new Struct(DESCRIBE_SCHEMA.fields);
|
package/src/dispatch/stream.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
// SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
import { Schema } from "@query-farm/apache-arrow";
|
|
5
|
+
import { type ExternalLocationConfig, maybeExternalizeBatch } from "../external.js";
|
|
5
6
|
import type { MethodDefinition } from "../types.js";
|
|
6
7
|
import { OutputCollector } from "../types.js";
|
|
7
8
|
import { conformBatchToSchema } from "../util/conform.js";
|
|
@@ -33,6 +34,7 @@ export async function dispatchStream(
|
|
|
33
34
|
reader: IpcStreamReader,
|
|
34
35
|
serverId: string,
|
|
35
36
|
requestId: string | null,
|
|
37
|
+
externalConfig?: ExternalLocationConfig,
|
|
36
38
|
): Promise<void> {
|
|
37
39
|
const isProducer = !!method.producerFn;
|
|
38
40
|
|
|
@@ -107,12 +109,20 @@ export async function dispatchStream(
|
|
|
107
109
|
let inputBatch = await reader.readNextBatch();
|
|
108
110
|
if (!inputBatch) break;
|
|
109
111
|
|
|
110
|
-
// Cast compatible input types when schema doesn't match exactly
|
|
112
|
+
// Cast compatible input types when schema doesn't match exactly.
|
|
113
|
+
// If conformance fails (e.g., completely different schemas like a dummy
|
|
114
|
+
// registration schema vs actual data), pass the original batch through —
|
|
115
|
+
// the exchange handler may handle dynamic schemas internally.
|
|
111
116
|
if (expectedInputSchema && !isProducer && inputBatch.schema !== expectedInputSchema) {
|
|
112
117
|
try {
|
|
113
118
|
inputBatch = conformBatchToSchema(inputBatch, expectedInputSchema);
|
|
114
|
-
} catch {
|
|
115
|
-
|
|
119
|
+
} catch (e) {
|
|
120
|
+
if (e instanceof TypeError) {
|
|
121
|
+
// Field name/count mismatch — propagate as error (matches Python behavior).
|
|
122
|
+
throw e;
|
|
123
|
+
}
|
|
124
|
+
// Other conformance failures: pass through for dynamic schema handlers.
|
|
125
|
+
console.debug?.(`Schema conformance skipped: ${e instanceof Error ? e.message : e}`);
|
|
116
126
|
}
|
|
117
127
|
}
|
|
118
128
|
|
|
@@ -125,7 +135,11 @@ export async function dispatchStream(
|
|
|
125
135
|
}
|
|
126
136
|
|
|
127
137
|
for (const emitted of out.batches) {
|
|
128
|
-
|
|
138
|
+
let batch = emitted.batch;
|
|
139
|
+
if (externalConfig) {
|
|
140
|
+
batch = await maybeExternalizeBatch(batch, externalConfig);
|
|
141
|
+
}
|
|
142
|
+
stream.write(batch);
|
|
129
143
|
}
|
|
130
144
|
|
|
131
145
|
if (out.finished) {
|
package/src/dispatch/unary.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// © Copyright 2025-2026, Query.Farm LLC - https://query.farm
|
|
2
2
|
// SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
import { type ExternalLocationConfig, maybeExternalizeBatch } from "../external.js";
|
|
4
5
|
import type { MethodDefinition } from "../types.js";
|
|
5
6
|
import { OutputCollector } from "../types.js";
|
|
6
7
|
import { buildErrorBatch, buildResultBatch } from "../wire/response.js";
|
|
@@ -17,13 +18,17 @@ export async function dispatchUnary(
|
|
|
17
18
|
writer: IpcStreamWriter,
|
|
18
19
|
serverId: string,
|
|
19
20
|
requestId: string | null,
|
|
21
|
+
externalConfig?: ExternalLocationConfig,
|
|
20
22
|
): Promise<void> {
|
|
21
23
|
const schema = method.resultSchema;
|
|
22
24
|
const out = new OutputCollector(schema, true, serverId, requestId);
|
|
23
25
|
|
|
24
26
|
try {
|
|
25
27
|
const result = await method.handler!(params, out);
|
|
26
|
-
|
|
28
|
+
let resultBatch = buildResultBatch(schema, result, serverId, requestId);
|
|
29
|
+
if (externalConfig) {
|
|
30
|
+
resultBatch = await maybeExternalizeBatch(resultBatch, externalConfig);
|
|
31
|
+
}
|
|
27
32
|
// Collect log batches (from clientLog) + result batch
|
|
28
33
|
const batches = [...out.batches.map((b) => b.batch), resultBatch];
|
|
29
34
|
writer.writeStream(schema, batches);
|
package/src/external.ts
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
// © Copyright 2025-2026, Query.Farm LLC - https://query.farm
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* External storage support for large Arrow IPC batches.
|
|
6
|
+
*
|
|
7
|
+
* When a batch exceeds a configurable threshold, it is serialized to IPC,
|
|
8
|
+
* optionally compressed with zstd, and uploaded to pluggable storage.
|
|
9
|
+
* The batch is replaced with a zero-row "pointer batch" containing the
|
|
10
|
+
* download URL and SHA-256 checksum in metadata.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { type RecordBatch, RecordBatchReader, RecordBatchStreamWriter, type Schema } from "@query-farm/apache-arrow";
|
|
14
|
+
import { LOCATION_KEY, LOCATION_SHA256_KEY, LOG_LEVEL_KEY } from "./constants.js";
|
|
15
|
+
import { zstdCompress, zstdDecompress } from "./util/zstd.js";
|
|
16
|
+
import { buildEmptyBatch } from "./wire/response.js";
|
|
17
|
+
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Interfaces and configuration
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
/** Pluggable storage backend for uploading large batches. */
|
|
23
|
+
export interface ExternalStorage {
|
|
24
|
+
/** Upload IPC data and return a URL for retrieval. `contentEncoding` is "zstd" when the data was compressed, otherwise "". */
|
|
25
|
+
upload(data: Uint8Array, contentEncoding: string): Promise<string>;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/** Configuration for external storage of large batches. */
|
|
29
|
+
export interface ExternalLocationConfig {
|
|
30
|
+
/** Storage backend for uploading. */
|
|
31
|
+
storage: ExternalStorage;
|
|
32
|
+
/** Minimum serialized (Arrow IPC) batch size, in bytes, that triggers externalization. Default: 1 MiB (1,048,576 bytes). */
|
|
33
|
+
externalizeThresholdBytes?: number;
|
|
34
|
+
/** Optional zstd compression for uploaded data. `level` defaults to 3. */
|
|
35
|
+
compression?: { algorithm: "zstd"; level?: number };
|
|
36
|
+
/** URL validator called before fetching. Throw to reject. Default (when omitted): HTTPS-only. Pass `null` to disable validation. */
|
|
37
|
+
urlValidator?: ((url: string) => void) | null;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
const DEFAULT_THRESHOLD = 1_048_576; // 1 MB
|
|
41
|
+
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// URL validation
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
/**
 * Default URL validator: accepts only `https:` URLs.
 *
 * @param url - Candidate external location URL.
 * @throws TypeError if `url` cannot be parsed by the `URL` constructor.
 * @throws Error if the parsed protocol is anything other than `https:`.
 */
export function httpsOnlyValidator(url: string): void {
  const { protocol } = new URL(url);
  if (protocol === "https:") return;
  throw new Error(`External location URL must use HTTPS, got "${protocol}"`);
}
|
|
53
|
+
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// SHA-256 helpers
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
/**
 * Compute the SHA-256 digest of `data` and return it as lowercase hex.
 *
 * @param data - Bytes to hash; only the bytes covered by the view are hashed.
 * @returns 64-character lowercase hexadecimal digest.
 */
async function sha256Hex(data: Uint8Array): Promise<string> {
  // Web Crypto's typings want a plain ArrayBuffer, so copy the view's bytes
  // into a freshly allocated one before hashing.
  const plain = new ArrayBuffer(data.byteLength);
  new Uint8Array(plain).set(data);

  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", plain));

  let hex = "";
  for (const byte of digest) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
|
|
67
|
+
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// Detection
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
/**
 * Report whether `batch` is a pointer to externally stored data.
 *
 * A pointer batch has zero rows and metadata that contains `LOCATION_KEY`
 * but not `LOG_LEVEL_KEY`.
 */
export function isExternalLocationBatch(batch: RecordBatch): boolean {
  const meta = batch.numRows === 0 ? batch.metadata : undefined;
  return !!meta && meta.has(LOCATION_KEY) && !meta.has(LOG_LEVEL_KEY);
}
|
|
79
|
+
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
// Pointer batch creation
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
/**
 * Build a pointer batch for `schema` via `buildEmptyBatch`.
 *
 * @param schema - Schema the pointer batch is built with.
 * @param url - Download URL stored under `LOCATION_KEY`.
 * @param sha256 - Optional checksum stored under `LOCATION_SHA256_KEY`;
 *   omitted from the metadata when empty or undefined.
 */
export function makeExternalLocationBatch(schema: Schema, url: string, sha256?: string): RecordBatch {
  const entries: [string, string][] = [[LOCATION_KEY, url]];
  if (sha256) {
    entries.push([LOCATION_SHA256_KEY, sha256]);
  }
  return buildEmptyBatch(schema, new Map<string, string>(entries));
}
|
|
93
|
+
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
// IPC serialization helpers
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
/** Serialize a single batch (with its schema) to Arrow IPC stream bytes. */
function serializeBatchToIpc(batch: RecordBatch): Uint8Array {
  const writer = new RecordBatchStreamWriter();
  writer.reset(undefined, batch.schema);
  writer.write(batch);
  writer.close();
  return writer.toUint8Array(true);
}

// ---------------------------------------------------------------------------
// Write path: externalization
// ---------------------------------------------------------------------------

/**
 * Maybe externalize a batch if it exceeds the threshold.
 *
 * Returns the original batch unchanged when there is no config/storage, when
 * the batch has zero rows, or when its serialized IPC size is below
 * `externalizeThresholdBytes` (default 1 MiB). Otherwise the IPC bytes are
 * optionally zstd-compressed, uploaded through `config.storage`, and a
 * zero-row pointer batch carrying the URL and checksum is returned.
 *
 * @param batch - Batch to inspect.
 * @param config - External storage configuration; `null`/`undefined` disables externalization.
 * @returns The original batch or a pointer batch with the same schema.
 */
export async function maybeExternalizeBatch(
  batch: RecordBatch,
  config?: ExternalLocationConfig | null,
): Promise<RecordBatch> {
  if (!config?.storage) return batch;
  if (batch.numRows === 0) return batch;

  const threshold = config.externalizeThresholdBytes ?? DEFAULT_THRESHOLD;

  // Arrow TS data.byteLength doesn't reflect actual data size, so the
  // threshold is checked against the IPC serialization size. Serialize once
  // and reuse the bytes for the upload instead of serializing a second time.
  const ipcData = serializeBatchToIpc(batch);
  if (ipcData.byteLength < threshold) return batch;

  // Compute SHA-256 of raw IPC bytes (pre-compression)
  const checksum = await sha256Hex(ipcData);

  // Optionally compress
  let payload = ipcData;
  let contentEncoding = "";
  if (config.compression?.algorithm === "zstd") {
    payload = zstdCompress(ipcData, config.compression.level ?? 3) as Uint8Array;
    contentEncoding = "zstd";
  }

  // Upload
  const url = await config.storage.upload(payload, contentEncoding);

  // Return pointer batch
  return makeExternalLocationBatch(batch.schema, url, checksum);
}
|
|
153
|
+
|
|
154
|
+
// ---------------------------------------------------------------------------
|
|
155
|
+
// Read path: resolution
|
|
156
|
+
// ---------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
/**
 * Resolve an external pointer batch by downloading the data it points to.
 *
 * The batch is returned unchanged when `config` is missing, when the batch is
 * not a pointer batch, or when it carries no location URL.
 *
 * @param batch - Candidate pointer batch.
 * @param config - External location configuration (supplies the URL validator).
 * @returns The first record batch of the downloaded IPC stream.
 * @throws Error if the validator rejects the URL, the fetch response is not OK,
 *   the SHA-256 checksum does not match, or the stream yields no batch.
 */
export async function resolveExternalLocation(
  batch: RecordBatch,
  config?: ExternalLocationConfig | null,
): Promise<RecordBatch> {
  if (!config || !isExternalLocationBatch(batch)) return batch;

  const url = batch.metadata?.get(LOCATION_KEY);
  if (!url) return batch;

  // An explicit null disables validation; undefined falls back to HTTPS-only.
  let validate: ((candidate: string) => void) | undefined;
  if (config.urlValidator !== null) {
    validate = config.urlValidator ?? httpsOnlyValidator;
  }
  if (validate) {
    validate(url);
  }

  // Download the payload.
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`External location fetch failed: ${response.status} ${response.statusText} [url: ${url}]`);
  }
  let data = new Uint8Array(await response.arrayBuffer());

  // Undo zstd compression when the response declares it.
  if (response.headers.get("Content-Encoding") === "zstd") {
    data = new Uint8Array(zstdDecompress(data));
  }

  // The checksum, when present, is checked against the decompressed bytes.
  const expectedSha256 = batch.metadata?.get(LOCATION_SHA256_KEY);
  if (expectedSha256) {
    const actualSha256 = await sha256Hex(data);
    if (actualSha256 !== expectedSha256) {
      throw new Error(`SHA-256 checksum mismatch for ${url}: expected ${expectedSha256}, got ${actualSha256}`);
    }
  }

  // Parse the IPC stream and hand back its first batch.
  const reader = await RecordBatchReader.from(data);
  await reader.open();
  const first = reader.next();
  if (first && !first.done && first.value) {
    return first.value;
  }
  throw new Error(`No data batch found in external IPC stream from ${url}`);
}
|
package/src/gcs.ts
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
// © Copyright 2025-2026, Query.Farm LLC - https://query.farm
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Google Cloud Storage backend for external storage of large Arrow IPC batches.
|
|
6
|
+
*
|
|
7
|
+
* Requires `@google-cloud/storage` as a peer dependency.
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import { createGCSStorage } from "@query-farm/vgi-rpc/gcs";
|
|
12
|
+
*
|
|
13
|
+
* const storage = createGCSStorage({
|
|
14
|
+
* bucket: "my-bucket",
|
|
15
|
+
* prefix: "vgi-rpc/",
|
|
16
|
+
* });
|
|
17
|
+
* const handler = createHttpHandler(protocol, {
|
|
18
|
+
* externalLocation: { storage, externalizeThresholdBytes: 1_048_576 },
|
|
19
|
+
* });
|
|
20
|
+
* ```
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import type { ExternalStorage } from "./external.js";
|
|
24
|
+
|
|
25
|
+
/** Configuration for the GCS storage backend. */
|
|
26
|
+
export interface GCSStorageConfig {
|
|
27
|
+
/** GCS bucket name. */
|
|
28
|
+
bucket: string;
|
|
29
|
+
/** Key prefix for uploaded objects. Default: "vgi-rpc/". */
|
|
30
|
+
prefix?: string;
|
|
31
|
+
/** Lifetime of signed GET URLs in seconds. Default: 3600 (1 hour). */
|
|
32
|
+
presignExpirySeconds?: number;
|
|
33
|
+
/** GCS project ID. If omitted, no project ID is passed and the client library resolves it from the environment (e.g. Application Default Credentials). */
|
|
34
|
+
projectId?: string;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
 * Create a GCS-backed ExternalStorage.
 *
 * Lazily imports `@google-cloud/storage` on first upload to avoid
 * loading the SDK unless needed. The client is created at most once, even
 * when several uploads start concurrently; if the import or construction
 * fails, the next upload retries.
 *
 * @param config - Bucket name plus optional key prefix, signed-URL lifetime and project ID.
 * @returns An ExternalStorage whose `upload` stores the bytes as a new object
 *   and resolves to a signed GET URL for it.
 */
export function createGCSStorage(config: GCSStorageConfig): ExternalStorage {
  const bucket = config.bucket;
  const prefix = config.prefix ?? "vgi-rpc/";
  const presignExpiry = config.presignExpirySeconds ?? 3600;

  // Cache the in-flight promise rather than the resolved client, so concurrent
  // first uploads share one import and one Storage instance.
  let clientPromise: Promise<any> | null = null;

  function ensureClient(): Promise<any> {
    if (!clientPromise) {
      clientPromise = (async () => {
        const { Storage } = await import("@google-cloud/storage");
        const clientConfig: Record<string, any> = {};
        if (config.projectId) clientConfig.projectId = config.projectId;
        return new Storage(clientConfig);
      })().catch((err: unknown) => {
        // Do not cache a failure: allow a later upload to try again.
        clientPromise = null;
        throw err;
      });
    }
    return clientPromise;
  }

  return {
    async upload(data: Uint8Array, contentEncoding: string): Promise<string> {
      const client = await ensureClient();
      const bucketRef = client.bucket(bucket);
      // Random object name; the extension reflects whether the payload is zstd-compressed.
      const blobName = `${prefix}${crypto.randomUUID()}${contentEncoding === "zstd" ? ".arrow.zst" : ".arrow"}`;
      const blob = bucketRef.file(blobName);

      const options: Record<string, any> = {
        contentType: "application/vnd.apache.arrow.stream",
        resumable: false,
      };
      if (contentEncoding) {
        options.metadata = { contentEncoding };
      }

      await blob.save(Buffer.from(data), options);

      // Generate signed GET URL
      const [url] = await blob.getSignedUrl({
        version: "v4" as const,
        action: "read" as const,
        expires: Date.now() + presignExpiry * 1000,
      });

      return url;
    },
  };
}
|