@query-farm/vgi-rpc 0.6.4 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/access-log.d.ts +50 -0
- package/dist/access-log.d.ts.map +1 -0
- package/dist/arrow/impl-arrowjs/index.d.ts +96 -0
- package/dist/arrow/impl-arrowjs/index.d.ts.map +1 -0
- package/dist/arrow/impl-flechette/index.d.ts +102 -0
- package/dist/arrow/impl-flechette/index.d.ts.map +1 -0
- package/dist/arrow/impl-flechette/message-meta.d.ts +11 -0
- package/dist/arrow/impl-flechette/message-meta.d.ts.map +1 -0
- package/dist/arrow/index.d.ts +4 -0
- package/dist/arrow/index.d.ts.map +1 -0
- package/dist/arrow/predicates.d.ts +44 -0
- package/dist/arrow/predicates.d.ts.map +1 -0
- package/dist/arrow/types.d.ts +62 -0
- package/dist/arrow/types.d.ts.map +1 -0
- package/dist/client/capabilities.d.ts +25 -0
- package/dist/client/capabilities.d.ts.map +1 -0
- package/dist/client/connect.d.ts.map +1 -1
- package/dist/client/introspect.d.ts +7 -0
- package/dist/client/introspect.d.ts.map +1 -1
- package/dist/client/ipc.d.ts +8 -2
- package/dist/client/ipc.d.ts.map +1 -1
- package/dist/client/pipe.d.ts.map +1 -1
- package/dist/client/stream.d.ts +11 -2
- package/dist/client/stream.d.ts.map +1 -1
- package/dist/client/uploadUrl.d.ts +25 -0
- package/dist/client/uploadUrl.d.ts.map +1 -0
- package/dist/constants.d.ts +15 -1
- package/dist/constants.d.ts.map +1 -1
- package/dist/crypto.d.ts +22 -0
- package/dist/crypto.d.ts.map +1 -0
- package/dist/dispatch/describe.d.ts +10 -6
- package/dist/dispatch/describe.d.ts.map +1 -1
- package/dist/dispatch/stream.d.ts +2 -2
- package/dist/dispatch/stream.d.ts.map +1 -1
- package/dist/dispatch/unary.d.ts +2 -2
- package/dist/dispatch/unary.d.ts.map +1 -1
- package/dist/errors.d.ts +46 -0
- package/dist/errors.d.ts.map +1 -1
- package/dist/external.d.ts +25 -5
- package/dist/external.d.ts.map +1 -1
- package/dist/http/bearer.d.ts.map +1 -1
- package/dist/http/common.d.ts +42 -7
- package/dist/http/common.d.ts.map +1 -1
- package/dist/http/dispatch.d.ts +20 -2
- package/dist/http/dispatch.d.ts.map +1 -1
- package/dist/http/handler.d.ts.map +1 -1
- package/dist/http/index.d.ts +1 -0
- package/dist/http/index.d.ts.map +1 -1
- package/dist/http/mtls.d.ts +2 -1
- package/dist/http/mtls.d.ts.map +1 -1
- package/dist/http/oauth-pkce.d.ts +141 -0
- package/dist/http/oauth-pkce.d.ts.map +1 -0
- package/dist/http/pages.d.ts +3 -0
- package/dist/http/pages.d.ts.map +1 -1
- package/dist/http/sticky.d.ts +124 -0
- package/dist/http/sticky.d.ts.map +1 -0
- package/dist/http/token.d.ts +38 -12
- package/dist/http/token.d.ts.map +1 -1
- package/dist/http/types.d.ts +66 -5
- package/dist/http/types.d.ts.map +1 -1
- package/dist/index.d.ts +6 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1275 -3511
- package/dist/index.js.map +19 -37
- package/dist/launcher/hash.d.ts +22 -0
- package/dist/launcher/hash.d.ts.map +1 -0
- package/dist/launcher/index.d.ts +23 -0
- package/dist/launcher/index.d.ts.map +1 -0
- package/dist/launcher/launch.d.ts +27 -0
- package/dist/launcher/launch.d.ts.map +1 -0
- package/dist/launcher/lock.d.ts +19 -0
- package/dist/launcher/lock.d.ts.map +1 -0
- package/dist/launcher/serve-unix.d.ts +54 -0
- package/dist/launcher/serve-unix.d.ts.map +1 -0
- package/dist/launcher/state.d.ts +59 -0
- package/dist/launcher/state.d.ts.map +1 -0
- package/dist/otel.d.ts.map +1 -1
- package/dist/protocol.d.ts +16 -2
- package/dist/protocol.d.ts.map +1 -1
- package/dist/schema.d.ts +45 -18
- package/dist/schema.d.ts.map +1 -1
- package/dist/server.d.ts +23 -2
- package/dist/server.d.ts.map +1 -1
- package/dist/types.d.ts +216 -12
- package/dist/types.d.ts.map +1 -1
- package/dist/util/gzip.d.ts +10 -0
- package/dist/util/gzip.d.ts.map +1 -0
- package/dist/util/schema.d.ts +3 -15
- package/dist/util/schema.d.ts.map +1 -1
- package/dist/util/web-crypto.d.ts +22 -0
- package/dist/util/web-crypto.d.ts.map +1 -0
- package/dist/util/zstd.d.ts +26 -3
- package/dist/util/zstd.d.ts.map +1 -1
- package/dist/wire/opaque.d.ts +11 -0
- package/dist/wire/opaque.d.ts.map +1 -0
- package/dist/wire/reader.d.ts +5 -5
- package/dist/wire/reader.d.ts.map +1 -1
- package/dist/wire/request.d.ts +11 -3
- package/dist/wire/request.d.ts.map +1 -1
- package/dist/wire/response.d.ts +6 -6
- package/dist/wire/response.d.ts.map +1 -1
- package/dist/wire/writer.d.ts +49 -39
- package/dist/wire/writer.d.ts.map +1 -1
- package/package.json +24 -10
- package/src/access-log.ts +195 -0
- package/src/arrow/impl-arrowjs/index.ts +433 -0
- package/src/arrow/impl-flechette/index.ts +414 -0
- package/src/arrow/impl-flechette/message-meta.ts +174 -0
- package/src/arrow/index.ts +89 -0
- package/src/arrow/predicates.ts +56 -0
- package/src/arrow/types.ts +73 -0
- package/src/client/capabilities.ts +84 -0
- package/src/client/connect.ts +103 -26
- package/src/client/introspect.ts +60 -38
- package/src/client/ipc.ts +37 -27
- package/src/client/pipe.ts +12 -9
- package/src/client/stream.ts +34 -19
- package/src/client/uploadUrl.ts +169 -0
- package/src/constants.ts +18 -1
- package/src/crypto.ts +95 -0
- package/src/dispatch/describe.ts +146 -107
- package/src/dispatch/stream.ts +53 -24
- package/src/dispatch/unary.ts +5 -4
- package/src/errors.ts +76 -0
- package/src/external.ts +43 -29
- package/src/http/bearer.ts +2 -5
- package/src/http/common.ts +90 -23
- package/src/http/dispatch.ts +373 -46
- package/src/http/handler.ts +790 -68
- package/src/http/index.ts +1 -0
- package/src/http/mtls.ts +18 -3
- package/src/http/oauth-pkce.ts +1035 -0
- package/src/http/pages.ts +30 -15
- package/src/http/sticky.ts +429 -0
- package/src/http/token.ts +165 -75
- package/src/http/types.ts +67 -5
- package/src/index.ts +40 -1
- package/src/launcher/hash.ts +104 -0
- package/src/launcher/index.ts +35 -0
- package/src/launcher/launch.ts +284 -0
- package/src/launcher/lock.ts +171 -0
- package/src/launcher/serve-unix.ts +385 -0
- package/src/launcher/state.ts +245 -0
- package/src/otel.ts +39 -33
- package/src/protocol.ts +27 -3
- package/src/schema.ts +107 -56
- package/src/server.ts +196 -20
- package/src/types.ts +322 -18
- package/src/util/gzip.ts +63 -0
- package/src/util/schema.ts +4 -22
- package/src/util/web-crypto.ts +98 -0
- package/src/util/zstd.ts +133 -14
- package/src/wire/opaque.ts +37 -0
- package/src/wire/reader.ts +5 -4
- package/src/wire/request.ts +67 -8
- package/src/wire/response.ts +51 -85
- package/src/wire/writer.ts +165 -69
- package/dist/util/conform.d.ts +0 -18
- package/dist/util/conform.d.ts.map +0 -1
- package/src/util/conform.ts +0 -94
package/src/http/dispatch.ts
CHANGED
|
@@ -1,46 +1,100 @@
|
|
|
1
1
|
// © Copyright 2025-2026, Query.Farm LLC - https://query.farm
|
|
2
2
|
// SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
conformBatchToSchema,
|
|
6
|
+
deserializeBatch,
|
|
7
|
+
deserializeSchema as facadeDeserializeSchema,
|
|
8
|
+
schema as makeSchema,
|
|
9
|
+
serializeBatch,
|
|
10
|
+
type VgiBatch,
|
|
11
|
+
type VgiSchema,
|
|
12
|
+
withBatchMetadata,
|
|
13
|
+
} from "../arrow/index.js";
|
|
5
14
|
import type { AuthContext } from "../auth.js";
|
|
6
|
-
import { STATE_KEY } from "../constants.js";
|
|
15
|
+
import { CANCEL_KEY, STATE_KEY } from "../constants.js";
|
|
7
16
|
import { buildDescribeBatch, DESCRIBE_SCHEMA } from "../dispatch/describe.js";
|
|
8
|
-
import {
|
|
17
|
+
import {
|
|
18
|
+
type ExternalLocationConfig,
|
|
19
|
+
isExternalLocationBatch,
|
|
20
|
+
maybeExternalizeBatch,
|
|
21
|
+
resolveExternalLocation,
|
|
22
|
+
} from "../external.js";
|
|
9
23
|
import type { MethodDefinition } from "../types.js";
|
|
10
|
-
import { OutputCollector } from "../types.js";
|
|
11
|
-
import { conformBatchToSchema } from "../util/conform.js";
|
|
24
|
+
import { OutputCollector, TransportKind } from "../types.js";
|
|
12
25
|
import { serializeSchema } from "../util/schema.js";
|
|
13
|
-
import { parseRequest } from "../wire/request.js";
|
|
26
|
+
import { applyDefaults, parseRequest } from "../wire/request.js";
|
|
14
27
|
import { buildEmptyBatch, buildErrorBatch, buildResultBatch } from "../wire/response.js";
|
|
15
|
-
import { arrowResponse, HttpRpcError, readRequestFromBody, serializeIpcStream } from "./common.js";
|
|
28
|
+
import { appendCookieHeaders, arrowResponse, HttpRpcError, readRequestFromBody, serializeIpcStream } from "./common.js";
|
|
16
29
|
import { packStateToken, unpackStateToken } from "./token.js";
|
|
17
30
|
import type { StateSerializer } from "./types.js";
|
|
18
31
|
|
|
19
|
-
async function deserializeSchema(bytes: Uint8Array): Promise<
|
|
20
|
-
|
|
21
|
-
await reader.open();
|
|
22
|
-
return reader.schema!;
|
|
32
|
+
async function deserializeSchema(bytes: Uint8Array): Promise<VgiSchema> {
|
|
33
|
+
return facadeDeserializeSchema(bytes);
|
|
23
34
|
}
|
|
24
35
|
|
|
25
|
-
const EMPTY_SCHEMA =
|
|
36
|
+
const EMPTY_SCHEMA = makeSchema([]);
|
|
26
37
|
|
|
27
38
|
export interface DispatchContext {
|
|
28
|
-
|
|
39
|
+
tokenKey: Uint8Array;
|
|
29
40
|
tokenTtl: number;
|
|
30
41
|
serverId: string;
|
|
42
|
+
/** Producer-only soft wire-cap (deprecated alias for the producer-loop
|
|
43
|
+
* byte budget). Unary/exchange ignore this. */
|
|
31
44
|
maxStreamResponseBytes?: number;
|
|
45
|
+
/** Soft wire-cap for producer streams; hard wire-cap for unary/exchange.
|
|
46
|
+
* Externalised payloads do not count toward this. */
|
|
47
|
+
maxResponseBytes?: number;
|
|
48
|
+
/** Hard cap on bytes uploaded to external storage during one HTTP response. */
|
|
49
|
+
maxExternalizedResponseBytes?: number;
|
|
32
50
|
stateSerializer: StateSerializer;
|
|
33
51
|
authContext?: AuthContext;
|
|
34
52
|
externalLocation?: ExternalLocationConfig;
|
|
53
|
+
/** Incoming HTTP request cookies. Empty/absent on non-HTTP paths. */
|
|
54
|
+
cookies?: ReadonlyMap<string, string>;
|
|
55
|
+
/** Transport identifier surfaced to handlers via CallContext.kind.
|
|
56
|
+
* Defaults to HTTP when unset (the only caller that overrides it is
|
|
57
|
+
* the AF_UNIX launcher path). */
|
|
58
|
+
kind?: TransportKind;
|
|
59
|
+
/** Per-request sticky-session sink. Installed by the handler when sticky
|
|
60
|
+
* is enabled and the dispatcher attaches it to the OutputCollector so
|
|
61
|
+
* `ctx.session` / `ctx.openSession` / `ctx.closeSession` work. */
|
|
62
|
+
stickyContext?: import("../types.js").StickyContext;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/** Predict the external upload size if maybeExternalizeBatch ran on this batch
|
|
66
|
+
* right now. Returns 0 when externalisation would not fire. Mirrors the
|
|
67
|
+
* threshold logic so a pre-flight check matches the real upload size. */
|
|
68
|
+
function predictExternalizeBytes(batch: VgiBatch, config: ExternalLocationConfig | undefined): number {
|
|
69
|
+
if (!config?.storage) return 0;
|
|
70
|
+
if (batch.numRows === 0) return 0;
|
|
71
|
+
// arrow-js exposes `.data.byteLength` for an O(1) batch-bytes estimate;
|
|
72
|
+
// flechette doesn't surface this, but maybeExternalizeBatch will measure
|
|
73
|
+
// exact size on the actual upload path. Best-effort prediction here.
|
|
74
|
+
const size = (batch as any).data?.byteLength ?? 0;
|
|
75
|
+
const threshold = config.externalizeThresholdBytes ?? 1_048_576;
|
|
76
|
+
if (size < threshold) return 0;
|
|
77
|
+
return size;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/** Build an Arrow IPC stream containing only an EXCEPTION batch, wrapped in a
|
|
81
|
+
* 500 response so common.ts/arrowResponse rewrites it to 200 + X-VGI-RPC-Error.
|
|
82
|
+
* Used for cap-overshoot strict-fail. */
|
|
83
|
+
function makeCapErrorResponse(schema: VgiSchema, error: Error, ctx: DispatchContext): Response {
|
|
84
|
+
const errBatch = buildErrorBatch(schema, error, ctx.serverId, null);
|
|
85
|
+
const response = arrowResponse(serializeIpcStream(schema, [errBatch]), 500);
|
|
86
|
+
(response as any).__dispatchError = error;
|
|
87
|
+
return response;
|
|
35
88
|
}
|
|
36
89
|
|
|
37
90
|
/** Dispatch a __describe__ request. */
|
|
38
|
-
export function httpDispatchDescribe(
|
|
91
|
+
export async function httpDispatchDescribe(
|
|
39
92
|
protocolName: string,
|
|
40
93
|
methods: Map<string, MethodDefinition>,
|
|
41
94
|
serverId: string,
|
|
42
|
-
|
|
43
|
-
|
|
95
|
+
protocolVersion?: string,
|
|
96
|
+
): Promise<Response> {
|
|
97
|
+
const { batch } = await buildDescribeBatch(protocolName, methods, serverId, protocolVersion);
|
|
44
98
|
const body = serializeIpcStream(DESCRIBE_SCHEMA, [batch]);
|
|
45
99
|
return arrowResponse(body);
|
|
46
100
|
}
|
|
@@ -52,26 +106,95 @@ export async function httpDispatchUnary(
|
|
|
52
106
|
ctx: DispatchContext,
|
|
53
107
|
): Promise<Response> {
|
|
54
108
|
const schema = method.resultSchema;
|
|
55
|
-
const { schema: reqSchema, batch:
|
|
56
|
-
|
|
109
|
+
const { schema: reqSchema, batch: reqBatchRaw } = await readRequestFromBody(body);
|
|
110
|
+
|
|
111
|
+
// If the client externalized the request payload, fetch the inner batch
|
|
112
|
+
// and re-attach the outer dispatch metadata (method, version, request id)
|
|
113
|
+
// before parsing parameters. Mirrors the Python _read_request stage-1
|
|
114
|
+
// behaviour in vgi_rpc/rpc/_wire.py.
|
|
115
|
+
let reqBatch = reqBatchRaw;
|
|
116
|
+
let effectiveSchema = reqSchema;
|
|
117
|
+
if (ctx.externalLocation && isExternalLocationBatch(reqBatchRaw)) {
|
|
118
|
+
const resolved = await resolveExternalLocation(reqBatchRaw, ctx.externalLocation);
|
|
119
|
+
const mergedMeta = new Map<string, string>(resolved.metadata ?? []);
|
|
120
|
+
for (const [k, v] of reqBatchRaw.metadata ?? []) {
|
|
121
|
+
// Outer dispatch metadata wins for vgi_rpc.* keys (the inner batch
|
|
122
|
+
// shouldn't carry them but if it does, the outer is authoritative).
|
|
123
|
+
mergedMeta.set(k, v);
|
|
124
|
+
}
|
|
125
|
+
reqBatch = withBatchMetadata(resolved, mergedMeta);
|
|
126
|
+
effectiveSchema = resolved.schema;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
const parsed = parseRequest(effectiveSchema, reqBatch);
|
|
57
130
|
|
|
58
131
|
if (parsed.methodName !== method.name) {
|
|
59
132
|
throw new HttpRpcError(`Method name in request '${parsed.methodName}' does not match URL '${method.name}'`, 400);
|
|
60
133
|
}
|
|
61
134
|
|
|
62
|
-
|
|
135
|
+
applyDefaults(parsed.params, method.defaults);
|
|
136
|
+
|
|
137
|
+
const externalizationEnabled = !!ctx.externalLocation?.storage;
|
|
138
|
+
const out = new OutputCollector(
|
|
139
|
+
schema,
|
|
140
|
+
true,
|
|
141
|
+
ctx.serverId,
|
|
142
|
+
parsed.requestId,
|
|
143
|
+
ctx.authContext,
|
|
144
|
+
ctx.cookies,
|
|
145
|
+
ctx.kind ?? TransportKind.HTTP,
|
|
146
|
+
{
|
|
147
|
+
// Unary is one-shot: the entire wire and external budgets are
|
|
148
|
+
// available for this single emit.
|
|
149
|
+
remainingResponseBytes: ctx.maxResponseBytes,
|
|
150
|
+
remainingExternalizedResponseBytes: externalizationEnabled ? ctx.maxExternalizedResponseBytes : undefined,
|
|
151
|
+
externalizationEnabled,
|
|
152
|
+
},
|
|
153
|
+
);
|
|
154
|
+
out.enableCookieSink();
|
|
155
|
+
if (ctx.stickyContext) out.attachStickyContext(ctx.stickyContext);
|
|
63
156
|
|
|
64
157
|
try {
|
|
65
158
|
const result = await method.handler!(parsed.params, out);
|
|
66
159
|
let resultBatch = buildResultBatch(schema, result, ctx.serverId, parsed.requestId);
|
|
67
160
|
if (ctx.externalLocation) {
|
|
161
|
+
// Pre-flight max_externalized_response_bytes BEFORE incurring the
|
|
162
|
+
// upload — operator's intent is "don't emit data beyond this per
|
|
163
|
+
// call," not "emit and then complain." Mirror the Python check.
|
|
164
|
+
const predicted = predictExternalizeBytes(resultBatch, ctx.externalLocation);
|
|
165
|
+
if (ctx.maxExternalizedResponseBytes != null && predicted > ctx.maxExternalizedResponseBytes) {
|
|
166
|
+
const overshoot = new Error(
|
|
167
|
+
`Externalised payload exceeds max_externalized_response_bytes (${predicted} > ${ctx.maxExternalizedResponseBytes}) for method '${method.name}'`,
|
|
168
|
+
);
|
|
169
|
+
overshoot.name = "RuntimeError";
|
|
170
|
+
const response = makeCapErrorResponse(schema, overshoot, ctx);
|
|
171
|
+
appendCookieHeaders(response.headers, out.drainResponseCookies());
|
|
172
|
+
return response;
|
|
173
|
+
}
|
|
68
174
|
resultBatch = await maybeExternalizeBatch(resultBatch, ctx.externalLocation);
|
|
69
175
|
}
|
|
70
176
|
const batches = [...out.batches.map((b) => b.batch), resultBatch];
|
|
71
|
-
|
|
177
|
+
const body = serializeIpcStream(schema, batches);
|
|
178
|
+
// Hard wire-cap enforcement — overshoot replaces the response with a
|
|
179
|
+
// fresh EXCEPTION-only stream.
|
|
180
|
+
if (ctx.maxResponseBytes != null && body.byteLength > ctx.maxResponseBytes) {
|
|
181
|
+
const overshoot = new Error(
|
|
182
|
+
`HTTP body exceeds max_response_bytes (${body.byteLength} > ${ctx.maxResponseBytes}) for method '${method.name}'`,
|
|
183
|
+
);
|
|
184
|
+
overshoot.name = "RuntimeError";
|
|
185
|
+
const response = makeCapErrorResponse(schema, overshoot, ctx);
|
|
186
|
+
appendCookieHeaders(response.headers, out.drainResponseCookies());
|
|
187
|
+
return response;
|
|
188
|
+
}
|
|
189
|
+
const response = arrowResponse(body);
|
|
190
|
+
appendCookieHeaders(response.headers, out.drainResponseCookies());
|
|
191
|
+
return response;
|
|
72
192
|
} catch (error: any) {
|
|
73
193
|
const errBatch = buildErrorBatch(schema, error, ctx.serverId, parsed.requestId);
|
|
74
194
|
const response = arrowResponse(serializeIpcStream(schema, [errBatch]), 500);
|
|
195
|
+
// Apply any cookies queued before the exception — matches Python's
|
|
196
|
+
// "cookies-on-error" behavior.
|
|
197
|
+
appendCookieHeaders(response.headers, out.drainResponseCookies());
|
|
75
198
|
// Attach the error so the dispatch hook can see it
|
|
76
199
|
(response as any).__dispatchError = error;
|
|
77
200
|
return response;
|
|
@@ -95,6 +218,8 @@ export async function httpDispatchStreamInit(
|
|
|
95
218
|
throw new HttpRpcError(`Method name in request '${parsed.methodName}' does not match URL '${method.name}'`, 400);
|
|
96
219
|
}
|
|
97
220
|
|
|
221
|
+
applyDefaults(parsed.params, method.defaults);
|
|
222
|
+
|
|
98
223
|
// Init state
|
|
99
224
|
let state: any;
|
|
100
225
|
try {
|
|
@@ -113,13 +238,27 @@ export async function httpDispatchStreamInit(
|
|
|
113
238
|
|
|
114
239
|
// Support dynamic output schemas (same as pipe transport)
|
|
115
240
|
const resolvedOutputSchema = state?.__outputSchema ?? outputSchema;
|
|
241
|
+
// Mirror the output-schema override for inputs: VGI's `init` registers
|
|
242
|
+
// exchange with a dummy `_tick` inputSchema and binds the real per-call
|
|
243
|
+
// input shape via state.__inputSchema. Without this, the dummy schema
|
|
244
|
+
// gets baked into the state token and conformBatchToSchema rejects the
|
|
245
|
+
// first real input batch on the next exchange round.
|
|
246
|
+
const resolvedInputSchema = state?.__inputSchema ?? inputSchema;
|
|
116
247
|
const effectiveProducer = state?.__isProducer ?? isProducer;
|
|
117
248
|
|
|
118
249
|
// Build header IPC stream if method has a header schema
|
|
119
250
|
let headerBytes: Uint8Array | null = null;
|
|
120
251
|
if (method.headerSchema && method.headerInit) {
|
|
121
252
|
try {
|
|
122
|
-
const headerOut = new OutputCollector(
|
|
253
|
+
const headerOut = new OutputCollector(
|
|
254
|
+
method.headerSchema,
|
|
255
|
+
true,
|
|
256
|
+
ctx.serverId,
|
|
257
|
+
parsed.requestId,
|
|
258
|
+
ctx.authContext,
|
|
259
|
+
ctx.cookies,
|
|
260
|
+
ctx.kind ?? TransportKind.HTTP,
|
|
261
|
+
);
|
|
123
262
|
const headerValues = method.headerInit(parsed.params, state, headerOut);
|
|
124
263
|
const headerBatch = buildResultBatch(method.headerSchema, headerValues, ctx.serverId, parsed.requestId);
|
|
125
264
|
const headerBatches = [...headerOut.batches.map((b) => b.batch), headerBatch];
|
|
@@ -136,13 +275,21 @@ export async function httpDispatchStreamInit(
|
|
|
136
275
|
// Producer method — produce data inline in the init response.
|
|
137
276
|
// For exchange-registered methods acting as producers (__isProducer),
|
|
138
277
|
// produceStreamResponse falls back to exchangeFn with tick batches.
|
|
139
|
-
return produceStreamResponse(
|
|
278
|
+
return produceStreamResponse(
|
|
279
|
+
method,
|
|
280
|
+
state,
|
|
281
|
+
resolvedOutputSchema,
|
|
282
|
+
resolvedInputSchema,
|
|
283
|
+
ctx,
|
|
284
|
+
parsed.requestId,
|
|
285
|
+
headerBytes,
|
|
286
|
+
);
|
|
140
287
|
} else {
|
|
141
288
|
// Exchange: serialize state into signed token, return zero-row batch with token
|
|
142
289
|
const stateBytes = ctx.stateSerializer.serialize(state);
|
|
143
290
|
const schemaBytes = serializeSchema(resolvedOutputSchema);
|
|
144
|
-
const inputSchemaBytes = serializeSchema(
|
|
145
|
-
const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.
|
|
291
|
+
const inputSchemaBytes = serializeSchema(resolvedInputSchema);
|
|
292
|
+
const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.tokenKey, ctx.authContext?.principal);
|
|
146
293
|
|
|
147
294
|
const tokenMeta = new Map<string, string>();
|
|
148
295
|
tokenMeta.set(STATE_KEY, token);
|
|
@@ -176,9 +323,17 @@ export async function httpDispatchStreamExchange(
|
|
|
176
323
|
throw new HttpRpcError("Missing state token in exchange request", 400);
|
|
177
324
|
}
|
|
178
325
|
|
|
326
|
+
// Cancel signal — observed alongside the state token. Must be checked
|
|
327
|
+
// before conformBatchToSchema so that zero-row empty-schema cancel batches
|
|
328
|
+
// don't fail the cast.
|
|
329
|
+
const cancelled = reqBatch.metadata?.get(CANCEL_KEY) != null;
|
|
330
|
+
|
|
331
|
+
// Bind verification to the caller's identity — a token sealed for
|
|
332
|
+
// principal A will fail AEAD decryption when replayed by principal B
|
|
333
|
+
// (or by an anonymous caller, and vice versa).
|
|
179
334
|
let unpacked: import("./token.js").UnpackedToken;
|
|
180
335
|
try {
|
|
181
|
-
unpacked = unpackStateToken(tokenBase64, ctx.
|
|
336
|
+
unpacked = unpackStateToken(tokenBase64, ctx.tokenKey, ctx.tokenTtl, ctx.authContext?.principal);
|
|
182
337
|
} catch (error: any) {
|
|
183
338
|
throw new HttpRpcError(`Invalid state token: ${error.message}`, 400);
|
|
184
339
|
}
|
|
@@ -193,17 +348,21 @@ export async function httpDispatchStreamExchange(
|
|
|
193
348
|
|
|
194
349
|
// Recover schemas from the token (the state itself may not contain
|
|
195
350
|
// Schema objects after JSON round-trip — always prefer the token).
|
|
196
|
-
let outputSchema:
|
|
351
|
+
let outputSchema: VgiSchema;
|
|
197
352
|
if (unpacked.schemaBytes.length > 0) {
|
|
198
353
|
outputSchema = await deserializeSchema(unpacked.schemaBytes);
|
|
199
354
|
} else {
|
|
200
355
|
outputSchema = state?.__outputSchema ?? method.outputSchema!;
|
|
201
356
|
}
|
|
202
|
-
let inputSchema:
|
|
357
|
+
let inputSchema: VgiSchema;
|
|
203
358
|
if (unpacked.inputSchemaBytes.length > 0) {
|
|
204
359
|
inputSchema = await deserializeSchema(unpacked.inputSchemaBytes);
|
|
205
360
|
} else {
|
|
206
|
-
|
|
361
|
+
// state.__inputSchema mirrors the __outputSchema pattern — set by
|
|
362
|
+
// dynamic-input exchange methods (e.g. VGI's init, which binds to a
|
|
363
|
+
// user-supplied input shape per invocation). Matches the fix already
|
|
364
|
+
// applied in src/dispatch/stream.ts for the subprocess path.
|
|
365
|
+
inputSchema = state?.__inputSchema ?? method.inputSchema ?? EMPTY_SCHEMA;
|
|
207
366
|
}
|
|
208
367
|
const effectiveProducer = state?.__isProducer ?? isProducer;
|
|
209
368
|
if (process.env.VGI_DISPATCH_DEBUG)
|
|
@@ -211,6 +370,20 @@ export async function httpDispatchStreamExchange(
|
|
|
211
370
|
`[httpDispatchStreamExchange] method=${method.name} effectiveProducer=${effectiveProducer} stateKeys=${Object.keys(state || {})}`,
|
|
212
371
|
);
|
|
213
372
|
|
|
373
|
+
if (cancelled) {
|
|
374
|
+
// Client asked for cancellation. Invoke the optional hook once and
|
|
375
|
+
// return an empty IPC stream (no continuation token) so the client
|
|
376
|
+
// knows the stream has ended.
|
|
377
|
+
if (method.onCancel) {
|
|
378
|
+
try {
|
|
379
|
+
await method.onCancel(state);
|
|
380
|
+
} catch (err) {
|
|
381
|
+
console.debug?.(`onCancel hook failed: ${err instanceof Error ? err.message : err}`);
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
return arrowResponse(serializeIpcStream(outputSchema, []));
|
|
385
|
+
}
|
|
386
|
+
|
|
214
387
|
if (effectiveProducer) {
|
|
215
388
|
// Producer continuation — produce more data inline.
|
|
216
389
|
// For exchange-registered methods, falls back to exchangeFn with tick batches.
|
|
@@ -219,10 +392,44 @@ export async function httpDispatchStreamExchange(
|
|
|
219
392
|
// Exchange path — also handles exchange-registered methods acting as
|
|
220
393
|
// producers (__isProducer=true). Use producer mode on the OutputCollector
|
|
221
394
|
// when effectiveProducer so finish() is allowed.
|
|
222
|
-
const
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
395
|
+
const externalizationEnabled = !!ctx.externalLocation?.storage;
|
|
396
|
+
const out = new OutputCollector(
|
|
397
|
+
outputSchema,
|
|
398
|
+
effectiveProducer,
|
|
399
|
+
ctx.serverId,
|
|
400
|
+
null,
|
|
401
|
+
ctx.authContext,
|
|
402
|
+
ctx.cookies,
|
|
403
|
+
ctx.kind ?? TransportKind.HTTP,
|
|
404
|
+
{
|
|
405
|
+
// Exchange is lockstep: one process() call, one output batch,
|
|
406
|
+
// one HTTP response. The whole budget belongs to this emit.
|
|
407
|
+
remainingResponseBytes: ctx.maxResponseBytes,
|
|
408
|
+
remainingExternalizedResponseBytes: externalizationEnabled ? ctx.maxExternalizedResponseBytes : undefined,
|
|
409
|
+
externalizationEnabled,
|
|
410
|
+
},
|
|
411
|
+
);
|
|
412
|
+
if (ctx.stickyContext) out.attachStickyContext(ctx.stickyContext);
|
|
413
|
+
|
|
414
|
+
// Cast compatible input types (e.g., decimal→double, int32→int64).
|
|
415
|
+
// Gated on effectiveProducer (not isProducer) so methods that flip to
|
|
416
|
+
// producer mode via state.__isProducer skip the conform entirely — the
|
|
417
|
+
// tick batches they receive have a dummy shape that shouldn't be
|
|
418
|
+
// checked against the declared input schema. Any conformance failure
|
|
419
|
+
// falls through with the original batch; the handler owns input-shape
|
|
420
|
+
// validation if it cares. Mirrors dispatch/stream.ts.
|
|
421
|
+
let conformedBatch = reqBatch;
|
|
422
|
+
if (!effectiveProducer && inputSchema !== EMPTY_SCHEMA && reqBatch.schema !== inputSchema) {
|
|
423
|
+
try {
|
|
424
|
+
conformedBatch = conformBatchToSchema(reqBatch, inputSchema);
|
|
425
|
+
} catch (e) {
|
|
426
|
+
// Field name/count mismatch is a hard contract violation — surface it
|
|
427
|
+
// as an error rather than letting handlers see a wrong-shape batch
|
|
428
|
+
// (mirrors the subprocess dispatch in src/dispatch/stream.ts).
|
|
429
|
+
if (e instanceof TypeError) throw e;
|
|
430
|
+
console.debug?.(`Schema conformance skipped: ${e instanceof Error ? e.message : e}`);
|
|
431
|
+
}
|
|
432
|
+
}
|
|
226
433
|
|
|
227
434
|
try {
|
|
228
435
|
if (method.exchangeFn) {
|
|
@@ -244,27 +451,37 @@ export async function httpDispatchStreamExchange(
|
|
|
244
451
|
}
|
|
245
452
|
|
|
246
453
|
// Collect emitted batches
|
|
247
|
-
const batches:
|
|
454
|
+
const batches: VgiBatch[] = [];
|
|
248
455
|
|
|
249
456
|
if (out.finished) {
|
|
250
457
|
// Stream is done — return data WITHOUT state token.
|
|
251
458
|
// The absence of a token tells the client there's no more data.
|
|
252
459
|
for (const emitted of out.batches) {
|
|
253
|
-
|
|
460
|
+
// Preserve per-emit metadata (vgi_batch_index,
|
|
461
|
+
// vgi_partition_values#b64) as the RecordBatch custom_metadata.
|
|
462
|
+
if (emitted.metadata && emitted.metadata.size > 0) {
|
|
463
|
+
const md = new Map<string, string>(emitted.batch.metadata ?? []);
|
|
464
|
+
for (const [k, v] of emitted.metadata) md.set(k, v);
|
|
465
|
+
batches.push(withBatchMetadata(emitted.batch, md));
|
|
466
|
+
} else {
|
|
467
|
+
batches.push(emitted.batch);
|
|
468
|
+
}
|
|
254
469
|
}
|
|
255
470
|
} else {
|
|
256
471
|
// More data may follow — repack state into token for next exchange.
|
|
257
472
|
const stateBytes = ctx.stateSerializer.serialize(state);
|
|
258
473
|
const schemaBytes = serializeSchema(outputSchema);
|
|
259
474
|
const inputSchemaBytes = serializeSchema(inputSchema);
|
|
260
|
-
const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.
|
|
475
|
+
const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.tokenKey, ctx.authContext?.principal);
|
|
261
476
|
|
|
262
477
|
for (const emitted of out.batches) {
|
|
263
478
|
const batch = emitted.batch;
|
|
264
479
|
if (batch.numRows > 0) {
|
|
265
480
|
const mergedMeta = new Map<string, string>(batch.metadata ?? []);
|
|
481
|
+
// Fold in per-emit metadata (vgi_batch_index, vgi_partition_values#b64).
|
|
482
|
+
if (emitted.metadata) for (const [k, v] of emitted.metadata) mergedMeta.set(k, v);
|
|
266
483
|
mergedMeta.set(STATE_KEY, token);
|
|
267
|
-
batches.push(
|
|
484
|
+
batches.push(withBatchMetadata(batch, mergedMeta));
|
|
268
485
|
} else {
|
|
269
486
|
batches.push(batch);
|
|
270
487
|
}
|
|
@@ -280,7 +497,17 @@ export async function httpDispatchStreamExchange(
|
|
|
280
497
|
}
|
|
281
498
|
}
|
|
282
499
|
|
|
283
|
-
|
|
500
|
+
const body = serializeIpcStream(outputSchema, batches);
|
|
501
|
+
// Hard wire-cap enforcement for stream-exchange — overshoot replaces
|
|
502
|
+
// the response with an EXCEPTION-only stream so the client surfaces RpcError.
|
|
503
|
+
if (ctx.maxResponseBytes != null && body.byteLength > ctx.maxResponseBytes) {
|
|
504
|
+
const overshoot = new Error(
|
|
505
|
+
`HTTP body exceeds max_response_bytes (${body.byteLength} > ${ctx.maxResponseBytes}) for method '${method.name}'`,
|
|
506
|
+
);
|
|
507
|
+
overshoot.name = "RuntimeError";
|
|
508
|
+
return makeCapErrorResponse(outputSchema, overshoot, ctx);
|
|
509
|
+
}
|
|
510
|
+
return arrowResponse(body);
|
|
284
511
|
}
|
|
285
512
|
}
|
|
286
513
|
|
|
@@ -288,19 +515,52 @@ export async function httpDispatchStreamExchange(
|
|
|
288
515
|
async function produceStreamResponse(
|
|
289
516
|
method: MethodDefinition,
|
|
290
517
|
state: any,
|
|
291
|
-
outputSchema:
|
|
292
|
-
inputSchema:
|
|
518
|
+
outputSchema: VgiSchema,
|
|
519
|
+
inputSchema: VgiSchema,
|
|
293
520
|
ctx: DispatchContext,
|
|
294
521
|
requestId: string | null,
|
|
295
522
|
headerBytes: Uint8Array | null,
|
|
296
523
|
): Promise<Response> {
|
|
297
|
-
const allBatches:
|
|
298
|
-
|
|
524
|
+
const allBatches: VgiBatch[] = [];
|
|
525
|
+
// Producer wire cap: prefer the legacy stream-only soft cap when set
|
|
526
|
+
// (lets old callers keep the "one batch per response" hack alive),
|
|
527
|
+
// else fall through to maxResponseBytes (which is hard for unary/
|
|
528
|
+
// exchange but soft for producer — continuation tokens cover overshoot).
|
|
529
|
+
const maxBytes = ctx.maxStreamResponseBytes ?? ctx.maxResponseBytes;
|
|
530
|
+
const maxExternalBytes = ctx.maxExternalizedResponseBytes;
|
|
531
|
+
const externalizationEnabled = !!ctx.externalLocation?.storage;
|
|
299
532
|
let estimatedBytes = 0;
|
|
533
|
+
/** Cumulative external-channel bytes across iterations. External cap is
|
|
534
|
+
* *hard* — externalised uploads have no continuation-token escape valve. */
|
|
535
|
+
let cumulativeExternalBytes = 0;
|
|
300
536
|
let producerError: Error | undefined;
|
|
537
|
+
/** Set when the external cap is breached; the loop replaces the partial
|
|
538
|
+
* stream with an EXCEPTION batch and breaks. */
|
|
539
|
+
let externalOvershoot: Error | undefined;
|
|
301
540
|
|
|
302
541
|
while (true) {
|
|
303
|
-
|
|
542
|
+
// Snapshot per-iteration budgets so the worker can size its emit.
|
|
543
|
+
const remainingWire = maxBytes != null ? Math.max(0, maxBytes - estimatedBytes) : undefined;
|
|
544
|
+
const remainingExternal =
|
|
545
|
+
externalizationEnabled && maxExternalBytes != null
|
|
546
|
+
? Math.max(0, maxExternalBytes - cumulativeExternalBytes)
|
|
547
|
+
: undefined;
|
|
548
|
+
|
|
549
|
+
const out = new OutputCollector(
|
|
550
|
+
outputSchema,
|
|
551
|
+
true,
|
|
552
|
+
ctx.serverId,
|
|
553
|
+
requestId,
|
|
554
|
+
ctx.authContext,
|
|
555
|
+
ctx.cookies,
|
|
556
|
+
ctx.kind ?? TransportKind.HTTP,
|
|
557
|
+
{
|
|
558
|
+
remainingResponseBytes: remainingWire,
|
|
559
|
+
remainingExternalizedResponseBytes: remainingExternal,
|
|
560
|
+
externalizationEnabled,
|
|
561
|
+
},
|
|
562
|
+
);
|
|
563
|
+
if (ctx.stickyContext) out.attachStickyContext(ctx.stickyContext);
|
|
304
564
|
|
|
305
565
|
try {
|
|
306
566
|
if (method.producerFn) {
|
|
@@ -322,12 +582,73 @@ async function produceStreamResponse(
|
|
|
322
582
|
}
|
|
323
583
|
|
|
324
584
|
for (const emitted of out.batches) {
|
|
325
|
-
|
|
585
|
+
let batch = emitted.batch;
|
|
586
|
+
// Externalize before charging wire bytes — externalised payloads
|
|
587
|
+
// ride on the side channel and only the small pointer batch ends
|
|
588
|
+
// up on the wire. Pre-flight check + cumulative accounting mirror
|
|
589
|
+
// Python's _run_http_producer_turn so a worker exfiltrating big
|
|
590
|
+
// batches via tiny pointer outputs still hits the external cap.
|
|
591
|
+
if (externalizationEnabled && ctx.externalLocation) {
|
|
592
|
+
const predicted = predictExternalizeBytes(batch, ctx.externalLocation);
|
|
593
|
+
if (predicted > 0 && maxExternalBytes != null && cumulativeExternalBytes + predicted > maxExternalBytes) {
|
|
594
|
+
externalOvershoot = new Error(
|
|
595
|
+
`Externalised payload exceeds max_externalized_response_bytes (${cumulativeExternalBytes + predicted} > ${maxExternalBytes}) for method '${method.name}'`,
|
|
596
|
+
);
|
|
597
|
+
externalOvershoot.name = "RuntimeError";
|
|
598
|
+
break;
|
|
599
|
+
}
|
|
600
|
+
if (predicted > 0) {
|
|
601
|
+
batch = await maybeExternalizeBatch(batch, ctx.externalLocation);
|
|
602
|
+
cumulativeExternalBytes += predicted;
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
// Preserve per-emit metadata (vgi_batch_index, vgi_partition_values#b64)
|
|
606
|
+
// as the RecordBatch custom_metadata so the C++ extension reads it.
|
|
607
|
+
if (emitted.metadata && emitted.metadata.size > 0) {
|
|
608
|
+
const md = new Map<string, string>(batch.metadata ?? []);
|
|
609
|
+
for (const [k, v] of emitted.metadata) md.set(k, v);
|
|
610
|
+
batch = withBatchMetadata(batch, md);
|
|
611
|
+
}
|
|
612
|
+
allBatches.push(batch);
|
|
326
613
|
if (maxBytes != null) {
|
|
327
|
-
|
|
614
|
+
// arrow-js exposes O(1) byteLength via batch.data; flechette has no
|
|
615
|
+
// equivalent. Best-effort estimate via serializeBatch in the latter.
|
|
616
|
+
let sz = (batch as any).data?.byteLength ?? 0;
|
|
617
|
+
if (sz === 0) {
|
|
618
|
+
// Either a zero-row externalisation pointer batch or a flechette
|
|
619
|
+
// batch that doesn't expose `data.byteLength`. The pointer case
|
|
620
|
+
// is real "work done" — the worker's emit became an upload, and
|
|
621
|
+
// we still need to advance the wire-cap loop so it eventually
|
|
622
|
+
// breaks. Use a serialized-size estimate; for pointer batches
|
|
623
|
+
// this captures the metadata-bearing zero-row body, for plain
|
|
624
|
+
// batches it's the actual wire size.
|
|
625
|
+
try {
|
|
626
|
+
sz = serializeBatch(batch).byteLength;
|
|
627
|
+
} catch {
|
|
628
|
+
sz = 0;
|
|
629
|
+
}
|
|
630
|
+
// Producer cancellation contract: the loop MUST make progress
|
|
631
|
+
// every iteration so an externalized infinite producer (e.g.,
|
|
632
|
+
// `cancellable_producer` with externalize-threshold=1) eventually
|
|
633
|
+
// mints a continuation token and lets the client cancel. Charge
|
|
634
|
+
// at least 1 byte when neither byteLength nor serialization
|
|
635
|
+
// gives us a real measurement.
|
|
636
|
+
if (sz === 0) sz = 1;
|
|
637
|
+
}
|
|
638
|
+
estimatedBytes += sz;
|
|
328
639
|
}
|
|
329
640
|
}
|
|
330
641
|
|
|
642
|
+
if (externalOvershoot) {
|
|
643
|
+
// Replace the partial stream with a fresh one carrying only the
|
|
644
|
+
// EXCEPTION batch — clients see RpcError before any data, matching
|
|
645
|
+
// the unary/exchange strict-fail contract.
|
|
646
|
+
allBatches.length = 0;
|
|
647
|
+
allBatches.push(buildErrorBatch(outputSchema, externalOvershoot, ctx.serverId, requestId));
|
|
648
|
+
producerError = externalOvershoot;
|
|
649
|
+
break;
|
|
650
|
+
}
|
|
651
|
+
|
|
331
652
|
if (out.finished) {
|
|
332
653
|
break;
|
|
333
654
|
}
|
|
@@ -337,7 +658,7 @@ async function produceStreamResponse(
|
|
|
337
658
|
const stateBytes = ctx.stateSerializer.serialize(state);
|
|
338
659
|
const schemaBytes = serializeSchema(outputSchema);
|
|
339
660
|
const inputSchemaBytes = serializeSchema(inputSchema);
|
|
340
|
-
const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.
|
|
661
|
+
const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.tokenKey, ctx.authContext?.principal);
|
|
341
662
|
const tokenMeta = new Map<string, string>();
|
|
342
663
|
tokenMeta.set(STATE_KEY, token);
|
|
343
664
|
allBatches.push(buildEmptyBatch(outputSchema, tokenMeta));
|
|
@@ -352,7 +673,13 @@ async function produceStreamResponse(
|
|
|
352
673
|
} else {
|
|
353
674
|
responseBody = dataBytes;
|
|
354
675
|
}
|
|
355
|
-
|
|
676
|
+
// External-cap overshoot is a strict-fail: emit 500 so arrowResponse
|
|
677
|
+
// translates to 200 + X-VGI-RPC-Error. In-handler producer errors
|
|
678
|
+
// stay 200-with-EXCEPTION-batch (the existing contract — clients see
|
|
679
|
+
// RpcError on body decode but proxies don't get the header signal).
|
|
680
|
+
// Mirrors c5c7091 for the cap-overshoot path only.
|
|
681
|
+
const status = externalOvershoot ? 500 : 200;
|
|
682
|
+
const response = arrowResponse(responseBody, status);
|
|
356
683
|
if (producerError) {
|
|
357
684
|
(response as any).__dispatchError = producerError;
|
|
358
685
|
}
|