@query-farm/vgi-rpc 0.6.4 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. package/dist/access-log.d.ts +55 -0
  2. package/dist/access-log.d.ts.map +1 -0
  3. package/dist/arrow/impl-arrowjs/index.d.ts +96 -0
  4. package/dist/arrow/impl-arrowjs/index.d.ts.map +1 -0
  5. package/dist/arrow/impl-flechette/index.d.ts +102 -0
  6. package/dist/arrow/impl-flechette/index.d.ts.map +1 -0
  7. package/dist/arrow/impl-flechette/message-meta.d.ts +11 -0
  8. package/dist/arrow/impl-flechette/message-meta.d.ts.map +1 -0
  9. package/dist/arrow/index.d.ts +4 -0
  10. package/dist/arrow/index.d.ts.map +1 -0
  11. package/dist/arrow/predicates.d.ts +44 -0
  12. package/dist/arrow/predicates.d.ts.map +1 -0
  13. package/dist/arrow/types.d.ts +62 -0
  14. package/dist/arrow/types.d.ts.map +1 -0
  15. package/dist/auth.d.ts +5 -0
  16. package/dist/auth.d.ts.map +1 -1
  17. package/dist/client/capabilities.d.ts +25 -0
  18. package/dist/client/capabilities.d.ts.map +1 -0
  19. package/dist/client/connect.d.ts +10 -0
  20. package/dist/client/connect.d.ts.map +1 -1
  21. package/dist/client/introspect.d.ts +21 -0
  22. package/dist/client/introspect.d.ts.map +1 -1
  23. package/dist/client/ipc.d.ts +8 -2
  24. package/dist/client/ipc.d.ts.map +1 -1
  25. package/dist/client/oauth.d.ts +9 -0
  26. package/dist/client/oauth.d.ts.map +1 -1
  27. package/dist/client/pipe.d.ts +24 -0
  28. package/dist/client/pipe.d.ts.map +1 -1
  29. package/dist/client/stream.d.ts +19 -2
  30. package/dist/client/stream.d.ts.map +1 -1
  31. package/dist/client/types.d.ts +23 -0
  32. package/dist/client/types.d.ts.map +1 -1
  33. package/dist/client/uploadUrl.d.ts +25 -0
  34. package/dist/client/uploadUrl.d.ts.map +1 -0
  35. package/dist/constants.d.ts +30 -2
  36. package/dist/constants.d.ts.map +1 -1
  37. package/dist/crypto.d.ts +22 -0
  38. package/dist/crypto.d.ts.map +1 -0
  39. package/dist/dispatch/describe.d.ts +10 -6
  40. package/dist/dispatch/describe.d.ts.map +1 -1
  41. package/dist/dispatch/stream.d.ts +2 -2
  42. package/dist/dispatch/stream.d.ts.map +1 -1
  43. package/dist/dispatch/unary.d.ts +2 -2
  44. package/dist/dispatch/unary.d.ts.map +1 -1
  45. package/dist/errors.d.ts +64 -1
  46. package/dist/errors.d.ts.map +1 -1
  47. package/dist/external.d.ts +27 -5
  48. package/dist/external.d.ts.map +1 -1
  49. package/dist/http/auth.d.ts +13 -0
  50. package/dist/http/auth.d.ts.map +1 -1
  51. package/dist/http/bearer.d.ts.map +1 -1
  52. package/dist/http/common.d.ts +43 -7
  53. package/dist/http/common.d.ts.map +1 -1
  54. package/dist/http/dispatch.d.ts +20 -2
  55. package/dist/http/dispatch.d.ts.map +1 -1
  56. package/dist/http/handler.d.ts.map +1 -1
  57. package/dist/http/index.d.ts +1 -0
  58. package/dist/http/index.d.ts.map +1 -1
  59. package/dist/http/jwt.d.ts +1 -0
  60. package/dist/http/jwt.d.ts.map +1 -1
  61. package/dist/http/mtls.d.ts +9 -1
  62. package/dist/http/mtls.d.ts.map +1 -1
  63. package/dist/http/oauth-pkce.d.ts +141 -0
  64. package/dist/http/oauth-pkce.d.ts.map +1 -0
  65. package/dist/http/pages.d.ts +3 -0
  66. package/dist/http/pages.d.ts.map +1 -1
  67. package/dist/http/sticky.d.ts +124 -0
  68. package/dist/http/sticky.d.ts.map +1 -0
  69. package/dist/http/token.d.ts +43 -12
  70. package/dist/http/token.d.ts.map +1 -1
  71. package/dist/http/types.d.ts +68 -5
  72. package/dist/http/types.d.ts.map +1 -1
  73. package/dist/index.d.ts +6 -4
  74. package/dist/index.d.ts.map +1 -1
  75. package/dist/index.js +1275 -3511
  76. package/dist/index.js.map +20 -38
  77. package/dist/launcher/hash.d.ts +22 -0
  78. package/dist/launcher/hash.d.ts.map +1 -0
  79. package/dist/launcher/index.d.ts +23 -0
  80. package/dist/launcher/index.d.ts.map +1 -0
  81. package/dist/launcher/launch.d.ts +27 -0
  82. package/dist/launcher/launch.d.ts.map +1 -0
  83. package/dist/launcher/lock.d.ts +19 -0
  84. package/dist/launcher/lock.d.ts.map +1 -0
  85. package/dist/launcher/serve-unix.d.ts +55 -0
  86. package/dist/launcher/serve-unix.d.ts.map +1 -0
  87. package/dist/launcher/state.d.ts +71 -0
  88. package/dist/launcher/state.d.ts.map +1 -0
  89. package/dist/otel.d.ts.map +1 -1
  90. package/dist/protocol.d.ts +19 -2
  91. package/dist/protocol.d.ts.map +1 -1
  92. package/dist/schema.d.ts +45 -18
  93. package/dist/schema.d.ts.map +1 -1
  94. package/dist/server.d.ts +23 -2
  95. package/dist/server.d.ts.map +1 -1
  96. package/dist/types.d.ts +270 -12
  97. package/dist/types.d.ts.map +1 -1
  98. package/dist/util/gzip.d.ts +10 -0
  99. package/dist/util/gzip.d.ts.map +1 -0
  100. package/dist/util/schema.d.ts +3 -15
  101. package/dist/util/schema.d.ts.map +1 -1
  102. package/dist/util/web-crypto.d.ts +22 -0
  103. package/dist/util/web-crypto.d.ts.map +1 -0
  104. package/dist/util/zstd.d.ts +26 -3
  105. package/dist/util/zstd.d.ts.map +1 -1
  106. package/dist/wire/opaque.d.ts +11 -0
  107. package/dist/wire/opaque.d.ts.map +1 -0
  108. package/dist/wire/reader.d.ts +5 -5
  109. package/dist/wire/reader.d.ts.map +1 -1
  110. package/dist/wire/request.d.ts +11 -3
  111. package/dist/wire/request.d.ts.map +1 -1
  112. package/dist/wire/response.d.ts +6 -6
  113. package/dist/wire/response.d.ts.map +1 -1
  114. package/dist/wire/writer.d.ts +49 -39
  115. package/dist/wire/writer.d.ts.map +1 -1
  116. package/package.json +35 -21
  117. package/src/access-log.ts +200 -0
  118. package/src/arrow/impl-arrowjs/index.ts +433 -0
  119. package/src/arrow/impl-flechette/index.ts +414 -0
  120. package/src/arrow/impl-flechette/message-meta.ts +174 -0
  121. package/src/arrow/index.ts +89 -0
  122. package/src/arrow/predicates.ts +56 -0
  123. package/src/arrow/types.ts +73 -0
  124. package/src/auth.ts +5 -0
  125. package/src/client/capabilities.ts +84 -0
  126. package/src/client/connect.ts +113 -26
  127. package/src/client/introspect.ts +74 -38
  128. package/src/client/ipc.ts +37 -27
  129. package/src/client/oauth.ts +9 -0
  130. package/src/client/pipe.ts +36 -9
  131. package/src/client/stream.ts +43 -20
  132. package/src/client/types.ts +23 -0
  133. package/src/client/uploadUrl.ts +169 -0
  134. package/src/constants.ts +34 -2
  135. package/src/crypto.ts +95 -0
  136. package/src/dispatch/describe.ts +146 -107
  137. package/src/dispatch/stream.ts +53 -24
  138. package/src/dispatch/unary.ts +5 -4
  139. package/src/errors.ts +87 -0
  140. package/src/external.ts +49 -30
  141. package/src/http/auth.ts +13 -0
  142. package/src/http/bearer.ts +2 -5
  143. package/src/http/common.ts +91 -23
  144. package/src/http/dispatch.ts +373 -46
  145. package/src/http/handler.ts +790 -68
  146. package/src/http/index.ts +1 -0
  147. package/src/http/jwt.ts +1 -0
  148. package/src/http/mtls.ts +25 -3
  149. package/src/http/oauth-pkce.ts +1035 -0
  150. package/src/http/pages.ts +30 -15
  151. package/src/http/sticky.ts +429 -0
  152. package/src/http/token.ts +170 -75
  153. package/src/http/types.ts +69 -5
  154. package/src/index.ts +40 -1
  155. package/src/launcher/hash.ts +104 -0
  156. package/src/launcher/index.ts +35 -0
  157. package/src/launcher/launch.ts +284 -0
  158. package/src/launcher/lock.ts +171 -0
  159. package/src/launcher/serve-unix.ts +386 -0
  160. package/src/launcher/state.ts +257 -0
  161. package/src/otel.ts +39 -33
  162. package/src/protocol.ts +30 -3
  163. package/src/schema.ts +107 -56
  164. package/src/server.ts +196 -20
  165. package/src/types.ts +376 -18
  166. package/src/util/gzip.ts +63 -0
  167. package/src/util/schema.ts +4 -22
  168. package/src/util/web-crypto.ts +98 -0
  169. package/src/util/zstd.ts +133 -14
  170. package/src/wire/opaque.ts +37 -0
  171. package/src/wire/reader.ts +5 -4
  172. package/src/wire/request.ts +67 -8
  173. package/src/wire/response.ts +51 -85
  174. package/src/wire/writer.ts +165 -69
  175. package/dist/util/conform.d.ts +0 -18
  176. package/dist/util/conform.d.ts.map +0 -1
  177. package/src/util/conform.ts +0 -94
@@ -1,46 +1,100 @@
1
1
  // © Copyright 2025-2026, Query.Farm LLC - https://query.farm
2
2
  // SPDX-License-Identifier: Apache-2.0
3
3
 
4
- import { RecordBatch, RecordBatchReader, Schema } from "@query-farm/apache-arrow";
4
+ import {
5
+ conformBatchToSchema,
6
+ deserializeBatch,
7
+ deserializeSchema as facadeDeserializeSchema,
8
+ schema as makeSchema,
9
+ serializeBatch,
10
+ type VgiBatch,
11
+ type VgiSchema,
12
+ withBatchMetadata,
13
+ } from "../arrow/index.js";
5
14
  import type { AuthContext } from "../auth.js";
6
- import { STATE_KEY } from "../constants.js";
15
+ import { CANCEL_KEY, STATE_KEY } from "../constants.js";
7
16
  import { buildDescribeBatch, DESCRIBE_SCHEMA } from "../dispatch/describe.js";
8
- import { type ExternalLocationConfig, maybeExternalizeBatch } from "../external.js";
17
+ import {
18
+ type ExternalLocationConfig,
19
+ isExternalLocationBatch,
20
+ maybeExternalizeBatch,
21
+ resolveExternalLocation,
22
+ } from "../external.js";
9
23
  import type { MethodDefinition } from "../types.js";
10
- import { OutputCollector } from "../types.js";
11
- import { conformBatchToSchema } from "../util/conform.js";
24
+ import { OutputCollector, TransportKind } from "../types.js";
12
25
  import { serializeSchema } from "../util/schema.js";
13
- import { parseRequest } from "../wire/request.js";
26
+ import { applyDefaults, parseRequest } from "../wire/request.js";
14
27
  import { buildEmptyBatch, buildErrorBatch, buildResultBatch } from "../wire/response.js";
15
- import { arrowResponse, HttpRpcError, readRequestFromBody, serializeIpcStream } from "./common.js";
28
+ import { appendCookieHeaders, arrowResponse, HttpRpcError, readRequestFromBody, serializeIpcStream } from "./common.js";
16
29
  import { packStateToken, unpackStateToken } from "./token.js";
17
30
  import type { StateSerializer } from "./types.js";
18
31
 
19
- async function deserializeSchema(bytes: Uint8Array): Promise<Schema> {
20
- const reader = await RecordBatchReader.from(bytes);
21
- await reader.open();
22
- return reader.schema!;
32
+ async function deserializeSchema(bytes: Uint8Array): Promise<VgiSchema> {
33
+ return facadeDeserializeSchema(bytes);
23
34
  }
24
35
 
25
- const EMPTY_SCHEMA = new Schema([]);
36
+ const EMPTY_SCHEMA = makeSchema([]);
26
37
 
27
38
  export interface DispatchContext {
28
- signingKey: Uint8Array;
39
+ tokenKey: Uint8Array;
29
40
  tokenTtl: number;
30
41
  serverId: string;
42
+ /** Producer-only soft wire-cap (deprecated alias for the producer-loop
43
+ * byte budget). Unary/exchange ignore this. */
31
44
  maxStreamResponseBytes?: number;
45
+ /** Soft wire-cap for producer streams; hard wire-cap for unary/exchange.
46
+ * Externalised payloads do not count toward this. */
47
+ maxResponseBytes?: number;
48
+ /** Hard cap on bytes uploaded to external storage during one HTTP response. */
49
+ maxExternalizedResponseBytes?: number;
32
50
  stateSerializer: StateSerializer;
33
51
  authContext?: AuthContext;
34
52
  externalLocation?: ExternalLocationConfig;
53
+ /** Incoming HTTP request cookies. Empty/absent on non-HTTP paths. */
54
+ cookies?: ReadonlyMap<string, string>;
55
+ /** Transport identifier surfaced to handlers via CallContext.kind.
56
+ * Defaults to HTTP when unset (the only caller that overrides it is
57
+ * the AF_UNIX launcher path). */
58
+ kind?: TransportKind;
59
+ /** Per-request sticky-session sink. Installed by the handler when sticky
60
+ * is enabled and the dispatcher attaches it to the OutputCollector so
61
+ * `ctx.session` / `ctx.openSession` / `ctx.closeSession` work. */
62
+ stickyContext?: import("../types.js").StickyContext;
63
+ }
64
+
65
+ /** Best-effort estimate of the external upload size if maybeExternalizeBatch ran
66
+ * on this batch right now. Returns 0 when externalisation would not fire or when
67
+ * the batch exposes no `.data.byteLength`. Mirrors the threshold logic only. */
68
+ function predictExternalizeBytes(batch: VgiBatch, config: ExternalLocationConfig | undefined): number {
69
+ if (!config?.storage) return 0;
70
+ if (batch.numRows === 0) return 0;
71
+ // arrow-js exposes `.data.byteLength` for an O(1) batch-bytes estimate;
72
+ // flechette doesn't surface this, but maybeExternalizeBatch will measure
73
+ // exact size on the actual upload path. Best-effort prediction here.
74
+ const size = (batch as any).data?.byteLength ?? 0;
75
+ const threshold = config.externalizeThresholdBytes ?? 1_048_576;
76
+ if (size < threshold) return 0;
77
+ return size;
78
+ }
79
+
80
+ /** Build an Arrow IPC stream containing only an EXCEPTION batch, wrapped in a
81
+ * 500 response so common.ts/arrowResponse rewrites it to 200 + X-VGI-RPC-Error.
82
+ * Used for cap-overshoot strict-fail. */
83
+ function makeCapErrorResponse(schema: VgiSchema, error: Error, ctx: DispatchContext): Response {
84
+ const errBatch = buildErrorBatch(schema, error, ctx.serverId, null);
85
+ const response = arrowResponse(serializeIpcStream(schema, [errBatch]), 500);
86
+ (response as any).__dispatchError = error;
87
+ return response;
35
88
  }
36
89
 
37
90
  /** Dispatch a __describe__ request. */
38
- export function httpDispatchDescribe(
91
+ export async function httpDispatchDescribe(
39
92
  protocolName: string,
40
93
  methods: Map<string, MethodDefinition>,
41
94
  serverId: string,
42
- ): Response {
43
- const { batch } = buildDescribeBatch(protocolName, methods, serverId);
95
+ protocolVersion?: string,
96
+ ): Promise<Response> {
97
+ const { batch } = await buildDescribeBatch(protocolName, methods, serverId, protocolVersion);
44
98
  const body = serializeIpcStream(DESCRIBE_SCHEMA, [batch]);
45
99
  return arrowResponse(body);
46
100
  }
@@ -52,26 +106,95 @@ export async function httpDispatchUnary(
52
106
  ctx: DispatchContext,
53
107
  ): Promise<Response> {
54
108
  const schema = method.resultSchema;
55
- const { schema: reqSchema, batch: reqBatch } = await readRequestFromBody(body);
56
- const parsed = parseRequest(reqSchema, reqBatch);
109
+ const { schema: reqSchema, batch: reqBatchRaw } = await readRequestFromBody(body);
110
+
111
+ // If the client externalized the request payload, fetch the inner batch
112
+ // and re-attach the outer dispatch metadata (method, version, request id)
113
+ // before parsing parameters. Mirrors the Python _read_request stage-1
114
+ // behaviour in vgi_rpc/rpc/_wire.py.
115
+ let reqBatch = reqBatchRaw;
116
+ let effectiveSchema = reqSchema;
117
+ if (ctx.externalLocation && isExternalLocationBatch(reqBatchRaw)) {
118
+ const resolved = await resolveExternalLocation(reqBatchRaw, ctx.externalLocation);
119
+ const mergedMeta = new Map<string, string>(resolved.metadata ?? []);
120
+ for (const [k, v] of reqBatchRaw.metadata ?? []) {
121
+ // Outer dispatch metadata wins for vgi_rpc.* keys (the inner batch
122
+ // shouldn't carry them but if it does, the outer is authoritative).
123
+ mergedMeta.set(k, v);
124
+ }
125
+ reqBatch = withBatchMetadata(resolved, mergedMeta);
126
+ effectiveSchema = resolved.schema;
127
+ }
128
+
129
+ const parsed = parseRequest(effectiveSchema, reqBatch);
57
130
 
58
131
  if (parsed.methodName !== method.name) {
59
132
  throw new HttpRpcError(`Method name in request '${parsed.methodName}' does not match URL '${method.name}'`, 400);
60
133
  }
61
134
 
62
- const out = new OutputCollector(schema, true, ctx.serverId, parsed.requestId, ctx.authContext);
135
+ applyDefaults(parsed.params, method.defaults);
136
+
137
+ const externalizationEnabled = !!ctx.externalLocation?.storage;
138
+ const out = new OutputCollector(
139
+ schema,
140
+ true,
141
+ ctx.serverId,
142
+ parsed.requestId,
143
+ ctx.authContext,
144
+ ctx.cookies,
145
+ ctx.kind ?? TransportKind.HTTP,
146
+ {
147
+ // Unary is one-shot: the entire wire and external budgets are
148
+ // available for this single emit.
149
+ remainingResponseBytes: ctx.maxResponseBytes,
150
+ remainingExternalizedResponseBytes: externalizationEnabled ? ctx.maxExternalizedResponseBytes : undefined,
151
+ externalizationEnabled,
152
+ },
153
+ );
154
+ out.enableCookieSink();
155
+ if (ctx.stickyContext) out.attachStickyContext(ctx.stickyContext);
63
156
 
64
157
  try {
65
158
  const result = await method.handler!(parsed.params, out);
66
159
  let resultBatch = buildResultBatch(schema, result, ctx.serverId, parsed.requestId);
67
160
  if (ctx.externalLocation) {
161
+ // Pre-flight max_externalized_response_bytes BEFORE incurring the
162
+ // upload — operator's intent is "don't emit data beyond this per
163
+ // call," not "emit and then complain." Mirror the Python check.
164
+ const predicted = predictExternalizeBytes(resultBatch, ctx.externalLocation);
165
+ if (ctx.maxExternalizedResponseBytes != null && predicted > ctx.maxExternalizedResponseBytes) {
166
+ const overshoot = new Error(
167
+ `Externalised payload exceeds max_externalized_response_bytes (${predicted} > ${ctx.maxExternalizedResponseBytes}) for method '${method.name}'`,
168
+ );
169
+ overshoot.name = "RuntimeError";
170
+ const response = makeCapErrorResponse(schema, overshoot, ctx);
171
+ appendCookieHeaders(response.headers, out.drainResponseCookies());
172
+ return response;
173
+ }
68
174
  resultBatch = await maybeExternalizeBatch(resultBatch, ctx.externalLocation);
69
175
  }
70
176
  const batches = [...out.batches.map((b) => b.batch), resultBatch];
71
- return arrowResponse(serializeIpcStream(schema, batches));
177
+ const body = serializeIpcStream(schema, batches);
178
+ // Hard wire-cap enforcement — overshoot replaces the response with a
179
+ // fresh EXCEPTION-only stream.
180
+ if (ctx.maxResponseBytes != null && body.byteLength > ctx.maxResponseBytes) {
181
+ const overshoot = new Error(
182
+ `HTTP body exceeds max_response_bytes (${body.byteLength} > ${ctx.maxResponseBytes}) for method '${method.name}'`,
183
+ );
184
+ overshoot.name = "RuntimeError";
185
+ const response = makeCapErrorResponse(schema, overshoot, ctx);
186
+ appendCookieHeaders(response.headers, out.drainResponseCookies());
187
+ return response;
188
+ }
189
+ const response = arrowResponse(body);
190
+ appendCookieHeaders(response.headers, out.drainResponseCookies());
191
+ return response;
72
192
  } catch (error: any) {
73
193
  const errBatch = buildErrorBatch(schema, error, ctx.serverId, parsed.requestId);
74
194
  const response = arrowResponse(serializeIpcStream(schema, [errBatch]), 500);
195
+ // Apply any cookies queued before the exception — matches Python's
196
+ // "cookies-on-error" behavior.
197
+ appendCookieHeaders(response.headers, out.drainResponseCookies());
75
198
  // Attach the error so the dispatch hook can see it
76
199
  (response as any).__dispatchError = error;
77
200
  return response;
@@ -95,6 +218,8 @@ export async function httpDispatchStreamInit(
95
218
  throw new HttpRpcError(`Method name in request '${parsed.methodName}' does not match URL '${method.name}'`, 400);
96
219
  }
97
220
 
221
+ applyDefaults(parsed.params, method.defaults);
222
+
98
223
  // Init state
99
224
  let state: any;
100
225
  try {
@@ -113,13 +238,27 @@ export async function httpDispatchStreamInit(
113
238
 
114
239
  // Support dynamic output schemas (same as pipe transport)
115
240
  const resolvedOutputSchema = state?.__outputSchema ?? outputSchema;
241
+ // Mirror the output-schema override for inputs: VGI's `init` registers
242
+ // exchange with a dummy `_tick` inputSchema and binds the real per-call
243
+ // input shape via state.__inputSchema. Without this, the dummy schema
244
+ // gets baked into the state token and conformBatchToSchema rejects the
245
+ // first real input batch on the next exchange round.
246
+ const resolvedInputSchema = state?.__inputSchema ?? inputSchema;
116
247
  const effectiveProducer = state?.__isProducer ?? isProducer;
117
248
 
118
249
  // Build header IPC stream if method has a header schema
119
250
  let headerBytes: Uint8Array | null = null;
120
251
  if (method.headerSchema && method.headerInit) {
121
252
  try {
122
- const headerOut = new OutputCollector(method.headerSchema, true, ctx.serverId, parsed.requestId, ctx.authContext);
253
+ const headerOut = new OutputCollector(
254
+ method.headerSchema,
255
+ true,
256
+ ctx.serverId,
257
+ parsed.requestId,
258
+ ctx.authContext,
259
+ ctx.cookies,
260
+ ctx.kind ?? TransportKind.HTTP,
261
+ );
123
262
  const headerValues = method.headerInit(parsed.params, state, headerOut);
124
263
  const headerBatch = buildResultBatch(method.headerSchema, headerValues, ctx.serverId, parsed.requestId);
125
264
  const headerBatches = [...headerOut.batches.map((b) => b.batch), headerBatch];
@@ -136,13 +275,21 @@ export async function httpDispatchStreamInit(
136
275
  // Producer method — produce data inline in the init response.
137
276
  // For exchange-registered methods acting as producers (__isProducer),
138
277
  // produceStreamResponse falls back to exchangeFn with tick batches.
139
- return produceStreamResponse(method, state, resolvedOutputSchema, inputSchema, ctx, parsed.requestId, headerBytes);
278
+ return produceStreamResponse(
279
+ method,
280
+ state,
281
+ resolvedOutputSchema,
282
+ resolvedInputSchema,
283
+ ctx,
284
+ parsed.requestId,
285
+ headerBytes,
286
+ );
140
287
  } else {
141
288
  // Exchange: serialize state into signed token, return zero-row batch with token
142
289
  const stateBytes = ctx.stateSerializer.serialize(state);
143
290
  const schemaBytes = serializeSchema(resolvedOutputSchema);
144
- const inputSchemaBytes = serializeSchema(inputSchema);
145
- const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.signingKey);
291
+ const inputSchemaBytes = serializeSchema(resolvedInputSchema);
292
+ const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.tokenKey, ctx.authContext?.principal);
146
293
 
147
294
  const tokenMeta = new Map<string, string>();
148
295
  tokenMeta.set(STATE_KEY, token);
@@ -176,9 +323,17 @@ export async function httpDispatchStreamExchange(
176
323
  throw new HttpRpcError("Missing state token in exchange request", 400);
177
324
  }
178
325
 
326
+ // Cancel signal — observed alongside the state token. Must be checked
327
+ // before conformBatchToSchema so that zero-row empty-schema cancel batches
328
+ // don't fail the cast.
329
+ const cancelled = reqBatch.metadata?.get(CANCEL_KEY) != null;
330
+
331
+ // Bind verification to the caller's identity — a token sealed for
332
+ // principal A will fail AEAD decryption when replayed by principal B
333
+ // (or by an anonymous caller, and vice versa).
179
334
  let unpacked: import("./token.js").UnpackedToken;
180
335
  try {
181
- unpacked = unpackStateToken(tokenBase64, ctx.signingKey, ctx.tokenTtl);
336
+ unpacked = unpackStateToken(tokenBase64, ctx.tokenKey, ctx.tokenTtl, ctx.authContext?.principal);
182
337
  } catch (error: any) {
183
338
  throw new HttpRpcError(`Invalid state token: ${error.message}`, 400);
184
339
  }
@@ -193,17 +348,21 @@ export async function httpDispatchStreamExchange(
193
348
 
194
349
  // Recover schemas from the token (the state itself may not contain
195
350
  // Schema objects after JSON round-trip — always prefer the token).
196
- let outputSchema: Schema;
351
+ let outputSchema: VgiSchema;
197
352
  if (unpacked.schemaBytes.length > 0) {
198
353
  outputSchema = await deserializeSchema(unpacked.schemaBytes);
199
354
  } else {
200
355
  outputSchema = state?.__outputSchema ?? method.outputSchema!;
201
356
  }
202
- let inputSchema: Schema;
357
+ let inputSchema: VgiSchema;
203
358
  if (unpacked.inputSchemaBytes.length > 0) {
204
359
  inputSchema = await deserializeSchema(unpacked.inputSchemaBytes);
205
360
  } else {
206
- inputSchema = method.inputSchema ?? EMPTY_SCHEMA;
361
+ // state.__inputSchema mirrors the __outputSchema pattern — set by
362
+ // dynamic-input exchange methods (e.g. VGI's init, which binds to a
363
+ // user-supplied input shape per invocation). Matches the fix already
364
+ // applied in src/dispatch/stream.ts for the subprocess path.
365
+ inputSchema = state?.__inputSchema ?? method.inputSchema ?? EMPTY_SCHEMA;
207
366
  }
208
367
  const effectiveProducer = state?.__isProducer ?? isProducer;
209
368
  if (process.env.VGI_DISPATCH_DEBUG)
@@ -211,6 +370,20 @@ export async function httpDispatchStreamExchange(
211
370
  `[httpDispatchStreamExchange] method=${method.name} effectiveProducer=${effectiveProducer} stateKeys=${Object.keys(state || {})}`,
212
371
  );
213
372
 
373
+ if (cancelled) {
374
+ // Client asked for cancellation. Invoke the optional hook once and
375
+ // return an empty IPC stream (no continuation token) so the client
376
+ // knows the stream has ended.
377
+ if (method.onCancel) {
378
+ try {
379
+ await method.onCancel(state);
380
+ } catch (err) {
381
+ console.debug?.(`onCancel hook failed: ${err instanceof Error ? err.message : err}`);
382
+ }
383
+ }
384
+ return arrowResponse(serializeIpcStream(outputSchema, []));
385
+ }
386
+
214
387
  if (effectiveProducer) {
215
388
  // Producer continuation — produce more data inline.
216
389
  // For exchange-registered methods, falls back to exchangeFn with tick batches.
@@ -219,10 +392,44 @@ export async function httpDispatchStreamExchange(
219
392
  // Exchange path — also handles exchange-registered methods acting as
220
393
  // producers (__isProducer=true). Use producer mode on the OutputCollector
221
394
  // when effectiveProducer so finish() is allowed.
222
- const out = new OutputCollector(outputSchema, effectiveProducer, ctx.serverId, null, ctx.authContext);
223
-
224
- // Cast compatible input types (e.g., decimal→double, int32→int64)
225
- const conformedBatch = conformBatchToSchema(reqBatch, inputSchema);
395
+ const externalizationEnabled = !!ctx.externalLocation?.storage;
396
+ const out = new OutputCollector(
397
+ outputSchema,
398
+ effectiveProducer,
399
+ ctx.serverId,
400
+ null,
401
+ ctx.authContext,
402
+ ctx.cookies,
403
+ ctx.kind ?? TransportKind.HTTP,
404
+ {
405
+ // Exchange is lockstep: one process() call, one output batch,
406
+ // one HTTP response. The whole budget belongs to this emit.
407
+ remainingResponseBytes: ctx.maxResponseBytes,
408
+ remainingExternalizedResponseBytes: externalizationEnabled ? ctx.maxExternalizedResponseBytes : undefined,
409
+ externalizationEnabled,
410
+ },
411
+ );
412
+ if (ctx.stickyContext) out.attachStickyContext(ctx.stickyContext);
413
+
414
+ // Cast compatible input types (e.g., decimal→double, int32→int64).
415
+ // Gated on effectiveProducer (not isProducer) so methods that flip to
416
+ // producer mode via state.__isProducer skip the conform entirely — the
417
+ // tick batches they receive have a dummy shape that shouldn't be
418
+ // checked against the declared input schema. A TypeError from the conform
419
+ // is rethrown; any other failure falls through with the original batch and
420
+ // the handler owns input-shape validation. Mirrors dispatch/stream.ts.
421
+ let conformedBatch = reqBatch;
422
+ if (!effectiveProducer && inputSchema !== EMPTY_SCHEMA && reqBatch.schema !== inputSchema) {
423
+ try {
424
+ conformedBatch = conformBatchToSchema(reqBatch, inputSchema);
425
+ } catch (e) {
426
+ // Field name/count mismatch is a hard contract violation — surface it
427
+ // as an error rather than letting handlers see a wrong-shape batch
428
+ // (mirrors the subprocess dispatch in src/dispatch/stream.ts).
429
+ if (e instanceof TypeError) throw e;
430
+ console.debug?.(`Schema conformance skipped: ${e instanceof Error ? e.message : e}`);
431
+ }
432
+ }
226
433
 
227
434
  try {
228
435
  if (method.exchangeFn) {
@@ -244,27 +451,37 @@ export async function httpDispatchStreamExchange(
244
451
  }
245
452
 
246
453
  // Collect emitted batches
247
- const batches: RecordBatch[] = [];
454
+ const batches: VgiBatch[] = [];
248
455
 
249
456
  if (out.finished) {
250
457
  // Stream is done — return data WITHOUT state token.
251
458
  // The absence of a token tells the client there's no more data.
252
459
  for (const emitted of out.batches) {
253
- batches.push(emitted.batch);
460
+ // Preserve per-emit metadata (vgi_batch_index,
461
+ // vgi_partition_values#b64) as the RecordBatch custom_metadata.
462
+ if (emitted.metadata && emitted.metadata.size > 0) {
463
+ const md = new Map<string, string>(emitted.batch.metadata ?? []);
464
+ for (const [k, v] of emitted.metadata) md.set(k, v);
465
+ batches.push(withBatchMetadata(emitted.batch, md));
466
+ } else {
467
+ batches.push(emitted.batch);
468
+ }
254
469
  }
255
470
  } else {
256
471
  // More data may follow — repack state into token for next exchange.
257
472
  const stateBytes = ctx.stateSerializer.serialize(state);
258
473
  const schemaBytes = serializeSchema(outputSchema);
259
474
  const inputSchemaBytes = serializeSchema(inputSchema);
260
- const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.signingKey);
475
+ const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.tokenKey, ctx.authContext?.principal);
261
476
 
262
477
  for (const emitted of out.batches) {
263
478
  const batch = emitted.batch;
264
479
  if (batch.numRows > 0) {
265
480
  const mergedMeta = new Map<string, string>(batch.metadata ?? []);
481
+ // Fold in per-emit metadata (vgi_batch_index, vgi_partition_values#b64).
482
+ if (emitted.metadata) for (const [k, v] of emitted.metadata) mergedMeta.set(k, v);
266
483
  mergedMeta.set(STATE_KEY, token);
267
- batches.push(new RecordBatch(batch.schema, batch.data, mergedMeta));
484
+ batches.push(withBatchMetadata(batch, mergedMeta));
268
485
  } else {
269
486
  batches.push(batch);
270
487
  }
@@ -280,7 +497,17 @@ export async function httpDispatchStreamExchange(
280
497
  }
281
498
  }
282
499
 
283
- return arrowResponse(serializeIpcStream(outputSchema, batches));
500
+ const body = serializeIpcStream(outputSchema, batches);
501
+ // Hard wire-cap enforcement for stream-exchange — overshoot replaces
502
+ // the response with an EXCEPTION-only stream so the client surfaces RpcError.
503
+ if (ctx.maxResponseBytes != null && body.byteLength > ctx.maxResponseBytes) {
504
+ const overshoot = new Error(
505
+ `HTTP body exceeds max_response_bytes (${body.byteLength} > ${ctx.maxResponseBytes}) for method '${method.name}'`,
506
+ );
507
+ overshoot.name = "RuntimeError";
508
+ return makeCapErrorResponse(outputSchema, overshoot, ctx);
509
+ }
510
+ return arrowResponse(body);
284
511
  }
285
512
  }
286
513
 
@@ -288,19 +515,52 @@ export async function httpDispatchStreamExchange(
288
515
  async function produceStreamResponse(
289
516
  method: MethodDefinition,
290
517
  state: any,
291
- outputSchema: Schema,
292
- inputSchema: Schema,
518
+ outputSchema: VgiSchema,
519
+ inputSchema: VgiSchema,
293
520
  ctx: DispatchContext,
294
521
  requestId: string | null,
295
522
  headerBytes: Uint8Array | null,
296
523
  ): Promise<Response> {
297
- const allBatches: RecordBatch[] = [];
298
- const maxBytes = ctx.maxStreamResponseBytes;
524
+ const allBatches: VgiBatch[] = [];
525
+ // Producer wire cap: prefer the legacy stream-only soft cap when set
526
+ // (lets old callers keep the "one batch per response" hack alive),
527
+ // else fall through to maxResponseBytes (which is hard for unary/
528
+ // exchange but soft for producer — continuation tokens cover overshoot).
529
+ const maxBytes = ctx.maxStreamResponseBytes ?? ctx.maxResponseBytes;
530
+ const maxExternalBytes = ctx.maxExternalizedResponseBytes;
531
+ const externalizationEnabled = !!ctx.externalLocation?.storage;
299
532
  let estimatedBytes = 0;
533
+ /** Cumulative external-channel bytes across iterations. External cap is
534
+ * *hard* — externalised uploads have no continuation-token escape valve. */
535
+ let cumulativeExternalBytes = 0;
300
536
  let producerError: Error | undefined;
537
+ /** Set when the external cap is breached; the loop replaces the partial
538
+ * stream with an EXCEPTION batch and breaks. */
539
+ let externalOvershoot: Error | undefined;
301
540
 
302
541
  while (true) {
303
- const out = new OutputCollector(outputSchema, true, ctx.serverId, requestId, ctx.authContext);
542
+ // Snapshot per-iteration budgets so the worker can size its emit.
543
+ const remainingWire = maxBytes != null ? Math.max(0, maxBytes - estimatedBytes) : undefined;
544
+ const remainingExternal =
545
+ externalizationEnabled && maxExternalBytes != null
546
+ ? Math.max(0, maxExternalBytes - cumulativeExternalBytes)
547
+ : undefined;
548
+
549
+ const out = new OutputCollector(
550
+ outputSchema,
551
+ true,
552
+ ctx.serverId,
553
+ requestId,
554
+ ctx.authContext,
555
+ ctx.cookies,
556
+ ctx.kind ?? TransportKind.HTTP,
557
+ {
558
+ remainingResponseBytes: remainingWire,
559
+ remainingExternalizedResponseBytes: remainingExternal,
560
+ externalizationEnabled,
561
+ },
562
+ );
563
+ if (ctx.stickyContext) out.attachStickyContext(ctx.stickyContext);
304
564
 
305
565
  try {
306
566
  if (method.producerFn) {
@@ -322,12 +582,73 @@ async function produceStreamResponse(
322
582
  }
323
583
 
324
584
  for (const emitted of out.batches) {
325
- allBatches.push(emitted.batch);
585
+ let batch = emitted.batch;
586
+ // Externalize before charging wire bytes — externalised payloads
587
+ // ride on the side channel and only the small pointer batch ends
588
+ // up on the wire. Pre-flight check + cumulative accounting mirror
589
+ // Python's _run_http_producer_turn so a worker exfiltrating big
590
+ // batches via tiny pointer outputs still hits the external cap.
591
+ if (externalizationEnabled && ctx.externalLocation) {
592
+ const predicted = predictExternalizeBytes(batch, ctx.externalLocation);
593
+ if (predicted > 0 && maxExternalBytes != null && cumulativeExternalBytes + predicted > maxExternalBytes) {
594
+ externalOvershoot = new Error(
595
+ `Externalised payload exceeds max_externalized_response_bytes (${cumulativeExternalBytes + predicted} > ${maxExternalBytes}) for method '${method.name}'`,
596
+ );
597
+ externalOvershoot.name = "RuntimeError";
598
+ break;
599
+ }
600
+ if (predicted > 0) {
601
+ batch = await maybeExternalizeBatch(batch, ctx.externalLocation);
602
+ cumulativeExternalBytes += predicted;
603
+ }
604
+ }
605
+ // Preserve per-emit metadata (vgi_batch_index, vgi_partition_values#b64)
606
+ // as the RecordBatch custom_metadata so the C++ extension reads it.
607
+ if (emitted.metadata && emitted.metadata.size > 0) {
608
+ const md = new Map<string, string>(batch.metadata ?? []);
609
+ for (const [k, v] of emitted.metadata) md.set(k, v);
610
+ batch = withBatchMetadata(batch, md);
611
+ }
612
+ allBatches.push(batch);
326
613
  if (maxBytes != null) {
327
- estimatedBytes += emitted.batch.data.byteLength;
614
+ // arrow-js exposes O(1) byteLength via batch.data; flechette has no
615
+ // equivalent. Best-effort estimate via serializeBatch in the latter.
616
+ let sz = (batch as any).data?.byteLength ?? 0;
617
+ if (sz === 0) {
618
+ // Either a zero-row externalisation pointer batch or a flechette
619
+ // batch that doesn't expose `data.byteLength`. The pointer case
620
+ // is real "work done" — the worker's emit became an upload, and
621
+ // we still need to advance the wire-cap loop so it eventually
622
+ // breaks. Use a serialized-size estimate; for pointer batches
623
+ // this captures the metadata-bearing zero-row body, for plain
624
+ // batches it's the actual wire size.
625
+ try {
626
+ sz = serializeBatch(batch).byteLength;
627
+ } catch {
628
+ sz = 0;
629
+ }
630
+ // Producer cancellation contract: the loop MUST make progress
631
+ // every iteration so an externalized infinite producer (e.g.,
632
+ // `cancellable_producer` with externalize-threshold=1) eventually
633
+ // mints a continuation token and lets the client cancel. Charge
634
+ // at least 1 byte when neither byteLength nor serialization
635
+ // gives us a real measurement.
636
+ if (sz === 0) sz = 1;
637
+ }
638
+ estimatedBytes += sz;
328
639
  }
329
640
  }
330
641
 
642
+ if (externalOvershoot) {
643
+ // Replace the partial stream with a fresh one carrying only the
644
+ // EXCEPTION batch — clients see RpcError before any data, matching
645
+ // the unary/exchange strict-fail contract.
646
+ allBatches.length = 0;
647
+ allBatches.push(buildErrorBatch(outputSchema, externalOvershoot, ctx.serverId, requestId));
648
+ producerError = externalOvershoot;
649
+ break;
650
+ }
651
+
331
652
  if (out.finished) {
332
653
  break;
333
654
  }
@@ -337,7 +658,7 @@ async function produceStreamResponse(
337
658
  const stateBytes = ctx.stateSerializer.serialize(state);
338
659
  const schemaBytes = serializeSchema(outputSchema);
339
660
  const inputSchemaBytes = serializeSchema(inputSchema);
340
- const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.signingKey);
661
+ const token = packStateToken(stateBytes, schemaBytes, inputSchemaBytes, ctx.tokenKey, ctx.authContext?.principal);
341
662
  const tokenMeta = new Map<string, string>();
342
663
  tokenMeta.set(STATE_KEY, token);
343
664
  allBatches.push(buildEmptyBatch(outputSchema, tokenMeta));
@@ -352,7 +673,13 @@ async function produceStreamResponse(
352
673
  } else {
353
674
  responseBody = dataBytes;
354
675
  }
355
- const response = arrowResponse(responseBody);
676
+ // External-cap overshoot is a strict-fail: emit 500 so arrowResponse
677
+ // translates to 200 + X-VGI-RPC-Error. In-handler producer errors
678
+ // stay 200-with-EXCEPTION-batch (the existing contract — clients see
679
+ // RpcError on body decode but proxies don't get the header signal).
680
+ // Mirrors c5c7091 for the cap-overshoot path only.
681
+ const status = externalOvershoot ? 500 : 200;
682
+ const response = arrowResponse(responseBody, status);
356
683
  if (producerError) {
357
684
  (response as any).__dispatchError = producerError;
358
685
  }