@mcoda/codali 0.1.87 → 0.1.89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/dist/cli/EvalCommand.d.ts +8 -0
  2. package/dist/cli/EvalCommand.d.ts.map +1 -1
  3. package/dist/cli/EvalCommand.js +93 -1
  4. package/dist/cli.d.ts.map +1 -1
  5. package/dist/cli.js +1 -0
  6. package/dist/docdex/DocdexClient.d.ts +8 -1
  7. package/dist/docdex/DocdexClient.d.ts.map +1 -1
  8. package/dist/docdex/DocdexClient.js +126 -33
  9. package/dist/eval/CodaliGatewayLiveHarness.d.ts +169 -0
  10. package/dist/eval/CodaliGatewayLiveHarness.d.ts.map +1 -0
  11. package/dist/eval/CodaliGatewayLiveHarness.js +824 -0
  12. package/dist/eval/GatewayEvalSuite.d.ts +202 -0
  13. package/dist/eval/GatewayEvalSuite.d.ts.map +1 -0
  14. package/dist/eval/GatewayEvalSuite.js +673 -0
  15. package/dist/gateway/AgentTierResolver.d.ts +74 -0
  16. package/dist/gateway/AgentTierResolver.d.ts.map +1 -0
  17. package/dist/gateway/AgentTierResolver.js +576 -0
  18. package/dist/gateway/AppToolGatewayDispatcher.d.ts +88 -0
  19. package/dist/gateway/AppToolGatewayDispatcher.d.ts.map +1 -0
  20. package/dist/gateway/AppToolGatewayDispatcher.js +381 -0
  21. package/dist/gateway/CodaliGateway.d.ts +73 -0
  22. package/dist/gateway/CodaliGateway.d.ts.map +1 -0
  23. package/dist/gateway/CodaliGateway.js +824 -0
  24. package/dist/gateway/CodaliGatewaySchemas.d.ts +21 -0
  25. package/dist/gateway/CodaliGatewaySchemas.d.ts.map +1 -0
  26. package/dist/gateway/CodaliGatewaySchemas.js +874 -0
  27. package/dist/gateway/CodaliGatewayStore.d.ts +157 -0
  28. package/dist/gateway/CodaliGatewayStore.d.ts.map +1 -0
  29. package/dist/gateway/CodaliGatewayStore.js +206 -0
  30. package/dist/gateway/CodaliGatewayTypes.d.ts +336 -0
  31. package/dist/gateway/CodaliGatewayTypes.d.ts.map +1 -0
  32. package/dist/gateway/CodaliGatewayTypes.js +1 -0
  33. package/dist/gateway/ContextPackBuilder.d.ts +43 -0
  34. package/dist/gateway/ContextPackBuilder.d.ts.map +1 -0
  35. package/dist/gateway/ContextPackBuilder.js +317 -0
  36. package/dist/gateway/EvidenceNormalizer.d.ts +42 -0
  37. package/dist/gateway/EvidenceNormalizer.d.ts.map +1 -0
  38. package/dist/gateway/EvidenceNormalizer.js +488 -0
  39. package/dist/gateway/GatewayPlanner.d.ts +195 -0
  40. package/dist/gateway/GatewayPlanner.d.ts.map +1 -0
  41. package/dist/gateway/GatewayPlanner.js +379 -0
  42. package/dist/gateway/GatewayPolicyCompiler.d.ts +30 -0
  43. package/dist/gateway/GatewayPolicyCompiler.d.ts.map +1 -0
  44. package/dist/gateway/GatewayPolicyCompiler.js +114 -0
  45. package/dist/gateway/GatewaySecurityPolicy.d.ts +14 -0
  46. package/dist/gateway/GatewaySecurityPolicy.d.ts.map +1 -0
  47. package/dist/gateway/GatewaySecurityPolicy.js +350 -0
  48. package/dist/gateway/GatewayStateMachine.d.ts +165 -0
  49. package/dist/gateway/GatewayStateMachine.d.ts.map +1 -0
  50. package/dist/gateway/GatewayStateMachine.js +790 -0
  51. package/dist/gateway/GatewayTraceReplay.d.ts +120 -0
  52. package/dist/gateway/GatewayTraceReplay.d.ts.map +1 -0
  53. package/dist/gateway/GatewayTraceReplay.js +273 -0
  54. package/dist/gateway/ToolCapabilityCompiler.d.ts +50 -0
  55. package/dist/gateway/ToolCapabilityCompiler.d.ts.map +1 -0
  56. package/dist/gateway/ToolCapabilityCompiler.js +442 -0
  57. package/dist/index.d.ts +33 -1
  58. package/dist/index.d.ts.map +1 -1
  59. package/dist/index.js +16 -0
  60. package/dist/runtime/CodaliJobRuntime.d.ts +211 -0
  61. package/dist/runtime/CodaliJobRuntime.d.ts.map +1 -0
  62. package/dist/runtime/CodaliJobRuntime.js +590 -0
  63. package/dist/runtime/CodaliRuntime.d.ts +81 -1
  64. package/dist/runtime/CodaliRuntime.d.ts.map +1 -1
  65. package/dist/runtime/CodaliRuntime.js +619 -4
  66. package/dist/tools/ToolRegistry.d.ts.map +1 -1
  67. package/dist/tools/ToolRegistry.js +4 -0
  68. package/dist/tools/ToolTypes.d.ts +1 -1
  69. package/dist/tools/ToolTypes.d.ts.map +1 -1
  70. package/dist/tools/ToolTypes.js +5 -1
  71. package/package.json +3 -3
@@ -0,0 +1,790 @@
1
+ import { createInMemoryCodaliGatewayStore, } from "./CodaliGatewayStore.js";
2
+ import { compileCodaliGatewayPolicy, } from "./GatewayPolicyCompiler.js";
3
+ import { normalizeCodaliEvidence } from "./EvidenceNormalizer.js";
4
+ import { validateCodaliGatewayVerifierOutput } from "./CodaliGatewaySchemas.js";
5
+ import { CODALI_GATEWAY_SECURITY_PROMPT_HARDENING } from "./GatewaySecurityPolicy.js";
6
/** Per-task timeout in milliseconds (30 seconds) used when no usable override is configured. */
const DEFAULT_PER_TASK_TIMEOUT_MS = 30_000;
7
/**
 * Coerces an optional numeric setting to a positive integer.
 *
 * @param {unknown} value - Candidate setting; anything that is not a finite number is rejected.
 * @param {*} fallback - Returned unchanged when `value` is unusable.
 * @returns {*} `Math.floor(value)` when that is at least 1, otherwise `fallback`.
 */
const positiveInteger = (value, fallback) => {
    // Number.isFinite is false for undefined, NaN, +/-Infinity and non-numbers.
    if (!Number.isFinite(value)) {
        return fallback;
    }
    // Validate AFTER flooring: a fraction such as 0.5 is > 0 but floors to 0,
    // which is not a positive integer.
    const floored = Math.floor(value);
    return floored >= 1 ? floored : fallback;
};
10
/**
 * Coerces an optional numeric setting to a non-negative integer.
 *
 * @param {unknown} value - Candidate setting.
 * @param {*} fallback - Returned unchanged when `value` is not a finite number >= 0.
 * @returns {*} `Math.floor(value)` for usable input, otherwise `fallback`.
 */
const nonNegativeInteger = (value, fallback) => {
    // Number.isFinite already rejects undefined and every non-number.
    const usable = Number.isFinite(value) && value >= 0;
    if (!usable) {
        return fallback;
    }
    return Math.floor(value);
};
13
/**
 * Tells whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True only for objects that are neither null nor arrays.
 */
const isRecord = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
14
/**
 * Reads a strictly-boolean entry from a metadata object.
 *
 * @param {unknown} metadata - Candidate metadata; non-records yield `undefined`.
 * @param {string} key - Property to read.
 * @returns {boolean|undefined} The stored boolean, or `undefined` when the entry
 *   is absent or is not a boolean.
 */
const readBoolean = (metadata, key) => {
    if (!isRecord(metadata)) {
        return undefined;
    }
    if (typeof metadata[key] !== "boolean") {
        return undefined;
    }
    return metadata[key];
};
17
/**
 * Decides whether a worker task is required.
 *
 * A task is required unless its metadata carries the boolean `required: false`
 * or the boolean `optional: true`.
 *
 * @param {{ metadata?: unknown }} task - Worker task to classify.
 * @returns {boolean} False only when the metadata explicitly opts out.
 */
const isRequiredWorkerTask = (task) => {
    const { metadata } = task;
    const optedOut = readBoolean(metadata, "required") === false ||
        readBoolean(metadata, "optional") === true;
    return !optedOut;
};
24
/**
 * Removes duplicates from an iterable while keeping first-occurrence order.
 *
 * @param {Iterable<*>} values - Values to de-duplicate.
 * @returns {Array<*>} New array holding each distinct value once.
 */
const uniqueInOrder = (values) => {
    // A Set iterates in insertion order, so spreading it keeps the first
    // occurrence of every value in its original position.
    return [...new Set(values)];
};
35
/**
 * Reports whether the request names a tenant through a truthy `id`, `slug` or `realm`.
 *
 * @param {{ tenant?: { id?: unknown, slug?: unknown, realm?: unknown } }} request - Gateway request.
 * @returns {boolean} True when at least one tenant identifier is truthy.
 */
const requestHasTenantScope = (request) => {
    const { tenant } = request;
    if (!tenant) {
        return false;
    }
    return Boolean(tenant.id || tenant.slug || tenant.realm);
};
36
/**
 * Classifies an artifact as an image.
 *
 * An artifact counts as an image when its `type` contains "image"
 * (case-insensitive) or when its metadata declares a MIME type starting with
 * "image/" under `mimeType` or, failing that, `mime_type`.
 *
 * @param {{ type?: unknown, metadata?: unknown }} artifact - Artifact to inspect.
 * @returns {boolean} True when the artifact looks like an image.
 */
const isImageArtifact = (artifact) => {
    // A missing or non-string `type` must not make this predicate throw; treat
    // it as "no type information" and fall through to the MIME-type check.
    const type = typeof artifact.type === "string" ? artifact.type.toLowerCase() : "";
    // `includes` also covers the exact "image" and the "image/..." prefix cases.
    if (type.includes("image")) {
        return true;
    }
    const metadata = isRecord(artifact.metadata) ? artifact.metadata : undefined;
    // A string `mimeType` takes precedence over `mime_type`.
    let mimeType;
    if (typeof metadata?.mimeType === "string") {
        mimeType = metadata.mimeType;
    }
    else if (typeof metadata?.mime_type === "string") {
        mimeType = metadata.mime_type;
    }
    return Boolean(mimeType?.toLowerCase().startsWith("image/"));
};
49
/**
 * Builds the newline-separated instruction prompt handed to a gateway worker.
 *
 * The prompt starts with fixed role instructions and the three security
 * hardening statements, then lists the user query, the task fields, the allowed
 * tools and the remaining tool/model call budgets.
 *
 * @param {object} input
 * @param {{ query: string }} input.request - Originating request.
 * @param {object} input.task - Worker task (`id`, `workerRole`, `objective`,
 *   optional `query`, `outputFormat`, optional `expectedSources`/`constraints`).
 * @param {string[]} input.allowedTools - Tools the worker may call.
 * @param {number} input.remainingToolCalls - Tool calls still available.
 * @param {number} input.remainingModelCalls - Model calls still available.
 * @returns {string} The prompt text.
 */
export const buildCodaliGatewayWorkerPrompt = (input) => {
    const { request, task, allowedTools, remainingToolCalls, remainingModelCalls } = input;
    const hardening = CODALI_GATEWAY_SECURITY_PROMPT_HARDENING;
    const toolList = allowedTools.length > 0 ? allowedTools.join(", ") : "none";
    const expectedSourcesLine = task.expectedSources?.length
        ? `Expected sources: ${task.expectedSources.join(", ")}`
        : "Expected sources: none specified";
    const constraintsLine = task.constraints?.length
        ? `Constraints: ${task.constraints.join("; ")}`
        : "Constraints: none specified";
    const lines = [
        "You are a Codali gateway worker.",
        "Gather evidence only.",
        "Do not answer the user.",
        "Output JSON only.",
        hardening.toolOutputBoundary,
        hardening.policyImmutability,
        hardening.tenantScope,
        "Return structured evidence, source references, tool telemetry, and any errors.",
        `User query: ${request.query}`,
        `Task id: ${task.id}`,
        `Worker role: ${task.workerRole}`,
        `Objective: ${task.objective}`,
        // The task-specific query wins; the user query is the fallback.
        `Task query: ${task.query ?? request.query}`,
        `Output format: ${task.outputFormat}`,
        `Allowed tools: ${toolList}`,
        `Remaining tool calls: ${remainingToolCalls}`,
        `Remaining model calls: ${remainingModelCalls}`,
        expectedSourcesLine,
        constraintsLine,
    ];
    return lines.join("\n");
};
74
/**
 * Creates the error used to signal that a worker task ran past its deadline.
 *
 * @param {string} taskId - Identifier of the task that timed out.
 * @param {number} timeoutMs - Deadline that was exceeded, in milliseconds.
 * @returns {Error} Error whose `name` is "CodaliGatewayWorkerTimeoutError".
 */
const timeoutError = (taskId, timeoutMs) => {
    const message = `GATEWAY_WORKER_TIMEOUT: ${taskId} exceeded ${timeoutMs}ms`;
    return Object.assign(new Error(message), { name: "CodaliGatewayWorkerTimeoutError" });
};
79
/**
 * Settles with the outcome of `promise`, or rejects with a worker-timeout error
 * if `timeoutMs` elapses first.
 *
 * The underlying work is not cancelled on timeout; only the wait is abandoned.
 *
 * @param {Promise<*>|*} promise - Work to await. A plain (non-thenable) value is
 *   accepted and resolves immediately instead of failing on a missing `.then`.
 * @param {number} timeoutMs - Deadline in milliseconds.
 * @param {string} taskId - Task identifier embedded in the timeout error.
 * @returns {Promise<*>} The fulfilled value of `promise`.
 * @throws The rejection reason of `promise`, or the error built by `timeoutError`.
 */
const withTimeout = async (promise, timeoutMs, taskId) => {
    let timer;
    const deadline = new Promise((_resolve, reject) => {
        timer = setTimeout(() => reject(timeoutError(taskId, timeoutMs)), timeoutMs);
    });
    try {
        // Promise.race subscribes to `promise`, so a rejection arriving after the
        // deadline has fired is still handled and cannot surface as unhandled.
        return await Promise.race([promise, deadline]);
    }
    finally {
        // Always release the timer so a settled task does not keep the process alive.
        clearTimeout(timer);
    }
};
89
+ export class CodaliGatewayStateMachine {
90
+ constructor(options) {
91
+ this.options = options;
92
+ this.store = options.store ?? createInMemoryCodaliGatewayStore();
93
+ this.now = options.now ?? (() => Date.now());
94
+ }
95
+ async execute(input) {
96
+ const policyCompilation = input.policyCompilation ?? compileCodaliGatewayPolicy({ request: input.request });
97
+ if (!policyCompilation.ok) {
98
+ throw new Error("GATEWAY_POLICY_COMPILE_FAILED: Cannot execute worker tasks.");
99
+ }
100
+ const maxParallelWorkers = positiveInteger(this.options.maxParallelWorkers, policyCompilation.jobBudgets.maxParallelStages ?? 1);
101
+ const maxRuntimeMs = positiveInteger(this.options.maxRuntimeMs, policyCompilation.security.limits.maxRuntimeMs);
102
+ const perTaskTimeoutMs = Math.min(positiveInteger(this.options.perTaskTimeoutMs, DEFAULT_PER_TASK_TIMEOUT_MS), maxRuntimeMs);
103
+ const maxToolCalls = nonNegativeInteger(this.options.maxToolCalls, policyCompilation.security.limits.maxToolCalls);
104
+ const maxModelCalls = nonNegativeInteger(this.options.maxModelCalls, policyCompilation.security.limits.maxModelCalls);
105
+ const maxEvidenceItems = nonNegativeInteger(undefined, policyCompilation.security.limits.maxEvidenceItems);
106
+ const maxImageArtifacts = nonNegativeInteger(this.options.maxImageArtifacts, policyCompilation.security.limits.maxImageArtifacts);
107
+ const maxVerificationIterations = nonNegativeInteger(input.request.policy.maxIterations, 1);
108
+ const startedAtMs = this.now();
109
+ const warnings = [];
110
+ const errors = [];
111
+ const taskResults = [];
112
+ const preparedTasks = await this.prepareTasks(input, policyCompilation, warnings);
113
+ const verificationIterations = [];
114
+ const rejectedFollowUpTasks = [];
115
+ const calledTools = new Set();
116
+ const initialTrace = await this.store.readRunTrace(input.runId);
117
+ let toolCallCount = 0;
118
+ let modelCallCount = initialTrace?.modelCalls.length ?? 0;
119
+ let cursor = 0;
120
+ let requiredFailure = false;
121
+ let followUpTaskCount = 0;
122
+ let verificationStopReason = this.options.verifierRunner
123
+ ? "not_started"
124
+ : "not_configured";
125
+ await this.store.updateRun(input.runId, {
126
+ status: "running",
127
+ metadata: {
128
+ phase: "worker_task_executor",
129
+ workerTaskCount: preparedTasks.length,
130
+ maxParallelWorkers,
131
+ maxRuntimeMs,
132
+ perTaskTimeoutMs,
133
+ maxToolCalls,
134
+ maxModelCalls,
135
+ maxEvidenceItems,
136
+ maxImageArtifacts,
137
+ initialModelCallCount: modelCallCount,
138
+ verifierEnabled: Boolean(this.options.verifierRunner),
139
+ maxVerificationIterations,
140
+ },
141
+ });
142
+ while (!requiredFailure) {
143
+ while (cursor < preparedTasks.length && !requiredFailure) {
144
+ const wave = [];
145
+ let waveToolTaskReservations = 0;
146
+ let waveModelTaskReservations = 0;
147
+ while (cursor < preparedTasks.length && wave.length < maxParallelWorkers) {
148
+ const task = preparedTasks[cursor];
149
+ cursor += 1;
150
+ if (this.now() - startedAtMs >= maxRuntimeMs) {
151
+ const result = await this.skipTask(input.runId, task, "max_runtime_exhausted");
152
+ taskResults.push(result);
153
+ warnings.push(`worker_task_skipped:${task.task.id}:max_runtime_exhausted`);
154
+ continue;
155
+ }
156
+ if (modelCallCount + waveModelTaskReservations >= maxModelCalls) {
157
+ const result = await this.skipTask(input.runId, task, "model_budget_exhausted");
158
+ taskResults.push(result);
159
+ warnings.push(`worker_task_skipped:${task.task.id}:model_budget_exhausted`);
160
+ continue;
161
+ }
162
+ const needsToolBudget = task.allowedTools.length > 0;
163
+ if (needsToolBudget && toolCallCount + waveToolTaskReservations >= maxToolCalls) {
164
+ const result = await this.skipTask(input.runId, task, "tool_budget_exhausted");
165
+ taskResults.push(result);
166
+ warnings.push(`worker_task_skipped:${task.task.id}:tool_budget_exhausted`);
167
+ continue;
168
+ }
169
+ if (needsToolBudget) {
170
+ waveToolTaskReservations += 1;
171
+ }
172
+ waveModelTaskReservations += 1;
173
+ wave.push(task);
174
+ }
175
+ if (wave.length === 0) {
176
+ continue;
177
+ }
178
+ const waveResults = await Promise.all(wave.map((task) => this.runTask({
179
+ input,
180
+ policyCompilation,
181
+ task,
182
+ remainingToolCalls: Math.max(0, maxToolCalls - toolCallCount),
183
+ remainingModelCalls: Math.max(0, maxModelCalls - modelCallCount),
184
+ perTaskTimeoutMs,
185
+ })));
186
+ for (const result of waveResults) {
187
+ taskResults.push(result);
188
+ toolCallCount += result.toolCallCount;
189
+ modelCallCount += result.modelCallCount;
190
+ for (const tool of result.calledTools) {
191
+ if (result.toolCallCount > 0) {
192
+ calledTools.add(tool);
193
+ }
194
+ }
195
+ if (result.status === "failed") {
196
+ const code = result.errorCode ?? "GATEWAY_WORKER_FAILED";
197
+ const label = `${result.taskId}:${code}`;
198
+ if (result.required) {
199
+ errors.push(label);
200
+ requiredFailure = true;
201
+ }
202
+ else {
203
+ warnings.push(`optional_worker_failed:${label}`);
204
+ errors.push(`optional:${label}`);
205
+ }
206
+ }
207
+ }
208
+ if (toolCallCount >= maxToolCalls) {
209
+ while (cursor < preparedTasks.length) {
210
+ const task = preparedTasks[cursor];
211
+ cursor += 1;
212
+ if (task.allowedTools.length === 0) {
213
+ taskResults.push(await this.runTask({
214
+ input,
215
+ policyCompilation,
216
+ task,
217
+ remainingToolCalls: 0,
218
+ remainingModelCalls: Math.max(0, maxModelCalls - modelCallCount),
219
+ perTaskTimeoutMs,
220
+ }));
221
+ continue;
222
+ }
223
+ const result = await this.skipTask(input.runId, task, "tool_budget_exhausted");
224
+ taskResults.push(result);
225
+ warnings.push(`worker_task_skipped:${task.task.id}:tool_budget_exhausted`);
226
+ }
227
+ }
228
+ if (modelCallCount >= maxModelCalls) {
229
+ while (cursor < preparedTasks.length) {
230
+ const task = preparedTasks[cursor];
231
+ cursor += 1;
232
+ const result = await this.skipTask(input.runId, task, "model_budget_exhausted");
233
+ taskResults.push(result);
234
+ warnings.push(`worker_task_skipped:${task.task.id}:model_budget_exhausted`);
235
+ }
236
+ }
237
+ }
238
+ if (requiredFailure || !this.options.verifierRunner) {
239
+ break;
240
+ }
241
+ if (verificationIterations.length >= maxVerificationIterations) {
242
+ verificationStopReason = "max_iterations_reached";
243
+ warnings.push("verification_stop:max_iterations_reached");
244
+ break;
245
+ }
246
+ if (modelCallCount >= maxModelCalls) {
247
+ verificationStopReason = "model_budget_exhausted";
248
+ warnings.push("verification_stop:model_budget_exhausted");
249
+ break;
250
+ }
251
+ const verifierResult = await this.runVerifierIteration({
252
+ input,
253
+ planner: {
254
+ ...input.planner,
255
+ workerTasks: preparedTasks.map((task) => task.task),
256
+ },
257
+ policyCompilation,
258
+ iteration: verificationIterations.length + 1,
259
+ taskResults,
260
+ preparedTasks,
261
+ maxToolCalls,
262
+ toolCallCount,
263
+ });
264
+ modelCallCount += 1;
265
+ rejectedFollowUpTasks.push(...verifierResult.rejectedFollowUpTasks);
266
+ for (const rejected of verifierResult.rejectedFollowUpTasks) {
267
+ warnings.push(`verification_follow_up_rejected:${rejected.taskId ?? "unknown"}:${rejected.reason}`);
268
+ }
269
+ if (verifierResult.errorCode) {
270
+ verificationStopReason = "verifier_failed";
271
+ const label = `${verifierResult.errorCode}:${verifierResult.errorMessage ?? "unknown"}`;
272
+ warnings.push("verification_stop:verifier_failed");
273
+ errors.push(label);
274
+ break;
275
+ }
276
+ if (verifierResult.record) {
277
+ verificationIterations.push(verifierResult.record);
278
+ }
279
+ if (verifierResult.record?.output.passed) {
280
+ verificationStopReason = "verifier_passed";
281
+ break;
282
+ }
283
+ if (verifierResult.acceptedFollowUpTasks.length === 0) {
284
+ verificationStopReason = this.resolveVerificationStopReason(verifierResult.rejectedFollowUpTasks);
285
+ warnings.push(`verification_stop:${verificationStopReason}`);
286
+ break;
287
+ }
288
+ const followUpInput = {
289
+ ...input,
290
+ planner: {
291
+ ...input.planner,
292
+ workerTasks: verifierResult.acceptedFollowUpTasks,
293
+ },
294
+ };
295
+ const preparedFollowUps = await this.prepareTasks(followUpInput, policyCompilation, warnings);
296
+ preparedTasks.push(...preparedFollowUps);
297
+ followUpTaskCount += preparedFollowUps.length;
298
+ warnings.push(`verification_follow_up_accepted:${preparedFollowUps
299
+ .map((task) => task.task.id)
300
+ .join(",")}`);
301
+ }
302
+ if (requiredFailure) {
303
+ if (this.options.verifierRunner && verificationStopReason === "not_started") {
304
+ verificationStopReason = "required_worker_failed";
305
+ }
306
+ while (cursor < preparedTasks.length) {
307
+ const task = preparedTasks[cursor];
308
+ cursor += 1;
309
+ const result = await this.skipTask(input.runId, task, "required_worker_failed");
310
+ taskResults.push(result);
311
+ warnings.push(`worker_task_skipped:${task.task.id}:required_worker_failed`);
312
+ }
313
+ }
314
+ const verification = this.buildVerificationLoopResult({
315
+ stopReason: verificationStopReason,
316
+ iterations: verificationIterations,
317
+ followUpTaskCount,
318
+ rejectedFollowUpTasks,
319
+ });
320
+ const taskOrder = new Map(preparedTasks.map((task, index) => [task.task.id, index]));
321
+ const orderedTaskResults = [...taskResults].sort((left, right) => (taskOrder.get(left.taskId) ?? Number.MAX_SAFE_INTEGER) -
322
+ (taskOrder.get(right.taskId) ?? Number.MAX_SAFE_INTEGER));
323
+ const finalStatus = this.resolveStatus(orderedTaskResults, verification);
324
+ await this.store.updateRun(input.runId, {
325
+ status: finalStatus,
326
+ warnings,
327
+ errors,
328
+ metadata: {
329
+ phase: "worker_task_executor",
330
+ workerTaskCount: preparedTasks.length,
331
+ completedWorkerTaskCount: orderedTaskResults.filter((result) => result.status !== "skipped").length,
332
+ toolCallCount,
333
+ modelCallCount,
334
+ calledTools: [...calledTools].sort(),
335
+ verification,
336
+ },
337
+ });
338
+ return {
339
+ runId: input.runId,
340
+ status: finalStatus,
341
+ taskResults: orderedTaskResults,
342
+ verification,
343
+ warnings,
344
+ errors,
345
+ toolCallCount,
346
+ calledTools: [...calledTools].sort(),
347
+ modelCallCount,
348
+ trace: await this.store.readRunTrace(input.runId),
349
+ };
350
+ }
351
+ async runVerifierIteration(args) {
352
+ const startedAtMs = this.now();
353
+ try {
354
+ const trace = await this.store.readRunTrace(args.input.runId);
355
+ const rawOutput = await this.options.verifierRunner?.verify({
356
+ runId: args.input.runId,
357
+ request: args.input.request,
358
+ planner: args.planner,
359
+ iteration: args.iteration,
360
+ evidence: trace?.evidence ?? [],
361
+ taskResults: args.taskResults,
362
+ remainingToolCalls: Math.max(0, args.maxToolCalls - args.toolCallCount),
363
+ policyCompilation: args.policyCompilation,
364
+ });
365
+ const validated = validateCodaliGatewayVerifierOutput(rawOutput);
366
+ if (!validated.ok) {
367
+ throw new Error(`GATEWAY_VERIFIER_OUTPUT_INVALID: ${validated.issues
368
+ .map((issue) => `${issue.path}:${issue.message}`)
369
+ .join("; ")}`);
370
+ }
371
+ const followUps = this.filterVerifierFollowUpTasks({
372
+ output: validated.value,
373
+ preparedTasks: args.preparedTasks,
374
+ policyCompilation: args.policyCompilation,
375
+ maxToolCalls: args.maxToolCalls,
376
+ toolCallCount: args.toolCallCount,
377
+ allowImageWorker: args.input.request.policy.allowImageWorker === true,
378
+ });
379
+ const record = {
380
+ iteration: args.iteration,
381
+ output: validated.value,
382
+ acceptedFollowUpTaskIds: followUps.accepted.map((task) => task.id),
383
+ rejectedFollowUpTasks: followUps.rejected,
384
+ };
385
+ await this.store.appendModelCall({
386
+ runId: args.input.runId,
387
+ role: "verifier",
388
+ status: "success",
389
+ latencyMs: Math.max(0, this.now() - startedAtMs),
390
+ input: {
391
+ iteration: args.iteration,
392
+ evidenceCount: trace?.evidence.length ?? 0,
393
+ taskResultCount: args.taskResults.length,
394
+ remainingToolCalls: Math.max(0, args.maxToolCalls - args.toolCallCount),
395
+ },
396
+ output: validated.value,
397
+ metadata: {
398
+ iteration: args.iteration,
399
+ acceptedFollowUpTaskIds: record.acceptedFollowUpTaskIds,
400
+ rejectedFollowUpTasks: followUps.rejected,
401
+ },
402
+ });
403
+ return {
404
+ record,
405
+ acceptedFollowUpTasks: followUps.accepted,
406
+ rejectedFollowUpTasks: followUps.rejected,
407
+ };
408
+ }
409
+ catch (error) {
410
+ const errorMessage = error instanceof Error ? error.message : String(error);
411
+ await this.store.appendModelCall({
412
+ runId: args.input.runId,
413
+ role: "verifier",
414
+ status: "failed",
415
+ latencyMs: Math.max(0, this.now() - startedAtMs),
416
+ errorCode: "GATEWAY_VERIFIER_FAILED",
417
+ errorMessage,
418
+ metadata: {
419
+ iteration: args.iteration,
420
+ },
421
+ });
422
+ return {
423
+ acceptedFollowUpTasks: [],
424
+ rejectedFollowUpTasks: [],
425
+ errorCode: "GATEWAY_VERIFIER_FAILED",
426
+ errorMessage,
427
+ };
428
+ }
429
+ }
430
+ filterVerifierFollowUpTasks(args) {
431
+ const existingTaskIds = new Set(args.preparedTasks.map((task) => task.task.id));
432
+ const effectiveAllowedTools = new Set(args.policyCompilation.effectiveAllowedTools);
433
+ const accepted = [];
434
+ const rejected = [];
435
+ for (const task of args.output.followUpTasks) {
436
+ if (existingTaskIds.has(task.id)) {
437
+ rejected.push({ taskId: task.id, reason: "duplicate_task_id" });
438
+ continue;
439
+ }
440
+ if (task.workerRole === "image_worker" &&
441
+ !args.allowImageWorker) {
442
+ rejected.push({ taskId: task.id, reason: "image_worker_disabled" });
443
+ continue;
444
+ }
445
+ const blockedTools = task.toolsAllowed.filter((tool) => !effectiveAllowedTools.has(tool));
446
+ if (blockedTools.length > 0) {
447
+ rejected.push({
448
+ taskId: task.id,
449
+ reason: "required_tool_unavailable",
450
+ tools: uniqueInOrder(blockedTools),
451
+ });
452
+ continue;
453
+ }
454
+ if (task.toolsAllowed.length > 0 && args.toolCallCount >= args.maxToolCalls) {
455
+ rejected.push({
456
+ taskId: task.id,
457
+ reason: "tool_budget_exhausted",
458
+ tools: uniqueInOrder(task.toolsAllowed),
459
+ });
460
+ continue;
461
+ }
462
+ existingTaskIds.add(task.id);
463
+ accepted.push({
464
+ ...task,
465
+ toolsAllowed: uniqueInOrder(task.toolsAllowed),
466
+ metadata: {
467
+ ...(task.metadata ?? {}),
468
+ verifierFollowUp: true,
469
+ },
470
+ });
471
+ }
472
+ return { accepted, rejected };
473
+ }
474
+ resolveVerificationStopReason(rejected) {
475
+ if (rejected.some((task) => task.reason === "tool_budget_exhausted")) {
476
+ return "tool_budget_exhausted";
477
+ }
478
+ if (rejected.some((task) => task.reason === "required_tool_unavailable")) {
479
+ return "required_tool_unavailable";
480
+ }
481
+ return "no_useful_followups";
482
+ }
483
+ buildVerificationLoopResult(args) {
484
+ if (args.stopReason === "not_configured") {
485
+ return undefined;
486
+ }
487
+ return {
488
+ passed: args.iterations.at(-1)?.output.passed ?? false,
489
+ stopReason: args.stopReason,
490
+ iterations: args.iterations,
491
+ missingInformation: uniqueInOrder(args.iterations.flatMap((iteration) => iteration.output.missingInformation)),
492
+ contradictions: args.iterations.flatMap((iteration) => iteration.output.contradictions),
493
+ issues: args.iterations.flatMap((iteration) => iteration.output.issues),
494
+ followUpTaskCount: args.followUpTaskCount,
495
+ rejectedFollowUpTasks: args.rejectedFollowUpTasks,
496
+ };
497
+ }
498
+ async prepareTasks(input, policyCompilation, warnings) {
499
+ const effectiveAllowedTools = new Set(policyCompilation.effectiveAllowedTools);
500
+ const prepared = [];
501
+ for (const task of input.planner.workerTasks) {
502
+ const allowedTools = uniqueInOrder(task.toolsAllowed.filter((tool) => effectiveAllowedTools.has(tool)));
503
+ const removedTools = uniqueInOrder(task.toolsAllowed.filter((tool) => !effectiveAllowedTools.has(tool)));
504
+ if (removedTools.length > 0) {
505
+ warnings.push(`worker_task_tools_removed:${task.id}:${removedTools.join(",")}`);
506
+ }
507
+ const required = isRequiredWorkerTask(task);
508
+ const sanitizedTask = { ...task, toolsAllowed: allowedTools };
509
+ await this.store.createTask({
510
+ id: task.id,
511
+ runId: input.runId,
512
+ status: "pending",
513
+ workerRole: task.workerRole,
514
+ objective: task.objective,
515
+ metadata: {
516
+ ...(task.metadata ?? {}),
517
+ required,
518
+ allowedTools,
519
+ removedTools,
520
+ },
521
+ });
522
+ prepared.push({ task: sanitizedTask, required, allowedTools, removedTools });
523
+ }
524
+ return prepared;
525
+ }
526
+ async runTask(args) {
527
+ const startedAtMs = this.now();
528
+ const prompt = buildCodaliGatewayWorkerPrompt({
529
+ request: args.input.request,
530
+ task: args.task.task,
531
+ allowedTools: args.task.allowedTools,
532
+ remainingToolCalls: args.remainingToolCalls,
533
+ remainingModelCalls: args.remainingModelCalls,
534
+ });
535
+ await this.store.updateTask(args.input.runId, args.task.task.id, {
536
+ status: "running",
537
+ metadata: {
538
+ ...(args.task.task.metadata ?? {}),
539
+ required: args.task.required,
540
+ allowedTools: args.task.allowedTools,
541
+ removedTools: args.task.removedTools,
542
+ },
543
+ });
544
+ try {
545
+ const workerResult = await withTimeout(this.options.taskRunner.run({
546
+ runId: args.input.runId,
547
+ task: args.task.task,
548
+ prompt,
549
+ allowedTools: args.task.allowedTools,
550
+ remainingToolCalls: args.remainingToolCalls,
551
+ remainingModelCalls: args.remainingModelCalls,
552
+ timeoutMs: args.perTaskTimeoutMs,
553
+ request: args.input.request,
554
+ policyCompilation: args.policyCompilation,
555
+ }), args.perTaskTimeoutMs, args.task.task.id);
556
+ return await this.persistWorkerResult(args, workerResult, startedAtMs);
557
+ }
558
+ catch (error) {
559
+ const errorCode = error instanceof Error && error.name === "CodaliGatewayWorkerTimeoutError"
560
+ ? "GATEWAY_WORKER_TIMEOUT"
561
+ : "GATEWAY_WORKER_FAILED";
562
+ const errorMessage = error instanceof Error ? error.message : String(error);
563
+ const result = {
564
+ taskId: args.task.task.id,
565
+ workerRole: args.task.task.workerRole,
566
+ status: "failed",
567
+ required: args.task.required,
568
+ allowedTools: args.task.allowedTools,
569
+ removedTools: args.task.removedTools,
570
+ durationMs: Math.max(0, this.now() - startedAtMs),
571
+ evidenceCount: 0,
572
+ toolCallCount: 0,
573
+ calledTools: [],
574
+ modelCallCount: 0,
575
+ errorCode,
576
+ errorMessage,
577
+ };
578
+ await this.store.updateTask(args.input.runId, args.task.task.id, {
579
+ status: "failed",
580
+ metadata: {
581
+ ...(args.task.task.metadata ?? {}),
582
+ required: args.task.required,
583
+ allowedTools: args.task.allowedTools,
584
+ errorCode,
585
+ errorMessage,
586
+ },
587
+ });
588
+ return result;
589
+ }
590
+ }
591
+ async persistWorkerResult(args, workerResult, startedAtMs) {
592
+ const allowed = new Set(args.task.allowedTools);
593
+ const disallowedToolCalls = (workerResult.toolCalls ?? [])
594
+ .filter((call) => !allowed.has(call.tool));
595
+ const toolCalls = (workerResult.toolCalls ?? []).map((call) => {
596
+ if (allowed.has(call.tool)) {
597
+ return call;
598
+ }
599
+ return {
600
+ ...call,
601
+ status: "blocked",
602
+ errorCode: call.errorCode ?? "GATEWAY_TOOL_NOT_APPROVED",
603
+ errorMessage: call.errorMessage ?? "Worker attempted a tool outside its approved set.",
604
+ };
605
+ });
606
+ const toolBudgetExceeded = toolCalls.length > args.remainingToolCalls;
607
+ const modelCallCount = Math.max(1, workerResult.modelCalls?.length ?? 0);
608
+ const modelBudgetExceeded = modelCallCount > args.remainingModelCalls;
609
+ const traceBeforePersistence = await this.store.readRunTrace(args.input.runId);
610
+ const remainingEvidenceItems = Math.max(0, args.policyCompilation.security.limits.maxEvidenceItems -
611
+ (traceBeforePersistence?.evidence.length ?? 0));
612
+ const remainingImageArtifactsInitial = Math.max(0, args.policyCompilation.security.limits.maxImageArtifacts -
613
+ (traceBeforePersistence?.artifacts.filter(isImageArtifact).length ?? 0));
614
+ let remainingImageArtifacts = remainingImageArtifactsInitial;
615
+ let blockedImageArtifactCount = 0;
616
+ const tenantScoped = requestHasTenantScope(args.input.request);
617
+ const normalizedEvidence = normalizeCodaliEvidence({
618
+ runId: args.input.runId,
619
+ taskId: args.task.task.id,
620
+ originalQuery: args.input.request.query,
621
+ evidence: workerResult.evidence,
622
+ workerOutput: workerResult.output,
623
+ toolCalls,
624
+ requireTenantScope: tenantScoped,
625
+ defaultTenantScoped: tenantScoped,
626
+ maxEvidenceItems: remainingEvidenceItems,
627
+ });
628
+ const evidence = normalizedEvidence.evidence;
629
+ if (evidence.length > 0) {
630
+ await this.store.appendEvidence(args.input.runId, evidence);
631
+ }
632
+ for (const call of toolCalls) {
633
+ await this.store.appendToolCall({
634
+ runId: args.input.runId,
635
+ taskId: args.task.task.id,
636
+ tool: call.tool,
637
+ status: call.status,
638
+ latencyMs: call.latencyMs,
639
+ args: call.args,
640
+ result: call.result,
641
+ errorCode: call.errorCode,
642
+ errorMessage: call.errorMessage,
643
+ metadata: call.metadata,
644
+ });
645
+ }
646
+ for (const call of workerResult.modelCalls ?? []) {
647
+ await this.store.appendModelCall({
648
+ runId: args.input.runId,
649
+ taskId: args.task.task.id,
650
+ role: call.role,
651
+ status: call.status,
652
+ latencyMs: call.latencyMs,
653
+ agentSlug: call.agentSlug,
654
+ model: call.model,
655
+ provider: call.provider,
656
+ input: call.input,
657
+ output: call.output,
658
+ errorCode: call.errorCode,
659
+ errorMessage: call.errorMessage,
660
+ metadata: call.metadata,
661
+ });
662
+ }
663
+ for (const artifact of workerResult.artifacts ?? []) {
664
+ if (isImageArtifact(artifact) && remainingImageArtifacts <= 0) {
665
+ blockedImageArtifactCount += 1;
666
+ continue;
667
+ }
668
+ if (isImageArtifact(artifact)) {
669
+ remainingImageArtifacts -= 1;
670
+ }
671
+ await this.store.saveArtifact({
672
+ ...artifact,
673
+ runId: args.input.runId,
674
+ taskId: artifact.taskId ?? args.task.task.id,
675
+ });
676
+ }
677
+ const imageArtifactBudgetExceeded = blockedImageArtifactCount > 0;
678
+ const status = workerResult.status === "failed" ||
679
+ disallowedToolCalls.length > 0 ||
680
+ toolBudgetExceeded ||
681
+ modelBudgetExceeded ||
682
+ imageArtifactBudgetExceeded
683
+ ? "failed"
684
+ : "succeeded";
685
+ const errorCode = disallowedToolCalls.length > 0
686
+ ? "GATEWAY_TOOL_NOT_APPROVED"
687
+ : toolBudgetExceeded
688
+ ? "GATEWAY_TOOL_BUDGET_EXCEEDED"
689
+ : modelBudgetExceeded
690
+ ? "GATEWAY_MODEL_BUDGET_EXCEEDED"
691
+ : imageArtifactBudgetExceeded
692
+ ? "GATEWAY_IMAGE_ARTIFACT_BUDGET_EXCEEDED"
693
+ : workerResult.errorCode;
694
+ const errorMessage = disallowedToolCalls.length > 0
695
+ ? `Worker attempted disallowed tools: ${disallowedToolCalls
696
+ .map((call) => call.tool)
697
+ .join(", ")}`
698
+ : toolBudgetExceeded
699
+ ? "Worker reported more tool calls than the remaining gateway budget."
700
+ : modelBudgetExceeded
701
+ ? "Worker reported more model calls than the remaining gateway budget."
702
+ : imageArtifactBudgetExceeded
703
+ ? "Worker produced more image artifacts than the remaining gateway budget."
704
+ : workerResult.errorMessage;
705
+ await this.store.updateTask(args.input.runId, args.task.task.id, {
706
+ status,
707
+ metadata: {
708
+ ...(args.task.task.metadata ?? {}),
709
+ required: args.task.required,
710
+ allowedTools: args.task.allowedTools,
711
+ removedTools: args.task.removedTools,
712
+ errorCode,
713
+ output: workerResult.output,
714
+ workerMetadata: workerResult.metadata,
715
+ evidenceNormalization: {
716
+ warnings: normalizedEvidence.warnings,
717
+ rejectedCount: normalizedEvidence.rejected.length,
718
+ duplicateCount: normalizedEvidence.duplicateCount,
719
+ remainingEvidenceItems,
720
+ },
721
+ budgetEnforcement: {
722
+ remainingToolCalls: args.remainingToolCalls,
723
+ remainingModelCalls: args.remainingModelCalls,
724
+ modelCallCount,
725
+ toolBudgetExceeded,
726
+ modelBudgetExceeded,
727
+ maxImageArtifacts: args.policyCompilation.security.limits.maxImageArtifacts,
728
+ remainingImageArtifacts: remainingImageArtifactsInitial,
729
+ blockedImageArtifactCount,
730
+ },
731
+ },
732
+ });
733
+ return {
734
+ taskId: args.task.task.id,
735
+ workerRole: args.task.task.workerRole,
736
+ status,
737
+ required: args.task.required,
738
+ allowedTools: args.task.allowedTools,
739
+ removedTools: args.task.removedTools,
740
+ durationMs: Math.max(0, this.now() - startedAtMs),
741
+ evidenceCount: evidence.length,
742
+ toolCallCount: toolCalls.length,
743
+ calledTools: uniqueInOrder(toolCalls.map((call) => call.tool)),
744
+ modelCallCount,
745
+ output: workerResult.output,
746
+ errorCode,
747
+ errorMessage,
748
+ metadata: workerResult.metadata,
749
+ };
750
+ }
751
+ async skipTask(runId, task, reason) {
752
+ await this.store.updateTask(runId, task.task.id, {
753
+ status: "skipped",
754
+ metadata: {
755
+ ...(task.task.metadata ?? {}),
756
+ required: task.required,
757
+ allowedTools: task.allowedTools,
758
+ removedTools: task.removedTools,
759
+ skippedReason: reason,
760
+ },
761
+ });
762
+ return {
763
+ taskId: task.task.id,
764
+ workerRole: task.task.workerRole,
765
+ status: "skipped",
766
+ required: task.required,
767
+ allowedTools: task.allowedTools,
768
+ removedTools: task.removedTools,
769
+ durationMs: 0,
770
+ evidenceCount: 0,
771
+ toolCallCount: 0,
772
+ calledTools: [],
773
+ modelCallCount: 0,
774
+ skippedReason: reason,
775
+ };
776
+ }
777
+ resolveStatus(taskResults, verification) {
778
+ if (taskResults.some((result) => result.status === "failed" && result.required)) {
779
+ return "failed";
780
+ }
781
+ if (verification && !verification.passed) {
782
+ return "partial";
783
+ }
784
+ if (taskResults.some((result) => result.status === "failed" || result.status === "skipped")) {
785
+ return "partial";
786
+ }
787
+ return "succeeded";
788
+ }
789
+ }
790
/**
 * Factory for {@link CodaliGatewayStateMachine}.
 *
 * @param {object} options - Options forwarded unchanged to the constructor.
 * @returns {CodaliGatewayStateMachine} A new state machine instance.
 */
export const createCodaliGatewayStateMachine = (options) => {
    return new CodaliGatewayStateMachine(options);
};