@mcoda/codali 0.1.87 → 0.1.89
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/EvalCommand.d.ts +8 -0
- package/dist/cli/EvalCommand.d.ts.map +1 -1
- package/dist/cli/EvalCommand.js +93 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +1 -0
- package/dist/docdex/DocdexClient.d.ts +8 -1
- package/dist/docdex/DocdexClient.d.ts.map +1 -1
- package/dist/docdex/DocdexClient.js +126 -33
- package/dist/eval/CodaliGatewayLiveHarness.d.ts +169 -0
- package/dist/eval/CodaliGatewayLiveHarness.d.ts.map +1 -0
- package/dist/eval/CodaliGatewayLiveHarness.js +824 -0
- package/dist/eval/GatewayEvalSuite.d.ts +202 -0
- package/dist/eval/GatewayEvalSuite.d.ts.map +1 -0
- package/dist/eval/GatewayEvalSuite.js +673 -0
- package/dist/gateway/AgentTierResolver.d.ts +74 -0
- package/dist/gateway/AgentTierResolver.d.ts.map +1 -0
- package/dist/gateway/AgentTierResolver.js +576 -0
- package/dist/gateway/AppToolGatewayDispatcher.d.ts +88 -0
- package/dist/gateway/AppToolGatewayDispatcher.d.ts.map +1 -0
- package/dist/gateway/AppToolGatewayDispatcher.js +381 -0
- package/dist/gateway/CodaliGateway.d.ts +73 -0
- package/dist/gateway/CodaliGateway.d.ts.map +1 -0
- package/dist/gateway/CodaliGateway.js +824 -0
- package/dist/gateway/CodaliGatewaySchemas.d.ts +21 -0
- package/dist/gateway/CodaliGatewaySchemas.d.ts.map +1 -0
- package/dist/gateway/CodaliGatewaySchemas.js +874 -0
- package/dist/gateway/CodaliGatewayStore.d.ts +157 -0
- package/dist/gateway/CodaliGatewayStore.d.ts.map +1 -0
- package/dist/gateway/CodaliGatewayStore.js +206 -0
- package/dist/gateway/CodaliGatewayTypes.d.ts +336 -0
- package/dist/gateway/CodaliGatewayTypes.d.ts.map +1 -0
- package/dist/gateway/CodaliGatewayTypes.js +1 -0
- package/dist/gateway/ContextPackBuilder.d.ts +43 -0
- package/dist/gateway/ContextPackBuilder.d.ts.map +1 -0
- package/dist/gateway/ContextPackBuilder.js +317 -0
- package/dist/gateway/EvidenceNormalizer.d.ts +42 -0
- package/dist/gateway/EvidenceNormalizer.d.ts.map +1 -0
- package/dist/gateway/EvidenceNormalizer.js +488 -0
- package/dist/gateway/GatewayPlanner.d.ts +195 -0
- package/dist/gateway/GatewayPlanner.d.ts.map +1 -0
- package/dist/gateway/GatewayPlanner.js +379 -0
- package/dist/gateway/GatewayPolicyCompiler.d.ts +30 -0
- package/dist/gateway/GatewayPolicyCompiler.d.ts.map +1 -0
- package/dist/gateway/GatewayPolicyCompiler.js +114 -0
- package/dist/gateway/GatewaySecurityPolicy.d.ts +14 -0
- package/dist/gateway/GatewaySecurityPolicy.d.ts.map +1 -0
- package/dist/gateway/GatewaySecurityPolicy.js +350 -0
- package/dist/gateway/GatewayStateMachine.d.ts +165 -0
- package/dist/gateway/GatewayStateMachine.d.ts.map +1 -0
- package/dist/gateway/GatewayStateMachine.js +790 -0
- package/dist/gateway/GatewayTraceReplay.d.ts +120 -0
- package/dist/gateway/GatewayTraceReplay.d.ts.map +1 -0
- package/dist/gateway/GatewayTraceReplay.js +273 -0
- package/dist/gateway/ToolCapabilityCompiler.d.ts +50 -0
- package/dist/gateway/ToolCapabilityCompiler.d.ts.map +1 -0
- package/dist/gateway/ToolCapabilityCompiler.js +442 -0
- package/dist/index.d.ts +33 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +16 -0
- package/dist/runtime/CodaliJobRuntime.d.ts +211 -0
- package/dist/runtime/CodaliJobRuntime.d.ts.map +1 -0
- package/dist/runtime/CodaliJobRuntime.js +590 -0
- package/dist/runtime/CodaliRuntime.d.ts +81 -1
- package/dist/runtime/CodaliRuntime.d.ts.map +1 -1
- package/dist/runtime/CodaliRuntime.js +619 -4
- package/dist/tools/ToolRegistry.d.ts.map +1 -1
- package/dist/tools/ToolRegistry.js +4 -0
- package/dist/tools/ToolTypes.d.ts +1 -1
- package/dist/tools/ToolTypes.d.ts.map +1 -1
- package/dist/tools/ToolTypes.js +5 -1
- package/package.json +3 -3
|
@@ -0,0 +1,790 @@
|
|
|
1
|
+
import { createInMemoryCodaliGatewayStore, } from "./CodaliGatewayStore.js";
|
|
2
|
+
import { compileCodaliGatewayPolicy, } from "./GatewayPolicyCompiler.js";
|
|
3
|
+
import { normalizeCodaliEvidence } from "./EvidenceNormalizer.js";
|
|
4
|
+
import { validateCodaliGatewayVerifierOutput } from "./CodaliGatewaySchemas.js";
|
|
5
|
+
import { CODALI_GATEWAY_SECURITY_PROMPT_HARDENING } from "./GatewaySecurityPolicy.js";
|
|
6
|
+
// Fallback per-task timeout in milliseconds, used by execute() when
// options.perTaskTimeoutMs is not a usable positive number. execute() further
// caps the effective value at the run's maxRuntimeMs.
const DEFAULT_PER_TASK_TIMEOUT_MS = 30000;
|
|
7
|
+
/**
 * Coerces a numeric setting to a positive integer.
 *
 * @param {number | undefined} value - Candidate value; anything that is not a
 *   finite number is rejected.
 * @param {number} fallback - Returned unchanged when `value` is unusable.
 * @returns {number} `Math.floor(value)` when that result is at least 1,
 *   otherwise `fallback`.
 */
const positiveInteger = (value, fallback) => {
    if (!Number.isFinite(value)) {
        return fallback;
    }
    const floored = Math.floor(value);
    // Validate AFTER flooring: a fraction such as 0.5 is > 0 but floors to 0,
    // which is not a positive integer and must fall back instead.
    return floored >= 1 ? floored : fallback;
};
|
|
10
|
+
/**
 * Coerces a numeric setting to a non-negative integer.
 *
 * @param {number | undefined} value - Candidate value.
 * @param {number} fallback - Returned unchanged when `value` is not a finite
 *   number or is below zero.
 * @returns {number} `Math.floor(value)` for finite values >= 0, otherwise `fallback`.
 */
const nonNegativeInteger = (value, fallback) => {
    // Number.isFinite is false for undefined, NaN, infinities and non-numbers.
    const usable = Number.isFinite(value) && value >= 0;
    if (!usable) {
        return fallback;
    }
    return Math.floor(value);
};
|
|
13
|
+
/**
 * Tells whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for objects other than arrays, `false` otherwise.
 */
const isRecord = (value) => {
    if (value === null || typeof value !== "object") {
        return false;
    }
    return !Array.isArray(value);
};
|
|
14
|
+
/**
 * Reads a strictly boolean entry from a metadata record.
 *
 * @param {unknown} metadata - Candidate metadata; ignored unless `isRecord` accepts it.
 * @param {string} key - Property name to look up.
 * @returns {boolean | undefined} The stored boolean, or `undefined` when the
 *   metadata is not a record or the entry is not of type boolean.
 */
const readBoolean = (metadata, key) => {
    if (!isRecord(metadata)) {
        return undefined;
    }
    const candidate = metadata[key];
    return typeof candidate === "boolean" ? candidate : undefined;
};
|
|
17
|
+
/**
 * Decides whether a worker task counts as required.
 *
 * A task is required unless its metadata carries the boolean `required: false`
 * or the boolean `optional: true`.
 *
 * @param {{ metadata?: unknown }} task - Worker task whose metadata is inspected.
 * @returns {boolean} `false` only when the metadata explicitly opts out.
 */
const isRequiredWorkerTask = (task) => {
    const optedOut = readBoolean(task.metadata, "required") === false
        || readBoolean(task.metadata, "optional") === true;
    return !optedOut;
};
|
|
24
|
+
/**
 * Removes duplicates from an iterable while keeping first-occurrence order.
 *
 * @param {Iterable<unknown>} values - Values to de-duplicate.
 * @returns {unknown[]} New array holding each distinct value once, in the
 *   order it was first seen (Set membership decides what counts as distinct).
 */
const uniqueInOrder = (values) => {
    const encountered = new Set();
    return [...values].filter((candidate) => {
        if (encountered.has(candidate)) {
            return false;
        }
        encountered.add(candidate);
        return true;
    });
};
|
|
35
|
+
/**
 * Reports whether a request carries any tenant identifier.
 *
 * @param {{ tenant?: { id?: unknown, slug?: unknown, realm?: unknown } }} request
 *   Gateway request to inspect.
 * @returns {boolean} `true` when `tenant.id`, `tenant.slug` or `tenant.realm` is truthy.
 */
const requestHasTenantScope = (request) => {
    const { tenant } = request;
    if (!tenant) {
        return false;
    }
    return Boolean(tenant.id || tenant.slug || tenant.realm);
};
|
|
36
|
+
/**
 * Classifies an artifact as an image.
 *
 * An artifact is an image when its lower-cased `type` contains "image", or,
 * failing that, when its metadata record declares a MIME type starting with
 * "image/". `metadata.mimeType` wins over `metadata.mime_type` whenever it is
 * a string.
 *
 * @param {{ type: string, metadata?: unknown }} artifact - Artifact to classify.
 * @returns {boolean} `true` when either rule matches.
 */
const isImageArtifact = (artifact) => {
    // The exact match "image" and the prefix "image/" both contain "image",
    // so a single substring test covers every type-based case.
    if (artifact.type.toLowerCase().includes("image")) {
        return true;
    }
    if (!isRecord(artifact.metadata)) {
        return false;
    }
    const { mimeType, mime_type: snakeCaseMimeType } = artifact.metadata;
    const declared = [mimeType, snakeCaseMimeType].find((entry) => typeof entry === "string");
    return declared !== undefined && declared.toLowerCase().startsWith("image/");
};
|
|
49
|
+
/**
 * Renders the newline-separated instruction prompt handed to a gateway worker.
 *
 * The prompt contains the fixed role instructions, the three security
 * hardening statements from CODALI_GATEWAY_SECURITY_PROMPT_HARDENING, and the
 * task-specific details (ids, objective, tools, remaining budgets, expected
 * sources and constraints).
 *
 * @param {object} input
 * @param {{ query: string }} input.request - Original gateway request.
 * @param {object} input.task - Worker task (`id`, `workerRole`, `objective`,
 *   `outputFormat`, optional `query`, `expectedSources`, `constraints`).
 * @param {string[]} input.allowedTools - Tools the worker may call.
 * @param {number} input.remainingToolCalls - Tool-call budget shown to the worker.
 * @param {number} input.remainingModelCalls - Model-call budget shown to the worker.
 * @returns {string} The assembled prompt text.
 */
export const buildCodaliGatewayWorkerPrompt = (input) => {
    const { request, task, allowedTools, remainingToolCalls, remainingModelCalls } = input;
    const hardening = CODALI_GATEWAY_SECURITY_PROMPT_HARDENING;
    // Optional list sections fall back to a fixed "none specified" line.
    const describeList = (label, items, separator) => (items?.length
        ? `${label}: ${items.join(separator)}`
        : `${label}: none specified`);
    const toolSummary = allowedTools.length > 0 ? allowedTools.join(", ") : "none";
    const lines = [
        "You are a Codali gateway worker.",
        "Gather evidence only.",
        "Do not answer the user.",
        "Output JSON only.",
        hardening.toolOutputBoundary,
        hardening.policyImmutability,
        hardening.tenantScope,
        "Return structured evidence, source references, tool telemetry, and any errors.",
        `User query: ${request.query}`,
        `Task id: ${task.id}`,
        `Worker role: ${task.workerRole}`,
        `Objective: ${task.objective}`,
        `Task query: ${task.query ?? request.query}`,
        `Output format: ${task.outputFormat}`,
        `Allowed tools: ${toolSummary}`,
        `Remaining tool calls: ${remainingToolCalls}`,
        `Remaining model calls: ${remainingModelCalls}`,
        describeList("Expected sources", task.expectedSources, ", "),
        describeList("Constraints", task.constraints, "; "),
    ];
    return lines.join("\n");
};
|
|
74
|
+
/**
 * Builds the error used to reject a worker task that ran past its deadline.
 *
 * @param {string} taskId - Identifier of the task that timed out.
 * @param {number} timeoutMs - Deadline that was exceeded, in milliseconds.
 * @returns {Error} Error named "CodaliGatewayWorkerTimeoutError" whose message
 *   starts with "GATEWAY_WORKER_TIMEOUT".
 */
const timeoutError = (taskId, timeoutMs) => Object.assign(
    new Error(`GATEWAY_WORKER_TIMEOUT: ${taskId} exceeded ${timeoutMs}ms`),
    // runTask() recognises timeouts by this name, so it must stay stable.
    { name: "CodaliGatewayWorkerTimeoutError" },
);
|
|
79
|
+
/**
 * Settles with the outcome of `promise`, or rejects with a timeout error when
 * `timeoutMs` elapses first.
 *
 * Compared with hand-wiring `promise.then(...)` inside `new Promise`, racing
 * against a deadline promise accepts plain (non-thenable) values and the
 * `finally` clause guarantees the timer is released on every path, so a stale
 * timer can never keep the process alive.
 *
 * @param {Promise<unknown> | unknown} promise - Work to await; plain values resolve immediately.
 * @param {number} timeoutMs - Deadline in milliseconds.
 * @param {string} taskId - Task identifier embedded in the timeout error.
 * @returns {Promise<unknown>} The fulfilled value of `promise`.
 * @throws The rejection reason of `promise`, or the error built by
 *   `timeoutError(taskId, timeoutMs)` when the deadline wins.
 */
const withTimeout = async (promise, timeoutMs, taskId) => {
    let timer;
    const deadline = new Promise((_resolve, reject) => {
        timer = setTimeout(() => reject(timeoutError(taskId, timeoutMs)), timeoutMs);
    });
    try {
        // Promise.race subscribes to both inputs, so a late rejection of the
        // loser is still observed and never surfaces as unhandled.
        return await Promise.race([promise, deadline]);
    } finally {
        clearTimeout(timer);
    }
};
|
|
89
|
+
export class CodaliGatewayStateMachine {
|
|
90
|
+
constructor(options) {
|
|
91
|
+
this.options = options;
|
|
92
|
+
this.store = options.store ?? createInMemoryCodaliGatewayStore();
|
|
93
|
+
this.now = options.now ?? (() => Date.now());
|
|
94
|
+
}
|
|
95
|
+
async execute(input) {
|
|
96
|
+
const policyCompilation = input.policyCompilation ?? compileCodaliGatewayPolicy({ request: input.request });
|
|
97
|
+
if (!policyCompilation.ok) {
|
|
98
|
+
throw new Error("GATEWAY_POLICY_COMPILE_FAILED: Cannot execute worker tasks.");
|
|
99
|
+
}
|
|
100
|
+
const maxParallelWorkers = positiveInteger(this.options.maxParallelWorkers, policyCompilation.jobBudgets.maxParallelStages ?? 1);
|
|
101
|
+
const maxRuntimeMs = positiveInteger(this.options.maxRuntimeMs, policyCompilation.security.limits.maxRuntimeMs);
|
|
102
|
+
const perTaskTimeoutMs = Math.min(positiveInteger(this.options.perTaskTimeoutMs, DEFAULT_PER_TASK_TIMEOUT_MS), maxRuntimeMs);
|
|
103
|
+
const maxToolCalls = nonNegativeInteger(this.options.maxToolCalls, policyCompilation.security.limits.maxToolCalls);
|
|
104
|
+
const maxModelCalls = nonNegativeInteger(this.options.maxModelCalls, policyCompilation.security.limits.maxModelCalls);
|
|
105
|
+
const maxEvidenceItems = nonNegativeInteger(undefined, policyCompilation.security.limits.maxEvidenceItems);
|
|
106
|
+
const maxImageArtifacts = nonNegativeInteger(this.options.maxImageArtifacts, policyCompilation.security.limits.maxImageArtifacts);
|
|
107
|
+
const maxVerificationIterations = nonNegativeInteger(input.request.policy.maxIterations, 1);
|
|
108
|
+
const startedAtMs = this.now();
|
|
109
|
+
const warnings = [];
|
|
110
|
+
const errors = [];
|
|
111
|
+
const taskResults = [];
|
|
112
|
+
const preparedTasks = await this.prepareTasks(input, policyCompilation, warnings);
|
|
113
|
+
const verificationIterations = [];
|
|
114
|
+
const rejectedFollowUpTasks = [];
|
|
115
|
+
const calledTools = new Set();
|
|
116
|
+
const initialTrace = await this.store.readRunTrace(input.runId);
|
|
117
|
+
let toolCallCount = 0;
|
|
118
|
+
let modelCallCount = initialTrace?.modelCalls.length ?? 0;
|
|
119
|
+
let cursor = 0;
|
|
120
|
+
let requiredFailure = false;
|
|
121
|
+
let followUpTaskCount = 0;
|
|
122
|
+
let verificationStopReason = this.options.verifierRunner
|
|
123
|
+
? "not_started"
|
|
124
|
+
: "not_configured";
|
|
125
|
+
await this.store.updateRun(input.runId, {
|
|
126
|
+
status: "running",
|
|
127
|
+
metadata: {
|
|
128
|
+
phase: "worker_task_executor",
|
|
129
|
+
workerTaskCount: preparedTasks.length,
|
|
130
|
+
maxParallelWorkers,
|
|
131
|
+
maxRuntimeMs,
|
|
132
|
+
perTaskTimeoutMs,
|
|
133
|
+
maxToolCalls,
|
|
134
|
+
maxModelCalls,
|
|
135
|
+
maxEvidenceItems,
|
|
136
|
+
maxImageArtifacts,
|
|
137
|
+
initialModelCallCount: modelCallCount,
|
|
138
|
+
verifierEnabled: Boolean(this.options.verifierRunner),
|
|
139
|
+
maxVerificationIterations,
|
|
140
|
+
},
|
|
141
|
+
});
|
|
142
|
+
while (!requiredFailure) {
|
|
143
|
+
while (cursor < preparedTasks.length && !requiredFailure) {
|
|
144
|
+
const wave = [];
|
|
145
|
+
let waveToolTaskReservations = 0;
|
|
146
|
+
let waveModelTaskReservations = 0;
|
|
147
|
+
while (cursor < preparedTasks.length && wave.length < maxParallelWorkers) {
|
|
148
|
+
const task = preparedTasks[cursor];
|
|
149
|
+
cursor += 1;
|
|
150
|
+
if (this.now() - startedAtMs >= maxRuntimeMs) {
|
|
151
|
+
const result = await this.skipTask(input.runId, task, "max_runtime_exhausted");
|
|
152
|
+
taskResults.push(result);
|
|
153
|
+
warnings.push(`worker_task_skipped:${task.task.id}:max_runtime_exhausted`);
|
|
154
|
+
continue;
|
|
155
|
+
}
|
|
156
|
+
if (modelCallCount + waveModelTaskReservations >= maxModelCalls) {
|
|
157
|
+
const result = await this.skipTask(input.runId, task, "model_budget_exhausted");
|
|
158
|
+
taskResults.push(result);
|
|
159
|
+
warnings.push(`worker_task_skipped:${task.task.id}:model_budget_exhausted`);
|
|
160
|
+
continue;
|
|
161
|
+
}
|
|
162
|
+
const needsToolBudget = task.allowedTools.length > 0;
|
|
163
|
+
if (needsToolBudget && toolCallCount + waveToolTaskReservations >= maxToolCalls) {
|
|
164
|
+
const result = await this.skipTask(input.runId, task, "tool_budget_exhausted");
|
|
165
|
+
taskResults.push(result);
|
|
166
|
+
warnings.push(`worker_task_skipped:${task.task.id}:tool_budget_exhausted`);
|
|
167
|
+
continue;
|
|
168
|
+
}
|
|
169
|
+
if (needsToolBudget) {
|
|
170
|
+
waveToolTaskReservations += 1;
|
|
171
|
+
}
|
|
172
|
+
waveModelTaskReservations += 1;
|
|
173
|
+
wave.push(task);
|
|
174
|
+
}
|
|
175
|
+
if (wave.length === 0) {
|
|
176
|
+
continue;
|
|
177
|
+
}
|
|
178
|
+
const waveResults = await Promise.all(wave.map((task) => this.runTask({
|
|
179
|
+
input,
|
|
180
|
+
policyCompilation,
|
|
181
|
+
task,
|
|
182
|
+
remainingToolCalls: Math.max(0, maxToolCalls - toolCallCount),
|
|
183
|
+
remainingModelCalls: Math.max(0, maxModelCalls - modelCallCount),
|
|
184
|
+
perTaskTimeoutMs,
|
|
185
|
+
})));
|
|
186
|
+
for (const result of waveResults) {
|
|
187
|
+
taskResults.push(result);
|
|
188
|
+
toolCallCount += result.toolCallCount;
|
|
189
|
+
modelCallCount += result.modelCallCount;
|
|
190
|
+
for (const tool of result.calledTools) {
|
|
191
|
+
if (result.toolCallCount > 0) {
|
|
192
|
+
calledTools.add(tool);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
if (result.status === "failed") {
|
|
196
|
+
const code = result.errorCode ?? "GATEWAY_WORKER_FAILED";
|
|
197
|
+
const label = `${result.taskId}:${code}`;
|
|
198
|
+
if (result.required) {
|
|
199
|
+
errors.push(label);
|
|
200
|
+
requiredFailure = true;
|
|
201
|
+
}
|
|
202
|
+
else {
|
|
203
|
+
warnings.push(`optional_worker_failed:${label}`);
|
|
204
|
+
errors.push(`optional:${label}`);
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
if (toolCallCount >= maxToolCalls) {
|
|
209
|
+
while (cursor < preparedTasks.length) {
|
|
210
|
+
const task = preparedTasks[cursor];
|
|
211
|
+
cursor += 1;
|
|
212
|
+
if (task.allowedTools.length === 0) {
|
|
213
|
+
taskResults.push(await this.runTask({
|
|
214
|
+
input,
|
|
215
|
+
policyCompilation,
|
|
216
|
+
task,
|
|
217
|
+
remainingToolCalls: 0,
|
|
218
|
+
remainingModelCalls: Math.max(0, maxModelCalls - modelCallCount),
|
|
219
|
+
perTaskTimeoutMs,
|
|
220
|
+
}));
|
|
221
|
+
continue;
|
|
222
|
+
}
|
|
223
|
+
const result = await this.skipTask(input.runId, task, "tool_budget_exhausted");
|
|
224
|
+
taskResults.push(result);
|
|
225
|
+
warnings.push(`worker_task_skipped:${task.task.id}:tool_budget_exhausted`);
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
if (modelCallCount >= maxModelCalls) {
|
|
229
|
+
while (cursor < preparedTasks.length) {
|
|
230
|
+
const task = preparedTasks[cursor];
|
|
231
|
+
cursor += 1;
|
|
232
|
+
const result = await this.skipTask(input.runId, task, "model_budget_exhausted");
|
|
233
|
+
taskResults.push(result);
|
|
234
|
+
warnings.push(`worker_task_skipped:${task.task.id}:model_budget_exhausted`);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
if (requiredFailure || !this.options.verifierRunner) {
|
|
239
|
+
break;
|
|
240
|
+
}
|
|
241
|
+
if (verificationIterations.length >= maxVerificationIterations) {
|
|
242
|
+
verificationStopReason = "max_iterations_reached";
|
|
243
|
+
warnings.push("verification_stop:max_iterations_reached");
|
|
244
|
+
break;
|
|
245
|
+
}
|
|
246
|
+
if (modelCallCount >= maxModelCalls) {
|
|
247
|
+
verificationStopReason = "model_budget_exhausted";
|
|
248
|
+
warnings.push("verification_stop:model_budget_exhausted");
|
|
249
|
+
break;
|
|
250
|
+
}
|
|
251
|
+
const verifierResult = await this.runVerifierIteration({
|
|
252
|
+
input,
|
|
253
|
+
planner: {
|
|
254
|
+
...input.planner,
|
|
255
|
+
workerTasks: preparedTasks.map((task) => task.task),
|
|
256
|
+
},
|
|
257
|
+
policyCompilation,
|
|
258
|
+
iteration: verificationIterations.length + 1,
|
|
259
|
+
taskResults,
|
|
260
|
+
preparedTasks,
|
|
261
|
+
maxToolCalls,
|
|
262
|
+
toolCallCount,
|
|
263
|
+
});
|
|
264
|
+
modelCallCount += 1;
|
|
265
|
+
rejectedFollowUpTasks.push(...verifierResult.rejectedFollowUpTasks);
|
|
266
|
+
for (const rejected of verifierResult.rejectedFollowUpTasks) {
|
|
267
|
+
warnings.push(`verification_follow_up_rejected:${rejected.taskId ?? "unknown"}:${rejected.reason}`);
|
|
268
|
+
}
|
|
269
|
+
if (verifierResult.errorCode) {
|
|
270
|
+
verificationStopReason = "verifier_failed";
|
|
271
|
+
const label = `${verifierResult.errorCode}:${verifierResult.errorMessage ?? "unknown"}`;
|
|
272
|
+
warnings.push("verification_stop:verifier_failed");
|
|
273
|
+
errors.push(label);
|
|
274
|
+
break;
|
|
275
|
+
}
|
|
276
|
+
if (verifierResult.record) {
|
|
277
|
+
verificationIterations.push(verifierResult.record);
|
|
278
|
+
}
|
|
279
|
+
if (verifierResult.record?.output.passed) {
|
|
280
|
+
verificationStopReason = "verifier_passed";
|
|
281
|
+
break;
|
|
282
|
+
}
|
|
283
|
+
if (verifierResult.acceptedFollowUpTasks.length === 0) {
|
|
284
|
+
verificationStopReason = this.resolveVerificationStopReason(verifierResult.rejectedFollowUpTasks);
|
|
285
|
+
warnings.push(`verification_stop:${verificationStopReason}`);
|
|
286
|
+
break;
|
|
287
|
+
}
|
|
288
|
+
const followUpInput = {
|
|
289
|
+
...input,
|
|
290
|
+
planner: {
|
|
291
|
+
...input.planner,
|
|
292
|
+
workerTasks: verifierResult.acceptedFollowUpTasks,
|
|
293
|
+
},
|
|
294
|
+
};
|
|
295
|
+
const preparedFollowUps = await this.prepareTasks(followUpInput, policyCompilation, warnings);
|
|
296
|
+
preparedTasks.push(...preparedFollowUps);
|
|
297
|
+
followUpTaskCount += preparedFollowUps.length;
|
|
298
|
+
warnings.push(`verification_follow_up_accepted:${preparedFollowUps
|
|
299
|
+
.map((task) => task.task.id)
|
|
300
|
+
.join(",")}`);
|
|
301
|
+
}
|
|
302
|
+
if (requiredFailure) {
|
|
303
|
+
if (this.options.verifierRunner && verificationStopReason === "not_started") {
|
|
304
|
+
verificationStopReason = "required_worker_failed";
|
|
305
|
+
}
|
|
306
|
+
while (cursor < preparedTasks.length) {
|
|
307
|
+
const task = preparedTasks[cursor];
|
|
308
|
+
cursor += 1;
|
|
309
|
+
const result = await this.skipTask(input.runId, task, "required_worker_failed");
|
|
310
|
+
taskResults.push(result);
|
|
311
|
+
warnings.push(`worker_task_skipped:${task.task.id}:required_worker_failed`);
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
const verification = this.buildVerificationLoopResult({
|
|
315
|
+
stopReason: verificationStopReason,
|
|
316
|
+
iterations: verificationIterations,
|
|
317
|
+
followUpTaskCount,
|
|
318
|
+
rejectedFollowUpTasks,
|
|
319
|
+
});
|
|
320
|
+
const taskOrder = new Map(preparedTasks.map((task, index) => [task.task.id, index]));
|
|
321
|
+
const orderedTaskResults = [...taskResults].sort((left, right) => (taskOrder.get(left.taskId) ?? Number.MAX_SAFE_INTEGER) -
|
|
322
|
+
(taskOrder.get(right.taskId) ?? Number.MAX_SAFE_INTEGER));
|
|
323
|
+
const finalStatus = this.resolveStatus(orderedTaskResults, verification);
|
|
324
|
+
await this.store.updateRun(input.runId, {
|
|
325
|
+
status: finalStatus,
|
|
326
|
+
warnings,
|
|
327
|
+
errors,
|
|
328
|
+
metadata: {
|
|
329
|
+
phase: "worker_task_executor",
|
|
330
|
+
workerTaskCount: preparedTasks.length,
|
|
331
|
+
completedWorkerTaskCount: orderedTaskResults.filter((result) => result.status !== "skipped").length,
|
|
332
|
+
toolCallCount,
|
|
333
|
+
modelCallCount,
|
|
334
|
+
calledTools: [...calledTools].sort(),
|
|
335
|
+
verification,
|
|
336
|
+
},
|
|
337
|
+
});
|
|
338
|
+
return {
|
|
339
|
+
runId: input.runId,
|
|
340
|
+
status: finalStatus,
|
|
341
|
+
taskResults: orderedTaskResults,
|
|
342
|
+
verification,
|
|
343
|
+
warnings,
|
|
344
|
+
errors,
|
|
345
|
+
toolCallCount,
|
|
346
|
+
calledTools: [...calledTools].sort(),
|
|
347
|
+
modelCallCount,
|
|
348
|
+
trace: await this.store.readRunTrace(input.runId),
|
|
349
|
+
};
|
|
350
|
+
}
|
|
351
|
+
async runVerifierIteration(args) {
|
|
352
|
+
const startedAtMs = this.now();
|
|
353
|
+
try {
|
|
354
|
+
const trace = await this.store.readRunTrace(args.input.runId);
|
|
355
|
+
const rawOutput = await this.options.verifierRunner?.verify({
|
|
356
|
+
runId: args.input.runId,
|
|
357
|
+
request: args.input.request,
|
|
358
|
+
planner: args.planner,
|
|
359
|
+
iteration: args.iteration,
|
|
360
|
+
evidence: trace?.evidence ?? [],
|
|
361
|
+
taskResults: args.taskResults,
|
|
362
|
+
remainingToolCalls: Math.max(0, args.maxToolCalls - args.toolCallCount),
|
|
363
|
+
policyCompilation: args.policyCompilation,
|
|
364
|
+
});
|
|
365
|
+
const validated = validateCodaliGatewayVerifierOutput(rawOutput);
|
|
366
|
+
if (!validated.ok) {
|
|
367
|
+
throw new Error(`GATEWAY_VERIFIER_OUTPUT_INVALID: ${validated.issues
|
|
368
|
+
.map((issue) => `${issue.path}:${issue.message}`)
|
|
369
|
+
.join("; ")}`);
|
|
370
|
+
}
|
|
371
|
+
const followUps = this.filterVerifierFollowUpTasks({
|
|
372
|
+
output: validated.value,
|
|
373
|
+
preparedTasks: args.preparedTasks,
|
|
374
|
+
policyCompilation: args.policyCompilation,
|
|
375
|
+
maxToolCalls: args.maxToolCalls,
|
|
376
|
+
toolCallCount: args.toolCallCount,
|
|
377
|
+
allowImageWorker: args.input.request.policy.allowImageWorker === true,
|
|
378
|
+
});
|
|
379
|
+
const record = {
|
|
380
|
+
iteration: args.iteration,
|
|
381
|
+
output: validated.value,
|
|
382
|
+
acceptedFollowUpTaskIds: followUps.accepted.map((task) => task.id),
|
|
383
|
+
rejectedFollowUpTasks: followUps.rejected,
|
|
384
|
+
};
|
|
385
|
+
await this.store.appendModelCall({
|
|
386
|
+
runId: args.input.runId,
|
|
387
|
+
role: "verifier",
|
|
388
|
+
status: "success",
|
|
389
|
+
latencyMs: Math.max(0, this.now() - startedAtMs),
|
|
390
|
+
input: {
|
|
391
|
+
iteration: args.iteration,
|
|
392
|
+
evidenceCount: trace?.evidence.length ?? 0,
|
|
393
|
+
taskResultCount: args.taskResults.length,
|
|
394
|
+
remainingToolCalls: Math.max(0, args.maxToolCalls - args.toolCallCount),
|
|
395
|
+
},
|
|
396
|
+
output: validated.value,
|
|
397
|
+
metadata: {
|
|
398
|
+
iteration: args.iteration,
|
|
399
|
+
acceptedFollowUpTaskIds: record.acceptedFollowUpTaskIds,
|
|
400
|
+
rejectedFollowUpTasks: followUps.rejected,
|
|
401
|
+
},
|
|
402
|
+
});
|
|
403
|
+
return {
|
|
404
|
+
record,
|
|
405
|
+
acceptedFollowUpTasks: followUps.accepted,
|
|
406
|
+
rejectedFollowUpTasks: followUps.rejected,
|
|
407
|
+
};
|
|
408
|
+
}
|
|
409
|
+
catch (error) {
|
|
410
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
411
|
+
await this.store.appendModelCall({
|
|
412
|
+
runId: args.input.runId,
|
|
413
|
+
role: "verifier",
|
|
414
|
+
status: "failed",
|
|
415
|
+
latencyMs: Math.max(0, this.now() - startedAtMs),
|
|
416
|
+
errorCode: "GATEWAY_VERIFIER_FAILED",
|
|
417
|
+
errorMessage,
|
|
418
|
+
metadata: {
|
|
419
|
+
iteration: args.iteration,
|
|
420
|
+
},
|
|
421
|
+
});
|
|
422
|
+
return {
|
|
423
|
+
acceptedFollowUpTasks: [],
|
|
424
|
+
rejectedFollowUpTasks: [],
|
|
425
|
+
errorCode: "GATEWAY_VERIFIER_FAILED",
|
|
426
|
+
errorMessage,
|
|
427
|
+
};
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
filterVerifierFollowUpTasks(args) {
|
|
431
|
+
const existingTaskIds = new Set(args.preparedTasks.map((task) => task.task.id));
|
|
432
|
+
const effectiveAllowedTools = new Set(args.policyCompilation.effectiveAllowedTools);
|
|
433
|
+
const accepted = [];
|
|
434
|
+
const rejected = [];
|
|
435
|
+
for (const task of args.output.followUpTasks) {
|
|
436
|
+
if (existingTaskIds.has(task.id)) {
|
|
437
|
+
rejected.push({ taskId: task.id, reason: "duplicate_task_id" });
|
|
438
|
+
continue;
|
|
439
|
+
}
|
|
440
|
+
if (task.workerRole === "image_worker" &&
|
|
441
|
+
!args.allowImageWorker) {
|
|
442
|
+
rejected.push({ taskId: task.id, reason: "image_worker_disabled" });
|
|
443
|
+
continue;
|
|
444
|
+
}
|
|
445
|
+
const blockedTools = task.toolsAllowed.filter((tool) => !effectiveAllowedTools.has(tool));
|
|
446
|
+
if (blockedTools.length > 0) {
|
|
447
|
+
rejected.push({
|
|
448
|
+
taskId: task.id,
|
|
449
|
+
reason: "required_tool_unavailable",
|
|
450
|
+
tools: uniqueInOrder(blockedTools),
|
|
451
|
+
});
|
|
452
|
+
continue;
|
|
453
|
+
}
|
|
454
|
+
if (task.toolsAllowed.length > 0 && args.toolCallCount >= args.maxToolCalls) {
|
|
455
|
+
rejected.push({
|
|
456
|
+
taskId: task.id,
|
|
457
|
+
reason: "tool_budget_exhausted",
|
|
458
|
+
tools: uniqueInOrder(task.toolsAllowed),
|
|
459
|
+
});
|
|
460
|
+
continue;
|
|
461
|
+
}
|
|
462
|
+
existingTaskIds.add(task.id);
|
|
463
|
+
accepted.push({
|
|
464
|
+
...task,
|
|
465
|
+
toolsAllowed: uniqueInOrder(task.toolsAllowed),
|
|
466
|
+
metadata: {
|
|
467
|
+
...(task.metadata ?? {}),
|
|
468
|
+
verifierFollowUp: true,
|
|
469
|
+
},
|
|
470
|
+
});
|
|
471
|
+
}
|
|
472
|
+
return { accepted, rejected };
|
|
473
|
+
}
|
|
474
|
+
resolveVerificationStopReason(rejected) {
|
|
475
|
+
if (rejected.some((task) => task.reason === "tool_budget_exhausted")) {
|
|
476
|
+
return "tool_budget_exhausted";
|
|
477
|
+
}
|
|
478
|
+
if (rejected.some((task) => task.reason === "required_tool_unavailable")) {
|
|
479
|
+
return "required_tool_unavailable";
|
|
480
|
+
}
|
|
481
|
+
return "no_useful_followups";
|
|
482
|
+
}
|
|
483
|
+
buildVerificationLoopResult(args) {
|
|
484
|
+
if (args.stopReason === "not_configured") {
|
|
485
|
+
return undefined;
|
|
486
|
+
}
|
|
487
|
+
return {
|
|
488
|
+
passed: args.iterations.at(-1)?.output.passed ?? false,
|
|
489
|
+
stopReason: args.stopReason,
|
|
490
|
+
iterations: args.iterations,
|
|
491
|
+
missingInformation: uniqueInOrder(args.iterations.flatMap((iteration) => iteration.output.missingInformation)),
|
|
492
|
+
contradictions: args.iterations.flatMap((iteration) => iteration.output.contradictions),
|
|
493
|
+
issues: args.iterations.flatMap((iteration) => iteration.output.issues),
|
|
494
|
+
followUpTaskCount: args.followUpTaskCount,
|
|
495
|
+
rejectedFollowUpTasks: args.rejectedFollowUpTasks,
|
|
496
|
+
};
|
|
497
|
+
}
|
|
498
|
+
async prepareTasks(input, policyCompilation, warnings) {
|
|
499
|
+
const effectiveAllowedTools = new Set(policyCompilation.effectiveAllowedTools);
|
|
500
|
+
const prepared = [];
|
|
501
|
+
for (const task of input.planner.workerTasks) {
|
|
502
|
+
const allowedTools = uniqueInOrder(task.toolsAllowed.filter((tool) => effectiveAllowedTools.has(tool)));
|
|
503
|
+
const removedTools = uniqueInOrder(task.toolsAllowed.filter((tool) => !effectiveAllowedTools.has(tool)));
|
|
504
|
+
if (removedTools.length > 0) {
|
|
505
|
+
warnings.push(`worker_task_tools_removed:${task.id}:${removedTools.join(",")}`);
|
|
506
|
+
}
|
|
507
|
+
const required = isRequiredWorkerTask(task);
|
|
508
|
+
const sanitizedTask = { ...task, toolsAllowed: allowedTools };
|
|
509
|
+
await this.store.createTask({
|
|
510
|
+
id: task.id,
|
|
511
|
+
runId: input.runId,
|
|
512
|
+
status: "pending",
|
|
513
|
+
workerRole: task.workerRole,
|
|
514
|
+
objective: task.objective,
|
|
515
|
+
metadata: {
|
|
516
|
+
...(task.metadata ?? {}),
|
|
517
|
+
required,
|
|
518
|
+
allowedTools,
|
|
519
|
+
removedTools,
|
|
520
|
+
},
|
|
521
|
+
});
|
|
522
|
+
prepared.push({ task: sanitizedTask, required, allowedTools, removedTools });
|
|
523
|
+
}
|
|
524
|
+
return prepared;
|
|
525
|
+
}
|
|
526
|
+
async runTask(args) {
|
|
527
|
+
const startedAtMs = this.now();
|
|
528
|
+
const prompt = buildCodaliGatewayWorkerPrompt({
|
|
529
|
+
request: args.input.request,
|
|
530
|
+
task: args.task.task,
|
|
531
|
+
allowedTools: args.task.allowedTools,
|
|
532
|
+
remainingToolCalls: args.remainingToolCalls,
|
|
533
|
+
remainingModelCalls: args.remainingModelCalls,
|
|
534
|
+
});
|
|
535
|
+
await this.store.updateTask(args.input.runId, args.task.task.id, {
|
|
536
|
+
status: "running",
|
|
537
|
+
metadata: {
|
|
538
|
+
...(args.task.task.metadata ?? {}),
|
|
539
|
+
required: args.task.required,
|
|
540
|
+
allowedTools: args.task.allowedTools,
|
|
541
|
+
removedTools: args.task.removedTools,
|
|
542
|
+
},
|
|
543
|
+
});
|
|
544
|
+
try {
|
|
545
|
+
const workerResult = await withTimeout(this.options.taskRunner.run({
|
|
546
|
+
runId: args.input.runId,
|
|
547
|
+
task: args.task.task,
|
|
548
|
+
prompt,
|
|
549
|
+
allowedTools: args.task.allowedTools,
|
|
550
|
+
remainingToolCalls: args.remainingToolCalls,
|
|
551
|
+
remainingModelCalls: args.remainingModelCalls,
|
|
552
|
+
timeoutMs: args.perTaskTimeoutMs,
|
|
553
|
+
request: args.input.request,
|
|
554
|
+
policyCompilation: args.policyCompilation,
|
|
555
|
+
}), args.perTaskTimeoutMs, args.task.task.id);
|
|
556
|
+
return await this.persistWorkerResult(args, workerResult, startedAtMs);
|
|
557
|
+
}
|
|
558
|
+
catch (error) {
|
|
559
|
+
const errorCode = error instanceof Error && error.name === "CodaliGatewayWorkerTimeoutError"
|
|
560
|
+
? "GATEWAY_WORKER_TIMEOUT"
|
|
561
|
+
: "GATEWAY_WORKER_FAILED";
|
|
562
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
563
|
+
const result = {
|
|
564
|
+
taskId: args.task.task.id,
|
|
565
|
+
workerRole: args.task.task.workerRole,
|
|
566
|
+
status: "failed",
|
|
567
|
+
required: args.task.required,
|
|
568
|
+
allowedTools: args.task.allowedTools,
|
|
569
|
+
removedTools: args.task.removedTools,
|
|
570
|
+
durationMs: Math.max(0, this.now() - startedAtMs),
|
|
571
|
+
evidenceCount: 0,
|
|
572
|
+
toolCallCount: 0,
|
|
573
|
+
calledTools: [],
|
|
574
|
+
modelCallCount: 0,
|
|
575
|
+
errorCode,
|
|
576
|
+
errorMessage,
|
|
577
|
+
};
|
|
578
|
+
await this.store.updateTask(args.input.runId, args.task.task.id, {
|
|
579
|
+
status: "failed",
|
|
580
|
+
metadata: {
|
|
581
|
+
...(args.task.task.metadata ?? {}),
|
|
582
|
+
required: args.task.required,
|
|
583
|
+
allowedTools: args.task.allowedTools,
|
|
584
|
+
errorCode,
|
|
585
|
+
errorMessage,
|
|
586
|
+
},
|
|
587
|
+
});
|
|
588
|
+
return result;
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
async persistWorkerResult(args, workerResult, startedAtMs) {
|
|
592
|
+
const allowed = new Set(args.task.allowedTools);
|
|
593
|
+
const disallowedToolCalls = (workerResult.toolCalls ?? [])
|
|
594
|
+
.filter((call) => !allowed.has(call.tool));
|
|
595
|
+
const toolCalls = (workerResult.toolCalls ?? []).map((call) => {
|
|
596
|
+
if (allowed.has(call.tool)) {
|
|
597
|
+
return call;
|
|
598
|
+
}
|
|
599
|
+
return {
|
|
600
|
+
...call,
|
|
601
|
+
status: "blocked",
|
|
602
|
+
errorCode: call.errorCode ?? "GATEWAY_TOOL_NOT_APPROVED",
|
|
603
|
+
errorMessage: call.errorMessage ?? "Worker attempted a tool outside its approved set.",
|
|
604
|
+
};
|
|
605
|
+
});
|
|
606
|
+
const toolBudgetExceeded = toolCalls.length > args.remainingToolCalls;
|
|
607
|
+
const modelCallCount = Math.max(1, workerResult.modelCalls?.length ?? 0);
|
|
608
|
+
const modelBudgetExceeded = modelCallCount > args.remainingModelCalls;
|
|
609
|
+
const traceBeforePersistence = await this.store.readRunTrace(args.input.runId);
|
|
610
|
+
const remainingEvidenceItems = Math.max(0, args.policyCompilation.security.limits.maxEvidenceItems -
|
|
611
|
+
(traceBeforePersistence?.evidence.length ?? 0));
|
|
612
|
+
const remainingImageArtifactsInitial = Math.max(0, args.policyCompilation.security.limits.maxImageArtifacts -
|
|
613
|
+
(traceBeforePersistence?.artifacts.filter(isImageArtifact).length ?? 0));
|
|
614
|
+
let remainingImageArtifacts = remainingImageArtifactsInitial;
|
|
615
|
+
let blockedImageArtifactCount = 0;
|
|
616
|
+
const tenantScoped = requestHasTenantScope(args.input.request);
|
|
617
|
+
const normalizedEvidence = normalizeCodaliEvidence({
|
|
618
|
+
runId: args.input.runId,
|
|
619
|
+
taskId: args.task.task.id,
|
|
620
|
+
originalQuery: args.input.request.query,
|
|
621
|
+
evidence: workerResult.evidence,
|
|
622
|
+
workerOutput: workerResult.output,
|
|
623
|
+
toolCalls,
|
|
624
|
+
requireTenantScope: tenantScoped,
|
|
625
|
+
defaultTenantScoped: tenantScoped,
|
|
626
|
+
maxEvidenceItems: remainingEvidenceItems,
|
|
627
|
+
});
|
|
628
|
+
const evidence = normalizedEvidence.evidence;
|
|
629
|
+
if (evidence.length > 0) {
|
|
630
|
+
await this.store.appendEvidence(args.input.runId, evidence);
|
|
631
|
+
}
|
|
632
|
+
for (const call of toolCalls) {
|
|
633
|
+
await this.store.appendToolCall({
|
|
634
|
+
runId: args.input.runId,
|
|
635
|
+
taskId: args.task.task.id,
|
|
636
|
+
tool: call.tool,
|
|
637
|
+
status: call.status,
|
|
638
|
+
latencyMs: call.latencyMs,
|
|
639
|
+
args: call.args,
|
|
640
|
+
result: call.result,
|
|
641
|
+
errorCode: call.errorCode,
|
|
642
|
+
errorMessage: call.errorMessage,
|
|
643
|
+
metadata: call.metadata,
|
|
644
|
+
});
|
|
645
|
+
}
|
|
646
|
+
for (const call of workerResult.modelCalls ?? []) {
|
|
647
|
+
await this.store.appendModelCall({
|
|
648
|
+
runId: args.input.runId,
|
|
649
|
+
taskId: args.task.task.id,
|
|
650
|
+
role: call.role,
|
|
651
|
+
status: call.status,
|
|
652
|
+
latencyMs: call.latencyMs,
|
|
653
|
+
agentSlug: call.agentSlug,
|
|
654
|
+
model: call.model,
|
|
655
|
+
provider: call.provider,
|
|
656
|
+
input: call.input,
|
|
657
|
+
output: call.output,
|
|
658
|
+
errorCode: call.errorCode,
|
|
659
|
+
errorMessage: call.errorMessage,
|
|
660
|
+
metadata: call.metadata,
|
|
661
|
+
});
|
|
662
|
+
}
|
|
663
|
+
for (const artifact of workerResult.artifacts ?? []) {
|
|
664
|
+
if (isImageArtifact(artifact) && remainingImageArtifacts <= 0) {
|
|
665
|
+
blockedImageArtifactCount += 1;
|
|
666
|
+
continue;
|
|
667
|
+
}
|
|
668
|
+
if (isImageArtifact(artifact)) {
|
|
669
|
+
remainingImageArtifacts -= 1;
|
|
670
|
+
}
|
|
671
|
+
await this.store.saveArtifact({
|
|
672
|
+
...artifact,
|
|
673
|
+
runId: args.input.runId,
|
|
674
|
+
taskId: artifact.taskId ?? args.task.task.id,
|
|
675
|
+
});
|
|
676
|
+
}
|
|
677
|
+
const imageArtifactBudgetExceeded = blockedImageArtifactCount > 0;
|
|
678
|
+
const status = workerResult.status === "failed" ||
|
|
679
|
+
disallowedToolCalls.length > 0 ||
|
|
680
|
+
toolBudgetExceeded ||
|
|
681
|
+
modelBudgetExceeded ||
|
|
682
|
+
imageArtifactBudgetExceeded
|
|
683
|
+
? "failed"
|
|
684
|
+
: "succeeded";
|
|
685
|
+
const errorCode = disallowedToolCalls.length > 0
|
|
686
|
+
? "GATEWAY_TOOL_NOT_APPROVED"
|
|
687
|
+
: toolBudgetExceeded
|
|
688
|
+
? "GATEWAY_TOOL_BUDGET_EXCEEDED"
|
|
689
|
+
: modelBudgetExceeded
|
|
690
|
+
? "GATEWAY_MODEL_BUDGET_EXCEEDED"
|
|
691
|
+
: imageArtifactBudgetExceeded
|
|
692
|
+
? "GATEWAY_IMAGE_ARTIFACT_BUDGET_EXCEEDED"
|
|
693
|
+
: workerResult.errorCode;
|
|
694
|
+
const errorMessage = disallowedToolCalls.length > 0
|
|
695
|
+
? `Worker attempted disallowed tools: ${disallowedToolCalls
|
|
696
|
+
.map((call) => call.tool)
|
|
697
|
+
.join(", ")}`
|
|
698
|
+
: toolBudgetExceeded
|
|
699
|
+
? "Worker reported more tool calls than the remaining gateway budget."
|
|
700
|
+
: modelBudgetExceeded
|
|
701
|
+
? "Worker reported more model calls than the remaining gateway budget."
|
|
702
|
+
: imageArtifactBudgetExceeded
|
|
703
|
+
? "Worker produced more image artifacts than the remaining gateway budget."
|
|
704
|
+
: workerResult.errorMessage;
|
|
705
|
+
await this.store.updateTask(args.input.runId, args.task.task.id, {
|
|
706
|
+
status,
|
|
707
|
+
metadata: {
|
|
708
|
+
...(args.task.task.metadata ?? {}),
|
|
709
|
+
required: args.task.required,
|
|
710
|
+
allowedTools: args.task.allowedTools,
|
|
711
|
+
removedTools: args.task.removedTools,
|
|
712
|
+
errorCode,
|
|
713
|
+
output: workerResult.output,
|
|
714
|
+
workerMetadata: workerResult.metadata,
|
|
715
|
+
evidenceNormalization: {
|
|
716
|
+
warnings: normalizedEvidence.warnings,
|
|
717
|
+
rejectedCount: normalizedEvidence.rejected.length,
|
|
718
|
+
duplicateCount: normalizedEvidence.duplicateCount,
|
|
719
|
+
remainingEvidenceItems,
|
|
720
|
+
},
|
|
721
|
+
budgetEnforcement: {
|
|
722
|
+
remainingToolCalls: args.remainingToolCalls,
|
|
723
|
+
remainingModelCalls: args.remainingModelCalls,
|
|
724
|
+
modelCallCount,
|
|
725
|
+
toolBudgetExceeded,
|
|
726
|
+
modelBudgetExceeded,
|
|
727
|
+
maxImageArtifacts: args.policyCompilation.security.limits.maxImageArtifacts,
|
|
728
|
+
remainingImageArtifacts: remainingImageArtifactsInitial,
|
|
729
|
+
blockedImageArtifactCount,
|
|
730
|
+
},
|
|
731
|
+
},
|
|
732
|
+
});
|
|
733
|
+
return {
|
|
734
|
+
taskId: args.task.task.id,
|
|
735
|
+
workerRole: args.task.task.workerRole,
|
|
736
|
+
status,
|
|
737
|
+
required: args.task.required,
|
|
738
|
+
allowedTools: args.task.allowedTools,
|
|
739
|
+
removedTools: args.task.removedTools,
|
|
740
|
+
durationMs: Math.max(0, this.now() - startedAtMs),
|
|
741
|
+
evidenceCount: evidence.length,
|
|
742
|
+
toolCallCount: toolCalls.length,
|
|
743
|
+
calledTools: uniqueInOrder(toolCalls.map((call) => call.tool)),
|
|
744
|
+
modelCallCount,
|
|
745
|
+
output: workerResult.output,
|
|
746
|
+
errorCode,
|
|
747
|
+
errorMessage,
|
|
748
|
+
metadata: workerResult.metadata,
|
|
749
|
+
};
|
|
750
|
+
}
|
|
751
|
+
async skipTask(runId, task, reason) {
|
|
752
|
+
await this.store.updateTask(runId, task.task.id, {
|
|
753
|
+
status: "skipped",
|
|
754
|
+
metadata: {
|
|
755
|
+
...(task.task.metadata ?? {}),
|
|
756
|
+
required: task.required,
|
|
757
|
+
allowedTools: task.allowedTools,
|
|
758
|
+
removedTools: task.removedTools,
|
|
759
|
+
skippedReason: reason,
|
|
760
|
+
},
|
|
761
|
+
});
|
|
762
|
+
return {
|
|
763
|
+
taskId: task.task.id,
|
|
764
|
+
workerRole: task.task.workerRole,
|
|
765
|
+
status: "skipped",
|
|
766
|
+
required: task.required,
|
|
767
|
+
allowedTools: task.allowedTools,
|
|
768
|
+
removedTools: task.removedTools,
|
|
769
|
+
durationMs: 0,
|
|
770
|
+
evidenceCount: 0,
|
|
771
|
+
toolCallCount: 0,
|
|
772
|
+
calledTools: [],
|
|
773
|
+
modelCallCount: 0,
|
|
774
|
+
skippedReason: reason,
|
|
775
|
+
};
|
|
776
|
+
}
|
|
777
|
+
resolveStatus(taskResults, verification) {
|
|
778
|
+
if (taskResults.some((result) => result.status === "failed" && result.required)) {
|
|
779
|
+
return "failed";
|
|
780
|
+
}
|
|
781
|
+
if (verification && !verification.passed) {
|
|
782
|
+
return "partial";
|
|
783
|
+
}
|
|
784
|
+
if (taskResults.some((result) => result.status === "failed" || result.status === "skipped")) {
|
|
785
|
+
return "partial";
|
|
786
|
+
}
|
|
787
|
+
return "succeeded";
|
|
788
|
+
}
|
|
789
|
+
}
|
|
790
|
+
/**
 * Factory wrapper around the `CodaliGatewayStateMachine` constructor.
 *
 * @param {object} options - Passed through unchanged to the constructor.
 * @returns {CodaliGatewayStateMachine} A new state machine instance.
 */
export const createCodaliGatewayStateMachine = (options) => {
    return new CodaliGatewayStateMachine(options);
};
|