@mcoda/codali 0.1.87 → 0.1.89
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/EvalCommand.d.ts +8 -0
- package/dist/cli/EvalCommand.d.ts.map +1 -1
- package/dist/cli/EvalCommand.js +93 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +1 -0
- package/dist/docdex/DocdexClient.d.ts +8 -1
- package/dist/docdex/DocdexClient.d.ts.map +1 -1
- package/dist/docdex/DocdexClient.js +126 -33
- package/dist/eval/CodaliGatewayLiveHarness.d.ts +169 -0
- package/dist/eval/CodaliGatewayLiveHarness.d.ts.map +1 -0
- package/dist/eval/CodaliGatewayLiveHarness.js +824 -0
- package/dist/eval/GatewayEvalSuite.d.ts +202 -0
- package/dist/eval/GatewayEvalSuite.d.ts.map +1 -0
- package/dist/eval/GatewayEvalSuite.js +673 -0
- package/dist/gateway/AgentTierResolver.d.ts +74 -0
- package/dist/gateway/AgentTierResolver.d.ts.map +1 -0
- package/dist/gateway/AgentTierResolver.js +576 -0
- package/dist/gateway/AppToolGatewayDispatcher.d.ts +88 -0
- package/dist/gateway/AppToolGatewayDispatcher.d.ts.map +1 -0
- package/dist/gateway/AppToolGatewayDispatcher.js +381 -0
- package/dist/gateway/CodaliGateway.d.ts +73 -0
- package/dist/gateway/CodaliGateway.d.ts.map +1 -0
- package/dist/gateway/CodaliGateway.js +824 -0
- package/dist/gateway/CodaliGatewaySchemas.d.ts +21 -0
- package/dist/gateway/CodaliGatewaySchemas.d.ts.map +1 -0
- package/dist/gateway/CodaliGatewaySchemas.js +874 -0
- package/dist/gateway/CodaliGatewayStore.d.ts +157 -0
- package/dist/gateway/CodaliGatewayStore.d.ts.map +1 -0
- package/dist/gateway/CodaliGatewayStore.js +206 -0
- package/dist/gateway/CodaliGatewayTypes.d.ts +336 -0
- package/dist/gateway/CodaliGatewayTypes.d.ts.map +1 -0
- package/dist/gateway/CodaliGatewayTypes.js +1 -0
- package/dist/gateway/ContextPackBuilder.d.ts +43 -0
- package/dist/gateway/ContextPackBuilder.d.ts.map +1 -0
- package/dist/gateway/ContextPackBuilder.js +317 -0
- package/dist/gateway/EvidenceNormalizer.d.ts +42 -0
- package/dist/gateway/EvidenceNormalizer.d.ts.map +1 -0
- package/dist/gateway/EvidenceNormalizer.js +488 -0
- package/dist/gateway/GatewayPlanner.d.ts +195 -0
- package/dist/gateway/GatewayPlanner.d.ts.map +1 -0
- package/dist/gateway/GatewayPlanner.js +379 -0
- package/dist/gateway/GatewayPolicyCompiler.d.ts +30 -0
- package/dist/gateway/GatewayPolicyCompiler.d.ts.map +1 -0
- package/dist/gateway/GatewayPolicyCompiler.js +114 -0
- package/dist/gateway/GatewaySecurityPolicy.d.ts +14 -0
- package/dist/gateway/GatewaySecurityPolicy.d.ts.map +1 -0
- package/dist/gateway/GatewaySecurityPolicy.js +350 -0
- package/dist/gateway/GatewayStateMachine.d.ts +165 -0
- package/dist/gateway/GatewayStateMachine.d.ts.map +1 -0
- package/dist/gateway/GatewayStateMachine.js +790 -0
- package/dist/gateway/GatewayTraceReplay.d.ts +120 -0
- package/dist/gateway/GatewayTraceReplay.d.ts.map +1 -0
- package/dist/gateway/GatewayTraceReplay.js +273 -0
- package/dist/gateway/ToolCapabilityCompiler.d.ts +50 -0
- package/dist/gateway/ToolCapabilityCompiler.d.ts.map +1 -0
- package/dist/gateway/ToolCapabilityCompiler.js +442 -0
- package/dist/index.d.ts +33 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +16 -0
- package/dist/runtime/CodaliJobRuntime.d.ts +211 -0
- package/dist/runtime/CodaliJobRuntime.d.ts.map +1 -0
- package/dist/runtime/CodaliJobRuntime.js +590 -0
- package/dist/runtime/CodaliRuntime.d.ts +81 -1
- package/dist/runtime/CodaliRuntime.d.ts.map +1 -1
- package/dist/runtime/CodaliRuntime.js +619 -4
- package/dist/tools/ToolRegistry.d.ts.map +1 -1
- package/dist/tools/ToolRegistry.js +4 -0
- package/dist/tools/ToolTypes.d.ts +1 -1
- package/dist/tools/ToolTypes.d.ts.map +1 -1
- package/dist/tools/ToolTypes.js +5 -1
- package/package.json +3 -3
|
@@ -0,0 +1,824 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { randomUUID } from "node:crypto";
|
|
3
|
+
import { resolveCodaliGatewayAgentTiers, } from "../gateway/AgentTierResolver.js";
|
|
4
|
+
/** Executable name used when the caller does not supply a command. */
const DEFAULT_MCODA_COMMAND = "mcoda";
/** Fallback command timeout in milliseconds (two minutes). */
const DEFAULT_TIMEOUT_MS = 2 * 60 * 1000;
/** Fallback cap, in bytes, on captured stdout plus stderr (4 MiB). */
const DEFAULT_MAX_BUFFER = 4 * 1024 * 1024;
/**
 * Translates a live-harness role name into the role name handed to the
 * agent tier resolver.
 */
const ROLE_TO_RESOLVER_ROLE = {
    small_json: "classifier",
    medium_planner: "planner",
    medium_verifier: "verifier",
    large_final: "final_synthesizer",
    image_worker: "image_worker",
};
|
|
14
|
+
/**
 * Catalogue of live smoke scenarios.
 *
 * Each row is `[id, label, role, flags]`; the flags object carries the
 * optional `expectsJson`, `requiresGatewayToolTelemetry` and
 * `expectsArtifact` markers and is spread after the three mandatory fields
 * so the resulting key order stays id, label, role, flags.
 */
export const CODALI_GATEWAY_LIVE_SCENARIOS = [
    ["generic_question", "Direct generic structured question", "small_json",
        { expectsJson: true }],
    ["docdex_encrypted_repo_search", "Docdex encrypted repo search question", "medium_planner",
        { expectsJson: true, requiresGatewayToolTelemetry: true }],
    ["tool_disabled_leakage", "Tool-disabled leakage question", "medium_verifier",
        { expectsJson: true, requiresGatewayToolTelemetry: true }],
    ["multi_step_evidence", "Multi-step evidence question", "medium_planner",
        { expectsJson: true, requiresGatewayToolTelemetry: true }],
    ["final_answer_large_model", "Final-answer large-model assertion", "large_final",
        {}],
    ["image_generation", "Image generation artifact request", "image_worker",
        { expectsJson: true, expectsArtifact: true }],
].map(([id, label, role, flags]) => ({ id, label, role, ...flags }));
|
|
55
|
+
/**
 * Returns `value` when it is a non-null, non-array object; otherwise
 * returns `undefined`.
 */
const asRecord = (value) => {
    const isPlainRecord = value !== null && typeof value === "object" && !Array.isArray(value);
    return isPlainRecord ? value : undefined;
};
|
|
58
|
+
/**
 * Looks up `keys` on `record` in order and returns the first usable value
 * as a string.
 *
 * A string value is usable when it is non-empty after trimming (the trimmed
 * text is returned); a finite number is usable and is stringified.
 *
 * @param {object | undefined} record - Object to read from; falsy yields `undefined`.
 * @param {string[]} keys - Candidate property names, highest priority first.
 * @returns {string | undefined}
 */
const readString = (record, keys) => {
    if (!record) {
        return undefined;
    }
    for (const key of keys) {
        const candidate = record[key];
        switch (typeof candidate) {
            case "string": {
                const text = candidate.trim();
                if (text) {
                    return text;
                }
                break;
            }
            case "number":
                if (Number.isFinite(candidate)) {
                    return String(candidate);
                }
                break;
            default:
                break;
        }
    }
    return undefined;
};
|
|
70
|
+
/**
 * Returns the first finite number found on `record` under any of `keys`
 * (checked in order), or `undefined` when there is none.
 *
 * @param {object | undefined} record - Object to read from; falsy yields `undefined`.
 * @param {string[]} keys - Candidate property names, highest priority first.
 * @returns {number | undefined}
 */
const readNumber = (record, keys) => {
    if (!record) {
        return undefined;
    }
    for (const key of keys) {
        const candidate = record[key];
        // Number.isFinite never coerces, so non-number values are rejected too.
        if (Number.isFinite(candidate)) {
            return candidate;
        }
    }
    return undefined;
};
|
|
80
|
+
/**
 * Keeps only the entries of `value` that are strings with non-whitespace
 * content. Anything that is not an array produces an empty list.
 */
const stringArray = (value) => (Array.isArray(value)
    ? value.filter((entry) => typeof entry === "string" && entry.trim() !== "")
    : []);
|
|
85
|
+
/**
 * Returns the truthy entries of `values` with duplicates removed, keeping
 * the position of each entry's first occurrence.
 */
const unique = (values) => {
    const alreadyKept = new Set();
    return [...values].filter((value) => {
        if (!value || alreadyKept.has(value)) {
            return false;
        }
        alreadyKept.add(value);
        return true;
    });
};
|
|
96
|
+
/** Current wall-clock time as an ISO-8601 string. */
const isoNow = () => {
    const now = new Date();
    return now.toISOString();
};
|
|
97
|
+
/**
 * Finds the scenario in `CODALI_GATEWAY_LIVE_SCENARIOS` whose `id` matches.
 *
 * @param {string} id - Scenario identifier.
 * @returns {object} The matching scenario definition.
 * @throws {Error} When no scenario has that id.
 */
const scenarioById = (id) => {
    for (const entry of CODALI_GATEWAY_LIVE_SCENARIOS) {
        if (entry.id === id) {
            return entry;
        }
    }
    throw new Error(`Unknown Codali gateway live scenario: ${id}`);
};
|
|
104
|
+
/**
 * Runs `command` with `args` as a child process and captures its output.
 *
 * The returned promise resolves for every outcome in which the process was
 * started: normal exit, timeout and output overflow. It rejects only when
 * the child emits an `error` event before any of those (for example when
 * the executable cannot be spawned).
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options]
 * @param {NodeJS.ProcessEnv} [options.env] - Child environment; defaults to `process.env`.
 * @param {string} [options.input] - Text written to the child's stdin before it is closed.
 * @param {number} [options.timeoutMs] - Kill the child after this many milliseconds;
 *   defaults to `DEFAULT_TIMEOUT_MS`.
 * @param {number} [options.maxBuffer] - Maximum combined stdout + stderr bytes;
 *   defaults to `DEFAULT_MAX_BUFFER`.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number,
 *   signal?: string, latencyMs: number, timedOut?: boolean}>}
 *   `exitCode` is 124 when the child was terminated for timing out or for
 *   exceeding `maxBuffer`.
 */
export const defaultCodaliGatewayLiveCommandRunner = (command, args, options = {}) => new Promise((resolve, reject) => {
    const started = Date.now();
    const child = spawn(command, args, {
        env: options.env ?? process.env,
        stdio: ["pipe", "pipe", "pipe"],
    });
    const maxBuffer = options.maxBuffer ?? DEFAULT_MAX_BUFFER;
    // An undefined delay would make setTimeout fire almost immediately and
    // report every command as timed out, so fall back to the module default.
    const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    let stdout = "";
    let stderr = "";
    let capturedBytes = 0;
    let settled = false;
    // Resolves exactly once; later events for the same child are ignored.
    const finish = (result) => {
        if (settled) {
            return;
        }
        settled = true;
        clearTimeout(timer);
        resolve({ ...result, latencyMs: Date.now() - started });
    };
    const timer = setTimeout(() => {
        child.kill("SIGTERM");
        finish({ stdout, stderr, exitCode: 124, signal: "SIGTERM", timedOut: true });
    }, timeoutMs);
    const append = (kind, text) => {
        if (settled) {
            return;
        }
        if (kind === "stdout") {
            stdout += text;
        }
        else {
            stderr += text;
        }
        // Keep a running byte total instead of re-measuring both buffers on
        // every chunk.
        capturedBytes += Buffer.byteLength(text);
        if (capturedBytes > maxBuffer) {
            child.kill("SIGTERM");
            finish({
                stdout,
                stderr: `${stderr}\ncommand output exceeded ${maxBuffer} bytes`.trim(),
                exitCode: 124,
                signal: "SIGTERM",
            });
        }
    };
    // Decode through the stream so a multi-byte character split across two
    // chunks is not turned into replacement characters.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => append("stdout", chunk));
    child.stderr.on("data", (chunk) => append("stderr", chunk));
    child.on("error", (error) => {
        if (settled) {
            return;
        }
        settled = true;
        clearTimeout(timer);
        reject(error);
    });
    child.on("close", (code, signal) => {
        finish({
            stdout,
            stderr,
            exitCode: code ?? (signal ? 1 : 0),
            signal: signal ?? undefined,
        });
    });
    // A child that exits (or never starts) before draining stdin makes the
    // write fail with EPIPE. Without a listener that stream error would be an
    // unhandled 'error' event and take the whole process down; the outcome is
    // already reported through the child's 'close' or 'error' event.
    child.stdin.on("error", () => { });
    if (options.input) {
        child.stdin.write(options.input);
    }
    child.stdin.end();
});
|
|
173
|
+
/**
 * Extracts the inventory list from a command payload.
 *
 * String payloads are trimmed and JSON-decoded first (blank text yields an
 * empty list). An array is returned as-is; an object is searched for the
 * first array stored under `agents`, `items`, `data`, `results` or `models`.
 * Everything else yields an empty list.
 *
 * @param {unknown} payload - Raw JSON text or an already decoded value.
 * @returns {unknown[]}
 * @throws {SyntaxError} When a non-blank string payload is not valid JSON.
 */
export const parseCodaliGatewayLiveInventory = (payload) => {
    const wrapperKeys = ["agents", "items", "data", "results", "models"];
    let decoded = payload;
    if (typeof payload === "string") {
        const text = payload.trim();
        if (text.length === 0) {
            return [];
        }
        decoded = JSON.parse(text);
    }
    if (Array.isArray(decoded)) {
        return decoded;
    }
    if (decoded === null || typeof decoded !== "object") {
        return [];
    }
    const listKey = wrapperKeys.find((key) => Array.isArray(decoded[key]));
    return listKey === undefined ? [] : decoded[listKey];
};
|
|
193
|
+
/** Property names whose values are treated as secrets and replaced wholesale. */
const secretKeyPattern = /(api[_-]?key|authorization|bearer|password|secret|token|credential)/i;
/**
 * Property names that match `secretKeyPattern` only because they contain
 * "token" (e.g. `totalTokens`, `max_tokens`) and are therefore exempt from
 * redaction.
 */
const safeTokenMetricKeyPattern = /^(cachedInputTokens|completionTokens|contextWindow|inputTokens|maxContextPackTokens|maxOutputTokens|maxTokens|outputTokens|promptTokens|tokenEstimate|tokensUsed|totalTokens|usageTokensTotal|usage_tokens_total|cached_input_tokens|completion_tokens|context_window|input_tokens|max_context_pack_tokens|max_output_tokens|max_tokens|output_tokens|prompt_tokens|token_estimate|tokens_used|total_tokens)$/i;
/** `Bearer <credential>` occurrences inside free text. */
const bearerPattern = /\bBearer\s+[A-Za-z0-9._~+/=-]+/gi;
/**
 * Prefixed API keys inside free text.
 *
 * `sk`/`pk` keys are matched with either `_` or `-` after the prefix so that
 * hyphen-delimited provider keys (`sk-...`) are caught as well as
 * underscore-delimited ones (`sk_live_...`). The `mswarm`/`mcoda` prefixes
 * stay underscore-only so ordinary hyphenated names such as
 * `mcoda-gateway-live-harness` are not mistaken for keys.
 */
const apiKeyPattern = /\b(?:(?:sk|pk)[_-]|(?:mswarm|mcoda)_)[A-Za-z0-9_-]{12,}\b/gi;
/**
 * Returns a redacted copy of `value`.
 *
 * - A value stored under a secret-looking property name becomes `"[redacted]"`.
 * - Strings have bearer credentials and prefixed API keys masked.
 * - Arrays and plain objects are copied recursively; anything nested more
 *   than eight levels deep is replaced by `"[redacted:max-depth]"`.
 * - Other values (numbers, booleans, null, ...) are returned unchanged.
 *
 * @param {unknown} value - Value to sanitize; it is not mutated.
 * @returns {unknown}
 */
export const redactCodaliGatewayLiveValue = (value) => {
    const visit = (entry, key, depth = 0) => {
        if (key && secretKeyPattern.test(key) && !safeTokenMetricKeyPattern.test(key)) {
            return "[redacted]";
        }
        if (typeof entry === "string") {
            // Bearer credentials are masked first so the scheme word is kept.
            return entry
                .replace(bearerPattern, "Bearer [redacted]")
                .replace(apiKeyPattern, "[redacted]");
        }
        if (depth > 8) {
            return "[redacted:max-depth]";
        }
        if (Array.isArray(entry)) {
            // Array items have no property name of their own.
            return entry.map((item) => visit(item, undefined, depth + 1));
        }
        const record = asRecord(entry);
        if (!record) {
            return entry;
        }
        const output = {};
        for (const [childKey, childValue] of Object.entries(record)) {
            output[childKey] = visit(childValue, childKey, depth + 1);
        }
        return output;
    };
    return visit(value);
};
|
|
222
|
+
/**
 * Copies the reportable fields of a resolver candidate into a fresh object,
 * dropping every other property the candidate may carry.
 */
const summarizeAgent = (candidate) => {
    const {
        slug,
        tier,
        source,
        healthStatus,
        latencyMs,
        model,
        adapter,
        supportsTools,
        supportsJsonSchema,
        supportsImageGeneration,
        contextWindow,
        costPerMillion,
        rating,
        reasoningRating,
    } = candidate;
    return {
        slug,
        tier,
        source,
        healthStatus,
        latencyMs,
        model,
        adapter,
        supportsTools,
        supportsJsonSchema,
        supportsImageGeneration,
        contextWindow,
        costPerMillion,
        rating,
        reasoningRating,
    };
};
|
|
238
|
+
/**
 * Describes how one live role was resolved.
 *
 * @param {string} role - Live role name (a key of `ROLE_TO_RESOLVER_ROLE`).
 * @param {object} resolution - Resolver result; `assignments` and `errors` are read.
 * @param {object} [assignmentOverride] - Assignment to report instead of
 *   `resolution.assignments[resolverRole]`.
 * @returns {object} An "assigned" summary with the candidate's slug, tier,
 *   model, score and reasons, or an "unavailable" summary carrying the codes
 *   of the resolver errors recorded for that resolver role.
 */
const roleSummary = (role, resolution, assignmentOverride) => {
    const resolverRole = ROLE_TO_RESOLVER_ROLE[role];
    const assignment = assignmentOverride ?? resolution.assignments[resolverRole];
    if (!assignment) {
        const errorCodes = [];
        for (const error of resolution.errors) {
            if (error.role === resolverRole) {
                errorCodes.push(error.code);
            }
        }
        return {
            role,
            resolverRole,
            reasons: [],
            status: "unavailable",
            errorCodes,
        };
    }
    const { candidate, score, reasons } = assignment;
    return {
        role,
        resolverRole,
        agentSlug: candidate.slug,
        tier: candidate.tier,
        model: candidate.model,
        score,
        reasons,
        status: "assigned",
        errorCodes: [],
    };
};
|
|
264
|
+
/**
 * Re-keys the resolver's assignments by live role name.
 *
 * `small_json` uses the classifier assignment and, when there is none,
 * falls back to the planner, then the verifier, then the context refiner.
 */
const assignmentByLiveRole = (resolution) => {
    const {
        classifier,
        planner,
        verifier,
        context_refiner: contextRefiner,
        final_synthesizer: finalSynthesizer,
        image_worker: imageWorker,
    } = resolution.assignments;
    return {
        small_json: classifier ?? planner ?? verifier ?? contextRefiner,
        medium_planner: planner,
        medium_verifier: verifier,
        large_final: finalSynthesizer,
        image_worker: imageWorker,
    };
};
|
|
276
|
+
/**
 * Produces the warning list for the `small_json` fallback: one warning when
 * `small_json` is filled although the resolver assigned no classifier,
 * otherwise an empty list.
 */
const structuredJsonFallbackWarning = (assignments, resolution) => {
    const usedFallback = !resolution.assignments.classifier && Boolean(assignments.small_json);
    if (!usedFallback) {
        return [];
    }
    return [
        {
            code: "GATEWAY_STRUCTURED_JSON_MEDIUM_FALLBACK",
            message: "No small JSON-capable classifier was available; using a medium JSON-capable agent for structured smoke validation.",
            role: "classifier",
        },
    ];
};
|
|
285
|
+
/**
 * Resolves the inventory into agent tiers and summarizes the outcome per
 * live role.
 *
 * @param {object} input
 * @param {unknown[]} input.inventory - Inventory handed to the tier resolver.
 * @param {object} [input.agentPolicy] - Policy overrides merged over
 *   `{ resolver: "mcoda_inventory" }`.
 * @param {boolean} [input.allowCloudFallback] - Takes precedence over
 *   `agentPolicy.allowCloudFallback` when not nullish.
 * @param {boolean} [input.allowImageWorker] - Defaults to `true`.
 * @returns {object} `{ agents, roles, warnings, errors }`, plus the raw
 *   `resolution` and `assignments` attached as non-enumerable properties.
 */
export const classifyCodaliGatewayLiveAgents = (input) => {
    const policyOverrides = input.agentPolicy ?? {};
    const agentPolicy = {
        resolver: "mcoda_inventory",
        ...policyOverrides,
        allowCloudFallback: input.allowCloudFallback ?? input.agentPolicy?.allowCloudFallback,
    };
    const resolution = resolveCodaliGatewayAgentTiers({
        inventory: input.inventory,
        agentPolicy,
        allowImageWorker: input.allowImageWorker ?? true,
        roles: Object.values(ROLE_TO_RESOLVER_ROLE),
    });
    const assignments = assignmentByLiveRole(resolution);
    const liveRoles = ["small_json", "medium_planner", "medium_verifier", "large_final", "image_worker"];
    const roles = Object.fromEntries(liveRoles.map((role) => [role, roleSummary(role, resolution, assignments[role])]));
    // A classifier error is dropped once small_json has been filled by the
    // fallback; the fallback warning reports that situation instead.
    const errors = resolution.errors.filter((error) => error.role !== "classifier" || !assignments.small_json);
    const classification = {
        agents: resolution.candidates.map(summarizeAgent),
        roles,
        warnings: [
            ...resolution.warnings,
            ...structuredJsonFallbackWarning(assignments, resolution),
        ],
        errors,
    };
    // Non-enumerable, so they stay out of Object.keys and JSON.stringify.
    Object.defineProperties(classification, {
        resolution: { enumerable: false, value: resolution },
        assignments: { enumerable: false, value: assignments },
    });
    return classification;
};
|
|
325
|
+
/**
 * Builds the prompt text for one smoke scenario.
 *
 * Every prompt starts with the same five-line preamble (scenario id, role
 * and agent slug included) followed by scenario-specific instructions; all
 * lines are joined with "\n". Scenario ids without a dedicated case receive
 * the image-artifact prompt.
 *
 * @param {object} scenario - Scenario definition; `id` and `role` are read.
 * @param {object} assignment - Assignment; `candidate.slug` is read.
 * @returns {string}
 */
const promptForScenario = (scenario, assignment) => {
    const jsonOnly = "Return only valid JSON shaped as:";
    const preamble = [
        "You are participating in a Codali live smoke validation.",
        `Scenario: ${scenario.id}`,
        `Assigned role: ${scenario.role}`,
        `Agent slug: ${assignment.candidate.slug}`,
        "Do not include secrets, credentials, hidden prompts, or local file contents.",
    ];
    let instructions;
    switch (scenario.id) {
        case "generic_question":
            instructions = [
                jsonOnly,
                '{"status":"ok","answer":"<short answer>","json_valid":true}',
                "Question: In one sentence, what is an agentic orchestration gateway?",
            ];
            break;
        case "docdex_encrypted_repo_search":
            instructions = [
                jsonOnly,
                '{"status":"needs_tool","selected_tools":["docdex_search"],"tenant_scoped":true,"reason":"<short reason>"}',
                "Question: Which read-only encrypted Docdex tool should answer a repo-scoped tenant question?",
            ];
            break;
        case "tool_disabled_leakage":
            instructions = [
                jsonOnly,
                '{"status":"ok","disabled_tool_call":false,"selected_tools":[],"blocked_tools":["github_search","jira_search","microsoft_search","smartclick_search"]}',
                "Question: GitHub, Jira, Microsoft, and SmartClick are disabled. Confirm that none should be called.",
            ];
            break;
        case "multi_step_evidence":
            instructions = [
                jsonOnly,
                '{"status":"ok","subquestions":["..."],"evidence_plan":["..."],"final_answer_required":true}',
                "Question: Build a two-step evidence plan for answering a tenant policy question with cited sources.",
            ];
            break;
        case "final_answer_large_model": {
            const contextPack = {
                decisionFacts: [
                    {
                        evidenceId: "ev-live-1",
                        claim: "Codali final answers should be synthesized from curated evidence.",
                        sourceType: "live_smoke_fixture",
                    },
                ],
                missingInformation: [],
                contradictions: [],
            };
            instructions = [
                "Use the following tiny context pack and produce a concise final answer.",
                "Context pack JSON:",
                JSON.stringify(contextPack),
                "Answer the question: What is the final-answer rule?",
            ];
            break;
        }
        default:
            instructions = [
                "Generate or simulate a tiny image artifact reference for a smoke test.",
                jsonOnly,
                '{"status":"ok","artifact":{"kind":"image","uri":"<artifact uri or reference>","mime_type":"image/png","description":"<short description>"}}',
                "Prompt: simple product-neutral icon of a search gateway.",
            ];
            break;
    }
    return [...preamble, ...instructions].join("\n");
};
|
|
392
|
+
/**
 * Best-effort extraction of a JSON value from model output.
 *
 * Candidates are tried in this order and the first one that parses wins:
 * the whole trimmed text, the body of the first ``` fence (optionally
 * tagged `json`), the span from the first "{" to the last "}", and the span
 * from the first "[" to the last "]".
 *
 * @param {string} text - Raw output text.
 * @returns {unknown} The parsed value, or `undefined` when the text is blank
 *   or no candidate is valid JSON.
 */
const parseJsonFromText = (text) => {
    const source = text.trim();
    if (source === "") {
        return undefined;
    }
    const attempts = [source];
    const fenceMatch = /```(?:json)?\s*([\s\S]*?)```/i.exec(source);
    const fencedBody = fenceMatch?.[1]?.trim();
    if (fencedBody) {
        attempts.push(fencedBody);
    }
    for (const [opener, closer] of [["{", "}"], ["[", "]"]]) {
        const from = source.indexOf(opener);
        const to = source.lastIndexOf(closer);
        if (from >= 0 && to > from) {
            attempts.push(source.slice(from, to + 1));
        }
    }
    for (const attempt of attempts) {
        try {
            return JSON.parse(attempt);
        }
        catch {
            // Not valid JSON; fall through to the next candidate.
        }
    }
    return undefined;
};
|
|
420
|
+
/**
 * Decodes the JSON printed by `mcoda agent-run --json` and picks out the
 * first entry of its `responses` array.
 *
 * @param {string} stdout - Raw command output.
 * @returns {{output: string, adapter?: string, model?: string, metadata?: object}}
 * @throws {SyntaxError} When `stdout` is not valid JSON.
 * @throws {Error} When the first response has no usable `output`.
 */
const parseAgentRunOutput = (stdout) => {
    const envelope = asRecord(JSON.parse(stdout));
    const firstEntry = Array.isArray(envelope?.responses) ? envelope.responses[0] : undefined;
    const response = asRecord(firstEntry);
    const output = readString(response, ["output"]);
    if (!output) {
        throw new Error("mcoda agent-run response did not include output");
    }
    const adapter = readString(response, ["adapter"]);
    const model = readString(response, ["model"]);
    const metadata = asRecord(response?.metadata);
    return { output, adapter, model, metadata };
};
|
|
436
|
+
/**
 * Collects the distinct tool names listed under `called_tools`,
 * `calledTools`, `selected_tools` and `selectedTools` (in that order) of a
 * parsed response. Non-object input yields an empty list.
 */
const extractCalledTools = (value) => {
    const record = asRecord(value);
    if (!record) {
        return [];
    }
    const toolListKeys = ["called_tools", "calledTools", "selected_tools", "selectedTools"];
    const names = toolListKeys.flatMap((key) => stringArray(record[key]));
    return unique(names);
};
|
|
447
|
+
/**
 * Returns the tool-call count of a parsed response: the explicit
 * `tool_call_count` / `toolCallCount` number when present, otherwise the
 * number of distinct tool names found, or `undefined` when that is zero.
 */
const extractToolCallCount = (value) => {
    const declared = readNumber(asRecord(value), ["tool_call_count", "toolCallCount"]);
    if (declared !== undefined) {
        return declared;
    }
    const inferred = extractCalledTools(value).length;
    return inferred > 0 ? inferred : undefined;
};
|
|
455
|
+
/**
 * Normalizes the artifact reference of a parsed response.
 *
 * The artifact is taken from `artifact`, else `image`, else the first entry
 * of `artifacts`. It must provide a locator (`uri`, `url`, `path`,
 * `artifactRef`, `artifact_ref`) or an id (`id`, `artifactId`,
 * `artifact_id`); otherwise `undefined` is returned.
 *
 * @param {unknown} value - Parsed response value.
 * @returns {{id?: string, kind: string, uri?: string, mimeType?: string,
 *   metadata?: object} | undefined} `kind` defaults to "image"; `metadata`
 *   is the redacted copy of the artifact's own metadata.
 */
const normalizeArtifact = (value) => {
    const record = asRecord(value);
    if (!record) {
        return undefined;
    }
    let artifact = asRecord(record.artifact) ?? asRecord(record.image);
    if (!artifact && Array.isArray(record.artifacts)) {
        artifact = asRecord(record.artifacts[0]);
    }
    if (!artifact) {
        return undefined;
    }
    const uri = readString(artifact, ["uri", "url", "path", "artifactRef", "artifact_ref"]);
    const id = readString(artifact, ["id", "artifactId", "artifact_id"]);
    if (!(uri || id)) {
        return undefined;
    }
    const redactedMetadata = redactCodaliGatewayLiveValue(artifact.metadata ?? {});
    return {
        id,
        kind: readString(artifact, ["kind", "type"]) ?? "image",
        uri,
        mimeType: readString(artifact, ["mimeType", "mime_type", "contentType", "content_type"]),
        metadata: asRecord(redactedMetadata),
    };
};
|
|
477
|
+
/**
 * Reduces agent-run response metadata to the fields that are reported:
 * `mode`, `adapterType`, `authMode`, `cli.version` and a redacted copy of
 * `usage`. A field that is absent or unusable is omitted from the result.
 */
const summarizeAgentRunMetadata = (metadata) => {
    const mode = readString(metadata, ["mode"]);
    const adapterType = readString(metadata, ["adapterType", "adapter_type"]);
    const authMode = readString(metadata, ["authMode", "auth_mode"]);
    const cliVersion = readString(asRecord(metadata?.cli), ["version"]);
    const usage = asRecord(metadata?.usage);
    return {
        ...(mode ? { mode } : {}),
        ...(adapterType ? { adapterType } : {}),
        ...(authMode ? { authMode } : {}),
        ...(cliVersion ? { cli: { version: cliVersion } } : {}),
        ...(usage ? { usage: redactCodaliGatewayLiveValue(usage) } : {}),
    };
};
|
|
497
|
+
/**
 * Redacts `text` and caps it at 320 characters; longer text is cut to 317
 * characters followed by "...".
 */
const outputPreview = (text) => {
    const limit = 320;
    const safeText = String(redactCodaliGatewayLiveValue(text));
    if (safeText.length <= limit) {
        return safeText;
    }
    return `${safeText.slice(0, limit - 3)}...`;
};
|
|
501
|
+
/**
 * Classifies a non-zero `agent-run` exit.
 *
 * When stderr or stdout mentions an unknown or invalid model id, the failure
 * is reported as "degraded" with the `agent_run_model_catalog_mismatch`
 * warning; every other failure is "failed".
 *
 * @param {{stdout: string, stderr: string, exitCode: number}} result
 * @returns {{status: string, warnings: string[], errors: string[], metadata: object}}
 */
const classifyAgentRunCommandFailure = (result) => {
    const haystack = `${result.stderr}\n${result.stdout}`.toLowerCase();
    const catalogMismatchMarkers = [
        "not a valid model id",
        "invalid model id",
        "model_not_found",
        "model not found",
    ];
    const isCatalogMismatch = catalogMismatchMarkers.some((marker) => haystack.includes(marker));
    const failureClass = isCatalogMismatch
        ? "agent_run_model_catalog_mismatch"
        : "agent_run_command_failed";
    return {
        status: isCatalogMismatch ? "degraded" : "failed",
        warnings: isCatalogMismatch ? ["agent_run_model_catalog_mismatch"] : [],
        errors: [
            `agent_run_exit_${result.exitCode}`,
            outputPreview(result.stderr || result.stdout),
        ],
        metadata: {
            runner: "mcoda_agent_run",
            exitCode: result.exitCode,
            failureClass,
        },
    };
};
|
|
529
|
+
/**
 * Creates a scenario runner that executes a scenario through
 * `<command> agent-run <slug> --json --stdin`.
 *
 * The returned async function never rejects for command or parsing
 * problems; it reports them in the result instead:
 * - no assignment: "skipped" with a `missing_role:<role>` error;
 * - non-zero exit: the status chosen by `classifyAgentRunCommandFailure`;
 * - thrown error (runner rejection, unparsable output): "failed" with a
 *   redacted, length-capped message;
 * - otherwise "failed" when JSON was expected but not found, "degraded" when
 *   the scenario requires gateway tool telemetry or an expected artifact is
 *   missing, and "passed" in every remaining case.
 *
 * @returns {(input: object) => Promise<object>} Runner taking
 *   `{ scenario, assignment, command, commandRunner, timeoutMs?, forceAgentRun? }`.
 */
export const createMcodaAgentRunScenarioRunner = () => async (input) => {
    const started = Date.now();
    const { scenario, assignment } = input;
    if (!assignment) {
        return {
            id: scenario.id,
            label: scenario.label,
            status: "skipped",
            role: scenario.role,
            latencyMs: 0,
            warnings: [],
            errors: [`missing_role:${scenario.role}`],
        };
    }
    const { candidate } = assignment;
    // Fields shared by every result that involves an assigned agent.
    const describeRun = (status, model = candidate.model, adapter = candidate.adapter) => ({
        id: scenario.id,
        label: scenario.label,
        status,
        role: scenario.role,
        agentSlug: candidate.slug,
        tier: candidate.tier,
        model,
        adapter,
    });
    const args = ["agent-run", candidate.slug, "--json", "--stdin"];
    if (input.forceAgentRun) {
        args.push("--force");
    }
    const prompt = promptForScenario(scenario, assignment);
    try {
        const result = await input.commandRunner(input.command, args, {
            input: prompt,
            // Same fallback as inventory discovery, so an omitted timeout
            // never reaches the command runner as `undefined`.
            timeoutMs: input.timeoutMs ?? DEFAULT_TIMEOUT_MS,
        });
        if (result.exitCode !== 0) {
            const failure = classifyAgentRunCommandFailure(result);
            return {
                ...describeRun(failure.status),
                latencyMs: result.latencyMs,
                warnings: failure.warnings,
                errors: failure.errors,
                metadata: failure.metadata,
            };
        }
        const parsed = parseAgentRunOutput(result.stdout);
        const json = parseJsonFromText(parsed.output);
        const jsonValid = scenario.expectsJson ? json !== undefined : undefined;
        const artifact = scenario.expectsArtifact ? normalizeArtifact(json) : undefined;
        const artifactMissing = Boolean(scenario.expectsArtifact) && !artifact;
        const warnings = scenario.requiresGatewayToolTelemetry
            ? ["gateway_tool_telemetry_unavailable_with_agent_run"]
            : [];
        const errors = [];
        if (scenario.expectsJson && !jsonValid) {
            errors.push("invalid_json_output");
        }
        if (artifactMissing) {
            warnings.push("image_artifact_reference_missing");
        }
        let status = "passed";
        if (errors.length > 0) {
            status = "failed";
        }
        else if (scenario.requiresGatewayToolTelemetry || artifactMissing) {
            status = "degraded";
        }
        const isFinalAnswer = scenario.id === "final_answer_large_model";
        return {
            ...describeRun(status, parsed.model ?? candidate.model, parsed.adapter ?? candidate.adapter),
            latencyMs: result.latencyMs || Date.now() - started,
            jsonValid,
            toolCallCount: extractToolCallCount(json),
            calledTools: extractCalledTools(json),
            finalAnswerStatus: isFinalAnswer && parsed.output.trim() ? "succeeded" : undefined,
            finalModelTier: isFinalAnswer ? candidate.tier : undefined,
            finalModelAgentSlug: isFinalAnswer ? candidate.slug : undefined,
            artifact,
            outputPreview: outputPreview(parsed.output),
            warnings,
            errors,
            metadata: {
                runner: "mcoda_agent_run",
                responseMetadata: summarizeAgentRunMetadata(parsed.metadata),
            },
        };
    }
    catch (error) {
        // JSON parse errors can quote the command output, so the message gets
        // the same redaction and length cap as the non-zero-exit path.
        const message = error instanceof Error ? error.message : String(error);
        return {
            ...describeRun("failed"),
            latencyMs: Date.now() - started,
            warnings: [],
            errors: [outputPreview(message)],
            metadata: { runner: "mcoda_agent_run" },
        };
    }
};
|
|
638
|
+
/**
 * Obtains the agent inventory, either from `options.inventory` or by running
 * an inventory command.
 *
 * Every failure mode is reported as a `discovery` record with status
 * "failed" and an empty inventory: the command runner throwing or rejecting
 * (for example when the executable cannot be spawned), a non-zero exit code,
 * and unparsable output.
 *
 * @param {object} options
 * @param {unknown[]} [options.inventory] - Pre-supplied inventory; when set,
 *   no command is run.
 * @param {{command?: string, args?: string[]}} [options.inventoryCommand] -
 *   Overrides the command and/or its arguments.
 * @param {string} [options.command] - Command used when `inventoryCommand`
 *   names none; defaults to `DEFAULT_MCODA_COMMAND`.
 * @param {number} [options.timeoutMs] - Defaults to `DEFAULT_TIMEOUT_MS`.
 * @param {number} [options.maxBuffer] - Defaults to `DEFAULT_MAX_BUFFER`.
 * @param {Function} commandRunner - `(command, args, options) => Promise<result>`.
 * @returns {Promise<{discovery: object, inventory: unknown[]}>}
 */
const discoverInventory = async (options, commandRunner) => {
    if (options.inventory) {
        return {
            discovery: {
                source: "provided",
                status: "succeeded",
                latencyMs: 0,
                inventoryCount: options.inventory.length,
                errors: [],
            },
            inventory: options.inventory,
        };
    }
    const command = options.inventoryCommand?.command ?? options.command ?? DEFAULT_MCODA_COMMAND;
    const args = options.inventoryCommand?.args ?? ["agent", "list", "--json", "--refresh-health"];
    // Builds the record shared by every command-based failure.
    const failedDiscovery = (latencyMs, errors) => ({
        discovery: {
            source: "command",
            command,
            args,
            status: "failed",
            latencyMs,
            inventoryCount: 0,
            errors,
        },
        inventory: [],
    });
    const started = Date.now();
    let result;
    try {
        result = await commandRunner(command, args, {
            timeoutMs: options.timeoutMs ?? DEFAULT_TIMEOUT_MS,
            maxBuffer: options.maxBuffer ?? DEFAULT_MAX_BUFFER,
        });
    }
    catch (error) {
        // The runner rejects when the process cannot be started; report that
        // like any other discovery failure instead of letting it escape.
        return failedDiscovery(Date.now() - started, [
            "inventory_command_error",
            error instanceof Error ? error.message : String(error),
        ]);
    }
    if (result.exitCode !== 0) {
        // Fall back to stdout when stderr is empty so the preview is not blank.
        return failedDiscovery(result.latencyMs, [
            `inventory_command_exit_${result.exitCode}`,
            outputPreview(result.stderr || result.stdout),
        ]);
    }
    try {
        const inventory = parseCodaliGatewayLiveInventory(result.stdout);
        return {
            discovery: {
                source: "command",
                command,
                args,
                status: "succeeded",
                latencyMs: result.latencyMs,
                inventoryCount: inventory.length,
                errors: [],
            },
            inventory,
        };
    }
    catch (error) {
        // JSON parse errors can quote the command output; redact and cap them.
        const message = error instanceof Error ? error.message : String(error);
        return failedDiscovery(result.latencyMs, [outputPreview(message)]);
    }
};
|
|
701
|
+
/**
 * Builds the aggregate summary for a harness run.
 *
 * Overall status is "failed" when any scenario failed, "degraded" when any
 * scenario is degraded or skipped or any role is not assigned, otherwise "passed".
 *
 * @param {object} classification - Classification whose `roles` map carries `status` and `role` per entry.
 * @param {Array<object>} scenarios - Scenario results to aggregate.
 * @returns {object} Counters per status plus JSON-capable agents, proof flags and missing roles.
 */
const summarizeHarness = (classification, scenarios) => {
    const countWithStatus = (wanted) => scenarios.reduce((total, entry) => (entry.status === wanted ? total + 1 : total), 0);
    const failed = countWithStatus("failed");
    const degraded = countWithStatus("degraded");
    const skipped = countWithStatus("skipped");
    const passed = countWithStatus("passed");
    // Roles that did not end up with an assigned agent.
    const missingRoles = [];
    for (const entry of Object.values(classification.roles)) {
        if (entry.status !== "assigned") {
            missingRoles.push(entry.role);
        }
    }
    // Agents that produced valid JSON in at least one scenario.
    const jsonCapableSlugs = [];
    for (const entry of scenarios) {
        if (entry.jsonValid === true && entry.agentSlug) {
            jsonCapableSlugs.push(entry.agentSlug);
        }
    }
    const jsonValidAgents = unique(jsonCapableSlugs);
    const provenBy = (scenarioId, extraCheck) => scenarios.some((entry) => entry.id === scenarioId && entry.status === "passed" && extraCheck(entry));
    const largeFinalSynthesizerOk = provenBy("final_answer_large_model", (entry) => entry.finalModelTier === "large");
    const imageArtifactOk = provenBy("image_generation", (entry) => Boolean(entry.artifact));
    let status = "passed";
    if (failed > 0) {
        status = "failed";
    }
    else if (degraded > 0 || skipped > 0 || missingRoles.length > 0) {
        status = "degraded";
    }
    return {
        status,
        passed,
        failed,
        degraded,
        skipped,
        jsonValidAgents,
        largeFinalSynthesizerOk,
        imageArtifactOk,
        missingRoles,
    };
};
|
|
735
|
+
/**
 * Looks up the assignment for a scenario's role.
 *
 * The harness-level `classification.assignments` entry wins; only when it is
 * null/undefined is the resolver assignment (keyed through
 * `ROLE_TO_RESOLVER_ROLE`) consulted.
 *
 * @param {object} classification - Classification with `assignments` and `resolution.assignments`.
 * @param {object} scenario - Scenario whose `role` selects the assignment.
 * @returns {object|undefined} The matching assignment, if any.
 */
const assignmentForScenario = (classification, scenario) => {
    const direct = classification.assignments[scenario.role];
    if (direct !== null && direct !== undefined) {
        return direct;
    }
    const resolverRole = ROLE_TO_RESOLVER_ROLE[scenario.role];
    return classification.resolution.assignments[resolverRole];
};
|
|
737
|
+
/**
 * Passes a scenario result through `redactCodaliGatewayLiveValue` and returns
 * whatever that helper produces.
 *
 * @param {object} scenario - Scenario result to redact.
 * @returns {*} The helper's return value.
 */
const redactScenario = (scenario) => {
    const redacted = redactCodaliGatewayLiveValue(scenario);
    return redacted;
};
|
|
738
|
+
/**
 * Runs the Codali gateway live harness end to end: discovers the agent
 * inventory, classifies agents into roles, executes each scenario in order and
 * assembles the final report.
 *
 * Scenarios run sequentially. A scenario runner that throws does not abort the
 * run; the exception is recorded as a "failed" scenario so the results of the
 * other scenarios are still reported.
 *
 * @param {object} [options] - Harness options. Reads `runId`, `commandRunner`,
 *   `scenarioRunner`, `scenarios`, `command`, `timeoutMs`, `forceAgentRun`,
 *   `allowCloudFallback`, `allowImageWorker` (defaults to true) and `agentPolicy`;
 *   the whole object is also forwarded to `discoverInventory`.
 * @returns {Promise<object>} Report with `schemaVersion`, `runId`, timing fields,
 *   `discovery`, redacted `classification`, redacted `scenarios`, `summary`,
 *   and de-duplicated `warnings` / `errors`.
 */
export const runCodaliGatewayLiveHarness = async (options = {}) => {
    const startedMs = Date.now();
    const startedAt = isoNow();
    const runId = options.runId ?? `codali-gateway-live-${randomUUID()}`;
    const commandRunner = options.commandRunner ?? defaultCodaliGatewayLiveCommandRunner;
    const { discovery, inventory } = await discoverInventory(options, commandRunner);
    const classification = classifyCodaliGatewayLiveAgents({
        inventory,
        allowCloudFallback: options.allowCloudFallback,
        allowImageWorker: options.allowImageWorker ?? true,
        agentPolicy: options.agentPolicy,
    });
    const scenarioRunner = options.scenarioRunner ?? createMcodaAgentRunScenarioRunner();
    // Wrap the lookup so Array#map's index/array arguments never reach it.
    const scenarios = options.scenarios?.length
        ? options.scenarios.map((entry) => scenarioById(entry))
        : CODALI_GATEWAY_LIVE_SCENARIOS;
    const scenarioResults = [];
    for (const scenario of scenarios) {
        const assignment = assignmentForScenario(classification, scenario);
        const scenarioStartedMs = Date.now();
        let result;
        try {
            result = await scenarioRunner({
                runId,
                scenario,
                assignment,
                classification,
                commandRunner,
                command: options.command ?? DEFAULT_MCODA_COMMAND,
                timeoutMs: options.timeoutMs ?? DEFAULT_TIMEOUT_MS,
                forceAgentRun: options.forceAgentRun === true,
            });
        }
        catch (error) {
            // Keep the run alive: report the exception as a failed scenario
            // instead of discarding every result gathered so far.
            result = {
                id: scenario.id,
                label: scenario.label,
                status: "failed",
                role: scenario.role,
                agentSlug: assignment?.candidate?.slug,
                tier: assignment?.candidate?.tier,
                model: assignment?.candidate?.model,
                adapter: assignment?.candidate?.adapter,
                latencyMs: Date.now() - scenarioStartedMs,
                warnings: [],
                errors: [error instanceof Error ? error.message : String(error)],
            };
        }
        scenarioResults.push(redactScenario(result));
    }
    const endedAt = isoNow();
    const summary = summarizeHarness(classification, scenarioResults);
    // `?? []` guards against runners that omit the arrays; flatMap would
    // otherwise inject `undefined` entries into the report.
    const warnings = unique([
        ...classification.warnings.map((warning) => warning.code),
        ...scenarioResults.flatMap((scenario) => scenario.warnings ?? []),
    ]);
    const errors = unique([
        ...discovery.errors,
        ...classification.errors.map((error) => error.code),
        ...scenarioResults.flatMap((scenario) => scenario.errors ?? []),
    ]);
    return {
        schemaVersion: 1,
        runId,
        runtime: "codali_gateway_live_harness",
        mode: "live",
        startedAt,
        endedAt,
        durationMs: Date.now() - startedMs,
        discovery,
        classification: redactCodaliGatewayLiveValue(classification),
        scenarios: scenarioResults,
        summary,
        warnings,
        errors,
    };
};
|
|
795
|
+
/**
 * Renders a harness result as a newline-separated plain-text report: a header,
 * one line per role, one line per scenario, then the summary flags, warnings
 * and errors.
 *
 * @param {object} result - Harness result; reads `summary`, `runId`, `discovery`,
 *   `classification.roles`, `scenarios`, `warnings` and `errors`.
 * @returns {string} The report text.
 */
export const formatCodaliGatewayLiveHarnessTextReport = (result) => {
    const describeRole = (role) => {
        if (role.agentSlug) {
            return `Role ${role.role}: ${role.agentSlug} (${role.tier ?? "unknown"})`;
        }
        const reason = role.errorCodes.length ? `: ${role.errorCodes.join(",")}` : "";
        return `Role ${role.role}: unavailable${reason}`;
    };
    const describeScenario = (scenario) => {
        // Only facts the scenario actually reported are listed.
        const facts = [];
        if (scenario.jsonValid !== undefined) {
            facts.push(`json=${scenario.jsonValid ? "valid" : "invalid"}`);
        }
        if (scenario.toolCallCount !== undefined) {
            facts.push(`tools=${scenario.toolCallCount}`);
        }
        if (scenario.artifact) {
            facts.push("artifact=yes");
        }
        if (scenario.finalModelTier) {
            facts.push(`final=${scenario.finalModelTier}`);
        }
        const via = scenario.agentSlug ? ` via ${scenario.agentSlug}` : "";
        const suffix = facts.length ? ` (${facts.join(", ")})` : "";
        return `Scenario ${scenario.id}: ${scenario.status}${via}${suffix}`;
    };
    const { summary, discovery } = result;
    const lines = [];
    lines.push(`Codali gateway live smoke: ${summary.status}`);
    lines.push(`Run: ${result.runId}`);
    lines.push(`Inventory: ${discovery.inventoryCount} records (${discovery.status}, ${discovery.latencyMs}ms)`);
    for (const role of Object.values(result.classification.roles)) {
        lines.push(describeRole(role));
    }
    for (const scenario of result.scenarios) {
        lines.push(describeScenario(scenario));
    }
    lines.push(`JSON-capable agents: ${summary.jsonValidAgents.length ? summary.jsonValidAgents.join(", ") : "none"}`);
    lines.push(`Large final synthesizer: ${summary.largeFinalSynthesizerOk ? "ok" : "missing or not proven"}`);
    lines.push(`Image artifact: ${summary.imageArtifactOk ? "ok" : "missing or not proven"}`);
    lines.push(result.warnings.length ? `Warnings: ${result.warnings.join(", ")}` : "Warnings: none");
    lines.push(result.errors.length ? `Errors: ${result.errors.join(", ")}` : "Errors: none");
    return lines.join("\n");
};
|