agent-skill-evals 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1525 @@
1
+ import { c as parseRuntimeTestFields, n as FileSystem, r as NodeServicesLive, t as Environment } from "./internal-services-5-mRgNls.mjs";
2
+ import { basename, dirname, isAbsolute, join, relative, resolve } from "node:path";
3
+ import * as Clock from "effect/Clock";
4
+ import * as Either from "effect/Either";
5
+ import * as Effect from "effect/Effect";
6
+ import * as Layer from "effect/Layer";
7
+ import * as ParseResult from "effect/ParseResult";
8
+ import * as Schema from "effect/Schema";
9
+ import * as Context from "effect/Context";
10
+ import { spawn } from "node:child_process";
11
+ import "effect/Stream";
12
+ import { createHash } from "node:crypto";
13
+ //#region src/runtime-checks/_helpers.ts
14
/**
 * Build a runtime-check result record.
 *
 * @param {boolean} pass - Whether the check passed.
 * @param {string} reason - Human-readable explanation of the outcome.
 * @param {*} [evidence] - Optional supporting data, stored as given.
 * @returns {{pass: boolean, score: number, reason: string, evidence: *}}
 *   `score` is 1 when `pass` is truthy and 0 otherwise.
 */
function result(pass, reason, evidence) {
  let score = 0;
  if (pass) score = 1;
  return { pass, score, reason, evidence };
}
22
/**
 * Build the failing result returned when a check's assertion is malformed.
 *
 * @param {string} reason - Why the assertion was rejected.
 * @returns {object} A failed result (pass=false, score=0) with no evidence.
 */
function validationFailure(reason) {
  const failed = result(false, reason);
  return failed;
}
25
/**
 * Turn a "matched" boolean into a pass/fail result according to the
 * assertion mode. Used by checks whose type name does not already encode
 * polarity (file.exists, tool.called, ...).
 *
 * - "should" and "precondition": the check passes when `matched` is true.
 * - "should_not": the check passes when `matched` is false.
 *
 * The reason always describes what was observed: `reasonMatched` when
 * `matched` is true, `reasonUnmatched` otherwise, whatever the mode.
 * Any other mode value yields `undefined`.
 *
 * @param {boolean} matched - Whether the observed state matched.
 * @param {string} mode - "should", "should_not" or "precondition".
 * @param {string} reasonMatched - Reason used when `matched` is true.
 * @param {string} reasonUnmatched - Reason used when `matched` is false.
 * @returns {object|undefined} The check result.
 */
function applyMode(matched, mode, reasonMatched, reasonUnmatched) {
  const reason = matched ? reasonMatched : reasonUnmatched;
  if (mode === "should" || mode === "precondition") return result(matched, reason);
  if (mode === "should_not") return result(!matched, reason);
}
40
+ //#endregion
41
+ //#region src/runtime-checks/schemas.ts
42
// Argument schemas for the built-in runtime checks. Each check decodes its
// `assertion` payload against one of these before doing any work.

/** A string that still has at least one character after trimming. */
const NonEmptyString = Schema.String.pipe(
  Schema.filter((value) => value.trim().length > 0, { identifier: "NonEmptyString" })
);

/** Optional plain string, shared by the selector-style schemas below. */
const OptionalString$1 = Schema.optional(Schema.String);

/** Checks that only need a target path. */
const PathArgsSchema = Schema.Struct({ path: NonEmptyString });

/** file.contains: target path plus the text to look for (may be empty). */
const FileContainsArgsSchema = Schema.Struct({
  path: NonEmptyString,
  text: Schema.String
});

/** file.changes_outside_scope: at least one non-empty scope entry. */
const FileChangesOutsideScopeArgsSchema = Schema.Struct({
  scope: Schema.Array(NonEmptyString).pipe(Schema.minItems(1))
});

/** code.pattern_exists / code.no_pattern: file glob plus regex source. */
const CodePatternArgsSchema = Schema.Struct({
  glob: NonEmptyString,
  pattern: NonEmptyString
});

/** verifier.*: command to run, optional argv and optional timeout. */
const VerifierArgsSchema = Schema.Struct({
  run: NonEmptyString,
  args: Schema.optional(Schema.Array(Schema.String)),
  timeoutMs: Schema.optional(Schema.Number)
});

/** tool.called: the tool name is required, the other selectors are optional. */
const ToolCalledArgsSchema = Schema.Struct({
  tool: NonEmptyString,
  provider: OptionalString$1,
  server: OptionalString$1,
  args_match: Schema.optional(Schema.Unknown)
});

/** tool.not_called: every selector is optional at the schema level. */
const ToolNotCalledArgsSchema = Schema.Struct({
  tool: OptionalString$1,
  provider: OptionalString$1,
  server: OptionalString$1,
  args_match: Schema.optional(Schema.Unknown)
});

/** skill.loaded: include/exclude lists plus optional event filters. */
const SkillLoadedArgsSchema = Schema.Struct({
  should_include: Schema.optional(Schema.Array(NonEmptyString)),
  should_exclude: Schema.optional(Schema.Array(NonEmptyString)),
  delivery: Schema.optional(Schema.Literal("native", "mcp")),
  provider: OptionalString$1,
  server: OptionalString$1,
  source: OptionalString$1
});
79
/**
 * Decode a check's assertion payload against `schema`.
 *
 * A missing assertion (null/undefined) is decoded as an empty object.
 *
 * @param {object} schema - Effect schema describing the expected arguments.
 * @param {*} assertion - Raw assertion payload.
 * @param {string} invalidReason - Reason to report when decoding fails.
 * @returns {object} The decoded arguments, or a validation-failure result
 *   (see `isValidationFailure`) when the payload does not match.
 */
function decodeCheckArgs(schema, assertion, invalidReason) {
  const decode = Schema.decodeUnknownEither(schema, { errors: "all" });
  const outcome = decode(assertion ?? {});
  if (!Either.isRight(outcome)) return validationFailure(invalidReason);
  return outcome.right;
}
83
/**
 * Tell whether `value` is a failed check result rather than decoded args.
 *
 * @param {*} value - Value returned by a decode helper.
 * @returns {boolean} True for an object that has a `pass` property equal to
 *   `false` and a string `reason`.
 */
function isValidationFailure(value) {
  if (!value || typeof value !== "object") return false;
  if (!("pass" in value)) return false;
  return value.pass === false && typeof value.reason === "string";
}
86
/**
 * Decode the arguments of a `tool.not_called` assertion.
 *
 * Every selector is optional in the schema, so an empty assertion would
 * decode successfully and then match (and therefore forbid) every recorded
 * tool call. The error message already promises "at least one selector";
 * this enforces it. A selector counts when `tool`, `provider` or `server`
 * is a non-empty string, or when `args_match` is present. This mirrors how
 * `matchesRecordedCall` decides whether a selector is active.
 *
 * @param {*} assertion - Raw assertion payload.
 * @returns {object} Decoded selectors, or a validation-failure result.
 */
function decodeToolNotCalledArgs(assertion) {
  const invalidReason = "tool.not_called: assertion must include at least one selector";
  const decoded = decodeCheckArgs(ToolNotCalledArgsSchema, assertion, invalidReason);
  if (isValidationFailure(decoded)) return decoded;
  const hasSelector =
    Boolean(decoded.tool) ||
    Boolean(decoded.provider) ||
    Boolean(decoded.server) ||
    decoded.args_match !== void 0;
  return hasSelector ? decoded : validationFailure(invalidReason);
}
89
+ //#endregion
90
+ //#region src/runtime-checks/verifier-succeeds.ts
91
/**
 * `verifier.succeeds`: runs `assertion.run` (with optional `args`) through
 * `ctx.world.exec` and matches when the command exits with code 0.
 * The timeout defaults to 60 000 ms when `timeoutMs` is not given.
 */
const verifierSucceeds = {
  type: "verifier.succeeds",
  verify(ctx) {
    return Effect.gen(function* () {
      const args = decodeCheckArgs(
        VerifierArgsSchema,
        ctx.assertion,
        "verifier.succeeds: assertion.run must be a non-empty string"
      );
      if (isValidationFailure(args)) return args;

      const timeoutMs = args.timeoutMs ?? 6e4;
      const outcome = yield* ctx.world.exec(args.run, args.args ?? [], { timeoutMs });
      const exitedCleanly = outcome.exitCode === 0;
      // Only the first 200 characters of stderr are quoted in the reason.
      const failureReason = `verifier.succeeds: ${args.run} exited ${outcome.exitCode}: ${outcome.stderr.slice(0, 200)}`;
      return applyMode(exitedCleanly, ctx.mode, `verifier.succeeds: ${args.run} exited 0`, failureReason);
    });
  }
};
102
+ //#endregion
103
+ //#region src/runtime-checks/verifier-fails.ts
104
/**
 * `verifier.fails`: runs `assertion.run` (with optional `args`) through
 * `ctx.world.exec` and matches when the command exits with a non-zero code.
 * The timeout defaults to 60 000 ms when `timeoutMs` is not given.
 *
 * NOTE(review): any non-zero exit code counts as "failed as expected";
 * presumably that includes runner-reported failures such as a timeout or a
 * command that could not be started. Confirm against `world.exec`.
 */
const verifierFails = {
  type: "verifier.fails",
  verify(ctx) {
    return Effect.gen(function* () {
      const args = decodeCheckArgs(
        VerifierArgsSchema,
        ctx.assertion,
        "verifier.fails: assertion.run must be a non-empty string"
      );
      if (isValidationFailure(args)) return args;

      const timeoutMs = args.timeoutMs ?? 6e4;
      const outcome = yield* ctx.world.exec(args.run, args.args ?? [], { timeoutMs });
      const exitedWithError = outcome.exitCode !== 0;
      return applyMode(
        exitedWithError,
        ctx.mode,
        `verifier.fails: ${args.run} exited ${outcome.exitCode} (failed as expected)`,
        `verifier.fails: ${args.run} unexpectedly exited 0`
      );
    });
  }
};
115
+ //#endregion
116
+ //#region src/runtime-checks/file-exists.ts
117
/**
 * `file.exists`: matches when `ctx.world.readFile(assertion.path)` yields
 * anything other than `null`.
 */
const fileExists = {
  type: "file.exists",
  verify(ctx) {
    return Effect.gen(function* () {
      const args = decodeCheckArgs(
        PathArgsSchema,
        ctx.assertion,
        "file.exists: assertion.path must be a non-empty string"
      );
      if (isValidationFailure(args)) return args;

      const content = yield* ctx.world.readFile(args.path);
      const present = content !== null;
      return applyMode(
        present,
        ctx.mode,
        `file.exists: ${args.path} present`,
        `file.exists: ${args.path} not found`
      );
    });
  }
};
127
+ //#endregion
128
+ //#region src/runtime-checks/file-created.ts
129
/**
 * `file.created`: matches when the recorded file writes contain an event
 * with exactly `assertion.path` and op "create".
 */
const fileCreated = {
  type: "file.created",
  verify(ctx) {
    const args = decodeCheckArgs(
      PathArgsSchema,
      ctx.assertion,
      "file.created: assertion.path must be a non-empty string"
    );
    if (isValidationFailure(args)) return Effect.succeed(args);

    const isCreateOfTarget = (event) => event.path === args.path && event.op === "create";
    const wasCreated = ctx.evidence.filesWritten().some(isCreateOfTarget);
    const verdict = applyMode(
      wasCreated,
      ctx.mode,
      `file.created: ${args.path} created`,
      `file.created: ${args.path} was not created`
    );
    return Effect.succeed(verdict);
  }
};
138
+ //#endregion
139
+ //#region src/runtime-checks/file-not-modified.ts
140
/**
 * `file.not_modified`: matches when no recorded file-write event (of any
 * op) has exactly `assertion.path`.
 */
const fileNotModified = {
  type: "file.not_modified",
  verify(ctx) {
    const args = decodeCheckArgs(
      PathArgsSchema,
      ctx.assertion,
      "file.not_modified: assertion.path must be a non-empty string"
    );
    if (isValidationFailure(args)) return Effect.succeed(args);

    const touched = ctx.evidence.filesWritten().some((event) => event.path === args.path);
    const untouched = !touched;
    const verdict = applyMode(
      untouched,
      ctx.mode,
      `file.not_modified: ${args.path} unchanged`,
      `file.not_modified: ${args.path} was modified`
    );
    return Effect.succeed(verdict);
  }
};
149
+ //#endregion
150
+ //#region src/runtime-checks/file-contains.ts
151
/**
 * `file.contains`: matches when the file at `assertion.path` can be read
 * and its content includes `assertion.text`. Reasons quote at most the
 * first 40 characters of the text.
 */
const fileContains = {
  type: "file.contains",
  verify(ctx) {
    return Effect.gen(function* () {
      const args = decodeCheckArgs(
        FileContainsArgsSchema,
        ctx.assertion,
        "file.contains: assertion.path must be a non-empty string and assertion.text must be a string"
      );
      if (isValidationFailure(args)) return args;

      const content = yield* ctx.world.readFile(args.path);
      const preview = args.text.slice(0, 40);
      const found = content !== null && content.includes(args.text);
      // A missing file and a missing substring get different explanations.
      const notFoundReason =
        content === null
          ? `file.contains: ${args.path} not found`
          : `file.contains: ${args.path} missing "${preview}"`;
      return applyMode(found, ctx.mode, `file.contains: ${args.path} contains "${preview}"`, notFoundReason);
    });
  }
};
162
+ //#endregion
163
+ //#region src/runtime-checks/code-pattern-exists.ts
164
/**
 * `code.pattern_exists`: matches when at least one file listed for
 * `assertion.glob` has content matched by the regex `assertion.pattern`.
 * An invalid regex produces a failing result instead of throwing.
 */
const codePatternExists = {
  type: "code.pattern_exists",
  verify(ctx) {
    return Effect.gen(function* () {
      const args = decodeCheckArgs(
        CodePatternArgsSchema,
        ctx.assertion,
        "code.pattern_exists: assertion.glob and assertion.pattern must be non-empty strings"
      );
      if (isValidationFailure(args)) return args;

      let matcher;
      try {
        matcher = new RegExp(args.pattern);
      } catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        return {
          pass: false,
          score: 0,
          reason: `code.pattern_exists: invalid assertion.pattern regex: ${detail}`
        };
      }

      const hits = [];
      for (const file of yield* ctx.world.listFiles(args.glob)) {
        const text = yield* ctx.world.readFile(file);
        // Unreadable (null) and empty files can never match.
        if (text && matcher.test(text)) hits.push(file);
      }

      // Only the first three matching files are named in the reason.
      const sample = hits.slice(0, 3).join(", ");
      return applyMode(
        hits.length > 0,
        ctx.mode,
        `code.pattern_exists: /${args.pattern}/ found in ${sample}`,
        `code.pattern_exists: /${args.pattern}/ not found in any ${args.glob}`
      );
    });
  }
};
190
+ //#endregion
191
+ //#region src/runtime-checks/code-no-pattern.ts
192
/**
 * `code.no_pattern`: polarity-inverted alias of code.pattern_exists.
 * Here "matched" means "the pattern matches nowhere", which is the natural
 * reading of the name. The search itself is delegated to
 * `codePatternExists` run in "should" mode; the caller's mode is applied
 * to the inverted outcome.
 */
const codeNoPattern = {
  type: "code.no_pattern",
  verify(ctx) {
    return Effect.gen(function* () {
      const args = decodeCheckArgs(
        CodePatternArgsSchema,
        ctx.assertion,
        "code.no_pattern: assertion.glob and assertion.pattern must be non-empty strings"
      );
      if (isValidationFailure(args)) return args;

      // Validate the regex here so the error is reported under this check's
      // own name rather than the delegate's.
      try {
        new RegExp(args.pattern);
      } catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        return {
          pass: false,
          score: 0,
          reason: `code.no_pattern: invalid assertion.pattern regex: ${detail}`
        };
      }

      const delegateCtx = { ...ctx, mode: "should" };
      const presence = yield* codePatternExists.verify(delegateCtx);
      const absent = !presence.pass;
      return applyMode(
        absent,
        ctx.mode,
        `code.no_pattern: pattern absent`,
        `code.no_pattern: pattern present (${presence.reason})`
      );
    });
  }
};
219
+ //#endregion
220
+ //#region src/runtime-checks/file-changes-outside-scope.ts
221
/**
 * `file.changes_outside_scope`: matches when at least one recorded file
 * write falls outside every entry of `assertion.scope`.
 *
 * A path is inside a scope entry when it equals the entry or lies beneath
 * it as a directory. The comparison respects path-segment boundaries, so
 * scope "src" covers "src/a.ts" but not "src-legacy/a.ts". A plain string
 * prefix test would accept the latter and hide out-of-scope edits.
 */
const fileChangesOutsideScope = {
  type: "file.changes_outside_scope",
  verify(ctx) {
    const args = decodeCheckArgs(
      FileChangesOutsideScopeArgsSchema,
      ctx.assertion,
      "file.changes_outside_scope: assertion.scope must contain at least one non-empty string"
    );
    if (isValidationFailure(args)) return Effect.succeed(args);

    const isWithin = (path, entry) => {
      // Entries already written with a trailing slash are used as-is.
      const dirPrefix = entry.endsWith("/") ? entry : `${entry}/`;
      return path === entry || path.startsWith(dirPrefix);
    };
    const unrelated = ctx.evidence
      .filesWritten()
      .filter((file) => !args.scope.some((entry) => isWithin(file.path, entry)));
    const matched = unrelated.length > 0;
    // Only the first three offending paths are named in the reason.
    const sample = unrelated.slice(0, 3).map((file) => file.path).join(", ");
    return Effect.succeed(applyMode(
      matched,
      ctx.mode,
      `file.changes_outside_scope: ${unrelated.length} file(s) outside scope: ${sample}`,
      `file.changes_outside_scope: no changes outside scope`
    ));
  }
};
231
+ //#endregion
232
+ //#region src/runtime-checks/_match.ts
233
/**
 * Structural "expected is contained in actual" comparison.
 *
 * - Strictly equal values match.
 * - Arrays must have the same length and match element by element.
 * - For plain objects only the keys present in `expected` are compared,
 *   so `actual` may carry extra keys.
 * - Anything else does not match.
 *
 * @param {*} actual - Observed value.
 * @param {*} expected - Pattern the observed value must satisfy.
 * @returns {boolean}
 */
function matchesSubset(actual, expected) {
  if (actual === expected) return true;

  if (Array.isArray(expected)) {
    if (!Array.isArray(actual) || actual.length !== expected.length) return false;
    return expected.every((item, index) => matchesSubset(actual[index], item));
  }

  if (expected === null || typeof expected !== "object") return false;
  if (!actual || typeof actual !== "object" || Array.isArray(actual)) return false;

  for (const [key, wanted] of Object.entries(expected)) {
    if (!matchesSubset(actual[key], wanted)) return false;
  }
  return true;
}
243
+ //#endregion
244
+ //#region src/runtime-checks/_call-match.ts
245
/**
 * Test a recorded tool call against a set of selectors.
 *
 * A selector that is absent (falsy `tool`/`provider`/`server`, undefined
 * `args_match`) is ignored; every selector that is present must match.
 *
 * @param {object} call - Recorded call with `tool`, `provider`, `server`, `args`.
 * @param {object} args - Selectors: `tool`, `provider`, `server`, `args_match`.
 * @returns {boolean}
 */
function matchesRecordedCall(call, args) {
  if (args.tool && call.tool !== args.tool) return false;
  if (args.provider && call.provider !== args.provider) return false;
  if (args.server && call.server !== args.server) return false;
  if (args.args_match === void 0) return true;
  return matchesSubset(call.args, args.args_match);
}
248
+ //#endregion
249
+ //#region src/runtime-checks/tool-called.ts
250
/**
 * `tool.called`: matches when some recorded tool call satisfies the
 * selectors in the assertion (`tool` is required; `provider`, `server`
 * and `args_match` are optional).
 *
 * When no tool calls were recorded at all, "should" and "precondition"
 * fail with a dedicated "no evidence" reason. In "should_not" mode an empty
 * record is handled like any other non-match, so it passes. Previously the
 * empty-evidence shortcut ignored the mode and failed `should_not` even
 * though nothing had been called.
 */
const toolCalled = {
  type: "tool.called",
  verify({ assertion, evidence, mode }) {
    const args = decodeCheckArgs(
      ToolCalledArgsSchema,
      assertion,
      "tool.called: assertion.tool must be a non-empty string"
    );
    if (isValidationFailure(args)) return Effect.succeed(args);

    const calls = evidence.toolCalls();
    if (calls.length === 0 && mode !== "should_not") {
      return Effect.succeed({
        pass: false,
        score: 0,
        reason: "tool.called: no built-in tool evidence found"
      });
    }

    const found = calls.some((call) => matchesRecordedCall(call, args));
    return Effect.succeed(applyMode(
      found,
      mode,
      "tool.called: matched built-in tool call",
      "tool.called: matching call not found"
    ));
  }
};
265
+ //#endregion
266
+ //#region src/runtime-checks/tool-not-called.ts
267
/**
 * `tool.not_called`: fails when any recorded tool call satisfies the
 * assertion's selectors and passes otherwise. The polarity is part of the
 * check itself, so the assertion mode is not consulted.
 */
const toolNotCalled = {
  type: "tool.not_called",
  verify({ assertion, evidence }) {
    const selectors = decodeToolNotCalledArgs(assertion);
    if (isValidationFailure(selectors)) return Effect.succeed(selectors);

    for (const call of evidence.toolCalls()) {
      if (matchesRecordedCall(call, selectors)) {
        return Effect.succeed({
          pass: false,
          score: 0,
          reason: "tool.not_called: forbidden built-in tool call observed"
        });
      }
    }
    return Effect.succeed({
      pass: true,
      score: 1,
      reason: "tool.not_called: no matching built-in tool calls observed"
    });
  }
};
284
+ //#endregion
285
+ //#region src/runtime-checks/skill-loaded.ts
286
/**
 * `skill.loaded`: compares the set of loaded skills against
 * `should_include` / `should_exclude`. Load events are first narrowed by
 * the optional `delivery`, `provider`, `server` and `source` filters.
 * At least one of the two lists must be non-empty.
 */
const skillLoaded = {
  type: "skill.loaded",
  verify({ assertion, evidence, mode }) {
    const args = decodeCheckArgs(
      SkillLoadedArgsSchema,
      assertion,
      "skill.loaded: assertion must be an object"
    );
    if (isValidationFailure(args)) return Effect.succeed(args);

    const required = args.should_include ?? [];
    const banned = args.should_exclude ?? [];
    if (required.length === 0 && banned.length === 0) {
      return Effect.succeed({
        pass: false,
        score: 0,
        reason: "skill.loaded: declare should_include or should_exclude"
      });
    }

    const loaded = new Set();
    for (const event of evidence.skillsLoaded()) {
      if (matchesLoad(event, args)) loaded.add(event.skill);
    }

    const missing = required.filter((skill) => !loaded.has(skill));
    const forbidden = banned.filter((skill) => loaded.has(skill));
    const satisfied = missing.length === 0 && forbidden.length === 0;
    return Effect.succeed(applyMode(
      satisfied,
      mode,
      "skill.loaded: expected skill context observed",
      skillLoadedMismatchReason(missing, forbidden)
    ));
  }
};
307
/**
 * Describe why a skill.loaded assertion did not match.
 *
 * @param {string[]} missing - Required skills that were not loaded.
 * @param {string[]} forbidden - Excluded skills that were loaded.
 * @returns {string} "skill.loaded: " followed by the non-empty parts joined
 *   with "; " (nothing follows the prefix when both lists are empty).
 */
function skillLoadedMismatchReason(missing, forbidden) {
  const parts = [];
  if (missing.length) parts.push(`missing loaded skill(s): ${missing.join(", ")}`);
  if (forbidden.length) parts.push(`forbidden loaded skill(s): ${forbidden.join(", ")}`);
  return `skill.loaded: ${parts.join("; ")}`;
}
310
/**
 * Test a skill-load event against the optional filters of a skill.loaded
 * assertion. A filter that is absent (falsy) is ignored.
 *
 * @param {object} event - Load event with `delivery`, `provider`, `server`, `source`.
 * @param {object} args - Filters with the same field names.
 * @returns {boolean}
 */
function matchesLoad(event, args) {
  const filterFields = ["delivery", "provider", "server", "source"];
  return filterFields.every((field) => !args[field] || event[field] === args[field]);
}
313
+ //#endregion
314
+ //#region src/runtime-checks/index.ts
315
/**
 * The built-in runtime checks, in registration order.
 * Registries built from this list are keyed by each entry's `type`.
 */
const corePlugins = [
  // Command verifiers
  verifierSucceeds,
  verifierFails,
  // File-system checks
  fileExists,
  fileCreated,
  fileContains,
  fileNotModified,
  fileChangesOutsideScope,
  // Source-pattern checks
  codePatternExists,
  codeNoPattern,
  // Tool and skill evidence checks
  toolCalled,
  toolNotCalled,
  skillLoaded
];
329
/**
 * Build a `type -> check` map from the core checks followed by `extra`.
 * When two checks share a type, the later one replaces the earlier one.
 *
 * @param {object[]} [extra=[]] - Additional checks to register.
 * @returns {Map<string, object>}
 */
function buildRegistry(extra = []) {
  const entries = [...corePlugins, ...extra].map((plugin) => [plugin.type, plugin]);
  return /* @__PURE__ */ new Map(entries);
}
// The result of this call is discarded.
buildRegistry();
335
+ //#endregion
336
+ //#region src/runtime-checks/catalog.ts
337
/** Effect context tag under which the runtime-check catalog is provided. */
var RuntimeCheckCatalog = class extends Context.Tag(
  "agent-skill-evals/promptfoo/RuntimeCheckCatalog"
)() {};
338
/**
 * Build a catalog object over a list of checks, keyed by `type`.
 * When two checks share a type, the later one wins.
 *
 * @param {Iterable<object>} [checks=corePlugins] - Checks to expose.
 * @returns {{all: function(): object[], get: function(string): (object|undefined), knownTypes: function(): Set<string>}}
 *   `all` and `knownTypes` return a fresh array / Set on every call.
 */
function runtimeCheckCatalogFromChecks(checks = corePlugins) {
  const byType = /* @__PURE__ */ new Map();
  for (const entry of checks) {
    byType.set(entry.type, entry);
  }
  const all = () => Array.from(byType.values());
  const get = (type) => byType.get(type);
  const knownTypes = () => new Set(byType.keys());
  return { all, get, knownTypes };
}
347
/**
 * Create a Layer that provides `RuntimeCheckCatalog` with the core checks
 * followed by `extraChecks`.
 *
 * @param {object[]} [extraChecks=[]] - Additional checks to register.
 * @returns {object} The Effect layer.
 */
function makeRuntimeCheckCatalogLayer(extraChecks = []) {
  const checks = [...corePlugins, ...extraChecks];
  const catalog = runtimeCheckCatalogFromChecks(checks);
  return Layer.succeed(RuntimeCheckCatalog, catalog);
}
/** Catalog layer containing only the core checks. */
const RuntimeCheckCatalogLive = makeRuntimeCheckCatalogLayer();
351
/**
 * Effect that looks up a check by type in the provided catalog.
 *
 * @param {string} type - Check type, e.g. "file.exists".
 * @returns {object} Effect yielding the check, or `undefined` when unknown.
 */
function getRuntimeCheck(type) {
  const lookup = (catalog) => catalog.get(type);
  return Effect.map(RuntimeCheckCatalog, lookup);
}
// The effect built here is neither bound to a name nor run.
Effect.map(RuntimeCheckCatalog, (catalog) => catalog.knownTypes());
355
+ //#endregion
356
+ //#region src/evidence-types.ts
357
/** Version literal every evidence snapshot must carry. */
const EVIDENCE_SCHEMA_VERSION = "agent-skill-evals.evidence.v1";
//#endregion
//#region src/evidence-schema.ts
// Shared field shapes.
const OptionalString = Schema.optional(Schema.String);
const OptionalNumber = Schema.optional(Schema.Number);
const StringArray = Schema.mutable(Schema.Array(Schema.String));

/** One executed command; `args` defaults to an empty list. */
const CommandEventSchema = Schema.Struct({
  command: Schema.String,
  args: Schema.optionalWith(StringArray, { default: () => [] }),
  exitCode: Schema.Number,
  signal: OptionalString,
  stdout: OptionalString,
  stderr: OptionalString,
  startedAt: Schema.Number,
  durationMs: Schema.Number
});

/** One file-system change. */
const FileEventSchema = Schema.Struct({
  path: Schema.String,
  op: Schema.Literal("create", "modify", "delete")
});

/** One tool invocation; `args` and `result` are not validated further. */
const ToolCallEventSchema = Schema.Struct({
  tool: Schema.String,
  provider: OptionalString,
  server: OptionalString,
  args: Schema.optional(Schema.Unknown),
  result: Schema.optional(Schema.Unknown),
  startedAt: Schema.Number,
  durationMs: Schema.Number
});

/** One skill load, delivered either natively or through MCP. */
const SkillLoadEventSchema = Schema.Struct({
  skill: Schema.String,
  delivery: Schema.Literal("native", "mcp"),
  provider: OptionalString,
  server: OptionalString,
  source: OptionalString,
  startedAt: Schema.Number
});

/** Token usage counters; every counter is optional. */
const UsageSchema = Schema.Struct({
  inputTokens: OptionalNumber,
  outputTokens: OptionalNumber,
  totalTokens: OptionalNumber,
  cacheReadTokens: OptionalNumber,
  cacheWriteTokens: OptionalNumber
});

/** Run-level summary fields. */
const RunSummarySchema = Schema.Struct({
  runDir: Schema.String,
  worldPath: Schema.String,
  fixture: Schema.String,
  durationMs: OptionalNumber
});

/**
 * The full evidence snapshot. Missing `output`, event lists and `usage`
 * are filled with empty defaults while decoding.
 */
const EvidenceSnapshotSchema = Schema.Struct({
  schemaVersion: Schema.Literal(EVIDENCE_SCHEMA_VERSION),
  output: Schema.optionalWith(Schema.String, { default: () => "" }),
  run: RunSummarySchema,
  commands: Schema.optionalWith(
    Schema.mutable(Schema.Array(CommandEventSchema)),
    { default: () => [] }
  ),
  filesWritten: Schema.optionalWith(
    Schema.mutable(Schema.Array(FileEventSchema)),
    { default: () => [] }
  ),
  toolCalls: Schema.optionalWith(
    Schema.mutable(Schema.Array(ToolCallEventSchema)),
    { default: () => [] }
  ),
  skillsLoaded: Schema.optionalWith(
    Schema.mutable(Schema.Array(SkillLoadEventSchema)),
    { default: () => [] }
  ),
  usage: Schema.optionalWith(UsageSchema, { default: () => ({}) }),
  extensions: Schema.optional(Schema.Record({
    key: Schema.String,
    value: Schema.Unknown
  }))
});

/** Decoder that reports all schema errors rather than only the first. */
const decodeEvidenceSnapshot = Schema.decodeUnknownEither(EvidenceSnapshotSchema, { errors: "all" });
422
/**
 * Decode an evidence snapshot without throwing.
 *
 * @param {*} input - Candidate snapshot.
 * @returns {object} Either: Right with the decoded snapshot, or Left with an
 *   `Error` whose message is the tree-formatted parse error.
 */
function decodeEvidenceSnapshotEither(input) {
  const outcome = decodeEvidenceSnapshot(input);
  if (!Either.isRight(outcome)) {
    const message = ParseResult.TreeFormatter.formatErrorSync(outcome.left);
    return Either.left(new Error(message));
  }
  return Either.right(outcome.right);
}
427
/**
 * Decode an evidence snapshot, throwing on invalid input.
 *
 * @param {*} input - Candidate snapshot.
 * @returns {object} The decoded snapshot.
 * @throws {Error} The formatted parse error when `input` is invalid.
 */
function parseEvidenceSnapshot(input) {
  const outcome = decodeEvidenceSnapshotEither(input);
  if (!Either.isRight(outcome)) throw outcome.left;
  return outcome.right;
}
432
+ //#endregion
433
+ //#region src/agent/evidence.ts
434
/**
 * Default rules for recognising a skill load in a tool call. Each pattern
 * exposes the skill name through the named group `skill`.
 */
const DEFAULT_SKILL_EVIDENCE_CONFIG = {
  // Resource reads: where to find the URI in the call args, and what a
  // skill URI looks like.
  mcpResource: {
    uriArgPaths: ["uri"],
    uriPatterns: ["^skill://(?<skill>[^/]+)/SKILL\\.md$"]
  },
  // Dedicated loader tools, recognised by tool name.
  mcpTool: {
    toolPatterns: ["^load_(?<skill>[A-Za-z0-9_-]+)_skill$"]
  }
};
441
/**
 * Mutable accumulator for the evidence gathered during one run: commands,
 * file writes, tool calls, derived skill loads, usage and final output.
 */
var EvidenceCollector = class EvidenceCollector {
  skillEvidenceConfig;
  snapshot = {
    schemaVersion: EVIDENCE_SCHEMA_VERSION,
    output: "",
    run: {
      runDir: "",
      worldPath: "",
      fixture: ""
    },
    commands: [],
    filesWritten: [],
    toolCalls: [],
    skillsLoaded: [],
    usage: {}
  };

  /**
   * @param {object} [skillEvidenceConfig={}] - Overrides merged over the
   *   default skill-detection rules.
   */
  constructor(skillEvidenceConfig = {}) {
    this.skillEvidenceConfig = mergeSkillEvidenceConfig(skillEvidenceConfig);
  }

  /** Record an executed command. */
  addCommand(e) {
    this.snapshot.commands.push(e);
  }

  /** Record a file write. */
  addFileWrite(e) {
    this.snapshot.filesWritten.push(e);
  }

  /** Record a tool call and, when it looks like a skill load, that too. */
  addToolCall(e) {
    this.snapshot.toolCalls.push(e);
    const derivedLoad = skillLoadFromToolCall(e, this.skillEvidenceConfig);
    if (derivedLoad) this.addSkillLoad(derivedLoad);
  }

  /**
   * Record a skill load unless one with the same skill, delivery, provider
   * and server is already present.
   */
  addSkillLoad(e) {
    const alreadyRecorded = this.snapshot.skillsLoaded.some(
      (existing) =>
        existing.skill === e.skill &&
        existing.delivery === e.delivery &&
        existing.provider === e.provider &&
        existing.server === e.server
    );
    if (!alreadyRecorded) this.snapshot.skillsLoaded.push(e);
  }

  /** Replace the usage counters. */
  setUsage(u) {
    this.snapshot.usage = u;
  }

  /** Add `u` to the current usage counters. */
  addUsage(u) {
    this.snapshot.usage = mergeUsage(this.snapshot.usage, u);
  }

  /** Set the final output text. */
  setOutput(output) {
    this.snapshot.output = output;
  }

  /** Set the run summary. */
  setRun(run) {
    this.snapshot.run = run;
  }

  /**
   * Produce a validated snapshot built from shallow copies of the
   * collected state.
   *
   * @returns {object} The decoded snapshot.
   * @throws {Error} When the collected state does not satisfy the schema.
   */
  toSnapshot() {
    const {
      schemaVersion,
      output,
      run,
      commands,
      filesWritten,
      toolCalls,
      skillsLoaded,
      usage,
      extensions
    } = this.snapshot;
    return parseEvidenceSnapshot({
      schemaVersion,
      output,
      run: { ...run },
      commands: [...commands],
      filesWritten: [...filesWritten],
      toolCalls: [...toolCalls],
      skillsLoaded: [...skillsLoaded],
      usage: { ...usage },
      extensions: extensions ? { ...extensions } : void 0
    });
  }

  /**
   * Write the snapshot into `runDir` using the Node services layer.
   *
   * @param {string} runDir - Target directory.
   * @returns {Promise<string>} Path of the written file.
   */
  async writeTo(runDir) {
    const program = writeEvidenceToEffect(this, runDir).pipe(Effect.provide(NodeServicesLive));
    return Effect.runPromise(program);
  }

  /**
   * Create a collector (with default skill-detection rules) pre-populated
   * from an existing snapshot.
   *
   * @param {*} snapshot - Snapshot to validate and adopt.
   * @returns {EvidenceCollector}
   * @throws {Error} When `snapshot` does not satisfy the schema.
   */
  static fromSnapshot(snapshot) {
    const restored = new EvidenceCollector();
    restored.snapshot = parseEvidenceSnapshot(snapshot);
    return restored;
  }
};
509
/**
 * Overlay a partial skill-evidence config on the defaults. Each list is
 * replaced as a whole when provided; `nativeArgs` is carried over only
 * when set.
 *
 * @param {object} config - Partial overrides.
 * @returns {object} Complete config.
 */
function mergeSkillEvidenceConfig(config) {
  const defaults = DEFAULT_SKILL_EVIDENCE_CONFIG;
  const merged = {
    mcpResource: {
      uriArgPaths: config.mcpResource?.uriArgPaths ?? defaults.mcpResource.uriArgPaths,
      uriPatterns: config.mcpResource?.uriPatterns ?? defaults.mcpResource.uriPatterns
    },
    mcpTool: {
      toolPatterns: config.mcpTool?.toolPatterns ?? defaults.mcpTool.toolPatterns
    }
  };
  if (config.nativeArgs) merged.nativeArgs = config.nativeArgs;
  return merged;
}
519
/**
 * Derive a skill-load event from a tool call, if it represents one.
 *
 * When the call args hold a string at one of the configured URI paths,
 * only the URI patterns are tried; otherwise the tool name is tried
 * against the tool patterns.
 *
 * @param {object} event - Tool call event.
 * @param {object} config - Skill-evidence config.
 * @returns {object|undefined} An "mcp" skill-load event, or undefined.
 */
function skillLoadFromToolCall(event, config) {
  const resourceRules = config.mcpResource;
  const uri = skillUriFromArgs(event.args, resourceRules?.uriArgPaths ?? []);

  let skill;
  if (uri) {
    skill = skillFromPattern(uri, resourceRules?.uriPatterns ?? []);
  } else {
    skill = skillFromTool(event.tool, config.mcpTool?.toolPatterns ?? []);
  }
  if (!skill) return void 0;

  // Prefer the server recorded on the event, then one named in the args.
  const server = event.server ?? serverFromArgs(event.args);
  const load = { skill, delivery: "mcp" };
  if (event.provider) load.provider = event.provider;
  if (server) load.server = server;
  load.source = event.tool;
  load.startedAt = event.startedAt;
  return load;
}
533
/**
 * Find the first string stored at any of the given dotted paths.
 *
 * @param {*} args - Tool call arguments.
 * @param {string[]} paths - Dotted paths to try, in order.
 * @returns {string|undefined}
 */
function skillUriFromArgs(args, paths) {
  return paths
    .map((path) => valueAtPath(args, path))
    .find((candidate) => typeof candidate === "string");
}
539
/**
 * Read a string `server` property from plain-object tool arguments.
 *
 * @param {*} args - Tool call arguments.
 * @returns {string|undefined} The server name, or undefined when `args` is
 *   not a plain object or `server` is not a string.
 */
function serverFromArgs(args) {
  const isPlainObject = Boolean(args) && typeof args === "object" && !Array.isArray(args);
  if (!isPlainObject) return void 0;
  const { server } = args;
  if (typeof server !== "string") return void 0;
  return server;
}
544
/**
 * Extract a skill name from `value` using the first pattern that yields
 * one. Patterns are compiled case-insensitively; the name is taken from
 * the named group `skill`, falling back to the first capture group.
 * Patterns that are not valid regular expressions are skipped.
 *
 * @param {string} value - Text to match (a URI or a tool name).
 * @param {string[]} patterns - Regex sources to try, in order.
 * @returns {string|undefined}
 */
function skillFromPattern(value, patterns) {
  for (const source of patterns) {
    let matcher = null;
    try {
      matcher = new RegExp(source, "i");
    } catch {
      // Best-effort: one bad pattern must not disable the others.
      continue;
    }
    const found = matcher.exec(value);
    const skill = found?.groups?.skill ?? found?.[1];
    if (skill) return skill;
  }
  return void 0;
}
557
/**
 * Extract a skill name from a tool name, turning underscores into hyphens.
 *
 * @param {string} tool - Tool name.
 * @param {string[]} patterns - Regex sources to try, in order.
 * @returns {string|undefined}
 */
function skillFromTool(tool, patterns) {
  const rawName = skillFromPattern(tool, patterns);
  return rawName?.replaceAll("_", "-");
}
560
/**
 * Follow a dot-separated path through nested plain objects.
 *
 * @param {*} value - Root value.
 * @param {string} path - Dotted path such as "params.uri".
 * @returns {*} The value at the path, or undefined as soon as a step has
 *   to be taken from something that is not a plain object (arrays count
 *   as not traversable).
 */
function valueAtPath(value, path) {
  const step = (node, segment) => {
    const traversable = Boolean(node) && typeof node === "object" && !Array.isArray(node);
    return traversable ? node[segment] : void 0;
  };
  return path.split(".").reduce(step, value);
}
568
/**
 * Add two numbers where either may be undefined.
 *
 * @param {number|undefined} a
 * @param {number|undefined} b
 * @returns {number|undefined} The sum when both are defined, the defined
 *   one when only one is, and undefined when neither is.
 */
function addOptionalNumbers(a, b) {
  if (a !== void 0 && b !== void 0) return a + b;
  return a === void 0 ? b : a;
}
573
/**
 * Sum two usage records counter by counter. A counter missing from both
 * inputs stays undefined in the result.
 *
 * @param {object} a - First usage record.
 * @param {object} b - Second usage record.
 * @returns {object} Record with all five counters as keys.
 */
function mergeUsage(a, b) {
  const counters = [
    "inputTokens",
    "outputTokens",
    "totalTokens",
    "cacheReadTokens",
    "cacheWriteTokens"
  ];
  return Object.fromEntries(
    counters.map((counter) => [counter, addOptionalNumbers(a[counter], b[counter])])
  );
}
582
/**
 * Effect that writes the collector's snapshot as pretty-printed JSON to
 * `<runDir>/evidence.json` through the `FileSystem` service.
 *
 * @param {object} collector - Source of the snapshot (`toSnapshot()`).
 * @param {string} runDir - Target directory.
 * @returns {object} Effect yielding the written file's path.
 */
function writeEvidenceToEffect(collector, runDir) {
  return Effect.gen(function* () {
    const fileSystem = yield* FileSystem;
    const target = join(runDir, "evidence.json");
    const payload = JSON.stringify(collector.toSnapshot(), null, 2);
    yield* fileSystem.writeText(target, payload);
    return target;
  });
}
590
/**
 * Wrap a snapshot in the accessor interface used by runtime checks.
 * Each accessor reads the snapshot property at call time.
 *
 * @param {object} s - Evidence snapshot.
 * @returns {object} Accessors `commands`, `filesWritten`, `toolCalls`,
 *   `skillsLoaded` and `usage`.
 */
function evidenceFromSnapshot(s) {
  const accessor = (field) => () => s[field];
  return {
    commands: accessor("commands"),
    filesWritten: accessor("filesWritten"),
    toolCalls: accessor("toolCalls"),
    skillsLoaded: accessor("skillsLoaded"),
    usage: accessor("usage")
  };
}
599
+ //#endregion
600
+ //#region src/agent/command-runner.ts
601
/** Default number of trailing characters kept per captured output stream. */
const DEFAULT_OUTPUT_LIMIT = 4096;
/** Delay (ms) between the timeout kill and forcing the result. */
const KILL_GRACE_MS = 1e3;
/** Delay (ms) after "exit" before the result is finalised. */
const EXIT_STDIO_FLUSH_MS = 50;
/** Effect context tag under which the process runner is provided. */
var ProcessRunner = class extends Context.Tag(
  "agent-skill-evals/promptfoo/ProcessRunner"
)() {};
605
/**
 * Append `chunk` to `current`, keeping only the last `limit` characters.
 *
 * @param {string} current - Text accumulated so far.
 * @param {string} chunk - Text to append.
 * @param {number} limit - Maximum length to keep; <= 0 keeps nothing.
 * @returns {string}
 */
function appendLimited(current, chunk, limit) {
  if (limit <= 0) return "";
  const combined = current + chunk;
  const overflow = combined.length - limit;
  return overflow > 0 ? combined.slice(overflow) : combined;
}
610
/**
 * Signal a spawned child. On non-Windows platforms the signal is sent to
 * the negative pid (the child's process group); on Windows it is sent to
 * the child directly. If that attempt throws, the child is signalled
 * directly, and a failure of that fallback is ignored as well.
 *
 * @param {object} child - Child process (`pid`, `kill(signal)`).
 * @param {string} signal - Signal name, e.g. "SIGTERM".
 * @returns {void}
 */
function killProcessGroup(child, signal) {
  if (!child.pid) return;
  const onWindows = process.platform === "win32";
  try {
    if (onWindows) {
      child.kill(signal);
    } else {
      process.kill(-child.pid, signal);
    }
    return;
  } catch {
    // Fall through to signalling the child directly.
  }
  try {
    child.kill(signal);
  } catch {
    // Best-effort: the process may already be gone.
  }
}
621
/**
 * Effect that runs a command through whichever `ProcessRunner` is provided.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} [args=[]] - Arguments.
 * @param {object} options - Runner options, passed through unchanged.
 * @returns {object} Effect yielding the runner's result.
 */
function runCommandEffect(command, args = [], options) {
  const invoke = (runner) => runner.run(command, args, options);
  return Effect.flatMap(ProcessRunner, invoke);
}
/** Layer providing the Node `child_process`-based runner. */
const ProcessRunnerLive = Layer.succeed(ProcessRunner, { run: nodeRunCommandEffect });
625
/**
 * Run a command with `child_process.spawn` and resolve with a result
 * record; failures are reported through the record rather than thrown.
 *
 * Changes from the previous version:
 * - The result for an "exit" event is assembled after the stdio flush
 *   window instead of before it, so output that arrives during the window
 *   is no longer dropped.
 * - stdout/stderr are decoded as UTF-8 streams, so a multi-byte character
 *   split across two chunks is no longer corrupted.
 * - Errors on the child's stdin (for example EPIPE when the child exits
 *   before reading its input) are absorbed instead of surfacing as an
 *   uncaught exception; the child's exit status still reports the outcome.
 * - `args` and `options` default to empty values.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} [args=[]] - Arguments.
 * @param {object} [options={}] - `cwd`, `env`, `stdin`, `timeoutMs`,
 *   `stdoutLimit`, `stderrLimit`, `onStdout`, `onStderr`.
 * @returns {object} Effect yielding `{command, args, exitCode, signal,
 *   stdout, stderr, startedAt, timedOut, durationMs}` (plus `error` when
 *   the process emitted an "error" event). `exitCode` is -1 when no exit
 *   code is available.
 */
function nodeRunCommandEffect(command, args = [], options = {}) {
  const stdoutLimit = options.stdoutLimit ?? DEFAULT_OUTPUT_LIMIT;
  const stderrLimit = options.stderrLimit ?? DEFAULT_OUTPUT_LIMIT;
  return Effect.gen(function* () {
    const startedAt = yield* Clock.currentTimeMillis;
    return yield* Effect.async((resume, signal) => {
      const child = spawn(command, [...args], {
        cwd: options.cwd,
        env: options.env,
        // Detached on non-Windows platforms so the child's process group
        // can be signalled via its negative pid (see killProcessGroup).
        detached: process.platform !== "win32",
        stdio: [
          "pipe",
          "pipe",
          "pipe"
        ]
      });
      let stdout = "";
      let stderr = "";
      let timedOut = false;
      let settled = false;
      let timeoutTimer = null;
      let forceTimer = null;
      let exitFlushTimer = null;
      let abortListener = null;

      // Settle exactly once: cancel timers, release the pipes, detach the
      // abort listener and resume the effect with the elapsed time added.
      const finish = (result) => {
        if (settled) return;
        settled = true;
        if (timeoutTimer) clearTimeout(timeoutTimer);
        if (forceTimer) clearTimeout(forceTimer);
        if (exitFlushTimer) clearTimeout(exitFlushTimer);
        child.stdout?.destroy();
        child.stderr?.destroy();
        child.stdin?.destroy();
        if (abortListener) signal.removeEventListener("abort", abortListener);
        // Send SIGTERM to the process group on non-Windows platforms.
        if (process.platform !== "win32") killProcessGroup(child, "SIGTERM");
        resume(Clock.currentTimeMillis.pipe(Effect.map((endedAt) => ({
          ...result,
          durationMs: endedAt - startedAt
        }))));
      };

      // Interruption: kill immediately and settle without waiting for the
      // process events. The result is flagged `timedOut`, as before.
      abortListener = () => {
        timedOut = true;
        stderr = appendLimited(stderr, `${stderr ? "\n" : ""}agent-skill-evals: command interrupted`, stderrLimit);
        killProcessGroup(child, "SIGKILL");
        finish({
          command,
          args: [...args],
          exitCode: -1,
          signal: "SIGKILL",
          stdout,
          stderr,
          startedAt,
          timedOut
        });
      };
      signal.addEventListener("abort", abortListener, { once: true });

      // Decode as UTF-8 at the stream level so characters spanning chunk
      // boundaries stay intact.
      child.stdout?.setEncoding("utf8");
      child.stderr?.setEncoding("utf8");
      child.stdout?.on("data", (chunk) => {
        const text = chunk.toString();
        stdout = appendLimited(stdout, text, stdoutLimit);
        options.onStdout?.(text);
      });
      child.stderr?.on("data", (chunk) => {
        const text = chunk.toString();
        stderr = appendLimited(stderr, text, stderrLimit);
        options.onStderr?.(text);
      });
      // Without a listener, a stdin stream error (for example EPIPE) would
      // be thrown as an uncaught exception. It carries no extra information
      // here, so it is deliberately ignored.
      child.stdin?.on("error", () => {});

      child.on("error", (error) => {
        stderr = appendLimited(stderr, String(error), stderrLimit);
        finish({
          command,
          args: [...args],
          exitCode: -1,
          stdout,
          stderr,
          startedAt,
          timedOut,
          error
        });
      });
      child.on("exit", (code, exitSignal) => {
        // "close" normally settles first; this timer is the fallback for
        // when the pipes stay open. The result is built inside the timer so
        // it includes output received during the flush window.
        exitFlushTimer = setTimeout(() => finish({
          command,
          args: [...args],
          exitCode: code ?? -1,
          signal: exitSignal ?? void 0,
          stdout,
          stderr,
          startedAt,
          timedOut
        }), EXIT_STDIO_FLUSH_MS);
      });
      child.on("close", (code, closeSignal) => {
        finish({
          command,
          args: [...args],
          exitCode: code ?? -1,
          signal: closeSignal ?? void 0,
          stdout,
          stderr,
          startedAt,
          timedOut
        });
      });

      if (options.stdin !== void 0) child.stdin?.write(options.stdin);
      child.stdin?.end();

      // A falsy timeout (undefined, 0) means "no timeout".
      if (options.timeoutMs) timeoutTimer = setTimeout(() => {
        timedOut = true;
        stderr = appendLimited(stderr, `${stderr ? "\n" : ""}agent-skill-evals: command timed out after ${options.timeoutMs}ms`, stderrLimit);
        killProcessGroup(child, "SIGKILL");
        // If neither "exit" nor "close" settles the run after the kill,
        // settle it ourselves once the grace period has elapsed.
        forceTimer = setTimeout(() => {
          finish({
            command,
            args: [...args],
            exitCode: -1,
            signal: "SIGKILL",
            stdout,
            stderr,
            startedAt,
            timedOut
          });
        }, KILL_GRACE_MS);
      }, options.timeoutMs);
    });
  });
}
751
+ //#endregion
752
+ //#region src/agent/jsonl-stream.ts
753
/**
 * Parse one JSONL line and append the value to `events`.
 *
 * @param {Array} events - Events parsed so far (never mutated).
 * @param {string} line - One line of input.
 * @returns {Array} A new array with the parsed value appended, or the same
 *   `events` array when the line is blank or is not valid JSON.
 */
function appendLine(events, line) {
  if (line.trim() === "") return events;
  let parsed;
  try {
    parsed = JSON.parse(line);
  } catch {
    // Lines that are not valid JSON are skipped on purpose.
    return events;
  }
  return [...events, parsed];
}
761
/**
 * Feed a text chunk into a JSONL parse state.
 *
 * The chunk is appended to the unfinished line carried in the state and
 * split on "\n". Every complete line is parsed; the trailing partial line
 * becomes the new leftover.
 *
 * @param {{leftover: string, events: Array}} state - Current state.
 * @param {string} text - Newly received text.
 * @returns {{leftover: string, events: Array}} The next state.
 */
function parseChunk(state, text) {
  const segments = (state.leftover + text).split("\n");
  const leftover = segments.pop() ?? "";
  let events = state.events;
  for (const segment of segments) {
    events = appendLine(events, segment);
  }
  return { leftover, events };
}
768
/**
 * Create an incremental JSONL parser.
 *
 * @returns {{push: function(string): Array, finish: function(): Array}}
 *   `push(chunk)` returns the events completed by that chunk;
 *   `finish()` parses whatever partial line remains and resets the buffer.
 */
function createJsonlEventParser() {
  let pending = "";

  const push = (chunk) => {
    const next = parseChunk({ leftover: pending, events: [] }, chunk);
    pending = next.leftover;
    return next.events;
  };

  const finish = () => {
    const tail = appendLine([], pending);
    pending = "";
    return tail;
  };

  return { push, finish };
}
786
+ //#endregion
787
+ //#region src/agent/adapters.ts
788
/**
 * Insert the prompt into an argument list.
 *
 * The last "-" argument, if any, is replaced by the prompt; otherwise the
 * prompt is appended. The input array is not modified.
 *
 * @param {string[]} args - Argument template.
 * @param {string} prompt - Prompt text.
 * @returns {string[]} New argument list.
 */
function argsWithPrompt(args, prompt) {
  const slot = args.lastIndexOf("-");
  const next = [...args];
  if (slot === -1) {
    next.push(prompt);
  } else {
    next[slot] = prompt;
  }
  return next;
}
793
/**
 * Express an absolute path relative to `cwd` when it lies inside `cwd`.
 *
 * A leading "/private" is removed from both sides before comparing
 * (presumably for macOS, where temp directories resolve under /private —
 * confirm). Relative inputs, the cwd itself and paths outside `cwd` are
 * returned unchanged.
 *
 * "Outside" is decided by a ".." path segment, not by a ".." string
 * prefix, so an entry inside `cwd` whose name merely starts with two dots
 * (for example "..cache/x") is still made relative.
 *
 * @param {string} path - Path reported by a tool.
 * @param {string} cwd - Working directory of the run.
 * @returns {string}
 */
function normalizePathFromCwd(path, cwd) {
  if (!isAbsolute(path)) return path;
  const withoutPrivate = (value) => (value.startsWith("/private/") ? value.slice("/private".length) : value);
  const rel = relative(withoutPrivate(cwd), withoutPrivate(path));
  const escapesCwd = rel === ".." || rel.startsWith("../") || rel.startsWith("..\\");
  return rel && !escapesCwd && !isAbsolute(rel) ? rel : path;
}
799
/**
 * Return a copy of a tool call's argument record with path arguments made
 * relative to `cwd`.
 *
 * Only string values under the keys "path" and "file_path" are rewritten.
 * When "file_path" is a string and no "path" value has been set yet, the same
 * value is also exposed as "path". Anything that is not a plain object
 * (null, primitives, arrays) is returned as-is.
 *
 * @param {unknown} args - Raw tool-call arguments.
 * @param {string} cwd - Working directory used for relativising paths.
 * @returns {unknown} A new normalised record, or `args` when it is not a record.
 */
function normalizeToolCallArgs(args, cwd) {
  const isRecord = Boolean(args) && typeof args === "object" && !Array.isArray(args);
  if (!isRecord) return args;
  const pathKeys = ["path", "file_path"];
  const out = {};
  Object.entries(args).forEach(([name, raw]) => {
    const isPathArg = pathKeys.includes(name) && typeof raw === "string";
    const value = isPathArg ? normalizePathFromCwd(raw, cwd) : raw;
    out[name] = value;
    // Mirror file_path into path unless a path value is already present.
    const needsAlias = name === "file_path" && typeof value === "string" && out.path === undefined;
    if (needsAlias) out.path = value;
  });
  return out;
}
809
/**
 * Split a tool name of the form `mcp__<server>__<tool>` into separate `server`
 * and `tool` fields.
 *
 * Events whose tool name does not match are returned unchanged (same object).
 * An existing non-nullish `event.server` takes precedence over the parsed one.
 *
 * @param {{ tool: string, server?: string }} event - Tool-call event.
 * @returns {object} The original event, or a copy with `server`/`tool` rewritten.
 */
function normalizeMcpToolCall(event) {
  const parts = /^mcp__(.+?)__(.+)$/.exec(event.tool);
  if (parts === null) return event;
  const [, serverName, toolName] = parts;
  return {
    ...event,
    server: event.server ?? serverName,
    tool: toolName ?? event.tool,
  };
}
818
/**
 * Decode a JSON-encoded object or array that arrives as a string.
 *
 * Non-string inputs, strings that are not valid JSON, and JSON that decodes to
 * a primitive or `null` are all returned unchanged.
 *
 * @param {unknown} value - Possibly JSON-encoded value.
 * @returns {unknown} The decoded object/array, or the original `value`.
 */
function parseJsonObjectString(value) {
  if (typeof value === "string") {
    try {
      const decoded = JSON.parse(value);
      if (decoded && typeof decoded === "object") return decoded;
    } catch {
      // Not JSON: fall through and hand the raw string back.
    }
  }
  return value;
}
827
/**
 * Wrap an evidence sink so that tool calls are normalised before being stored.
 *
 * `addCommand`, `setUsage` and `addUsage` are forwarded untouched. `addToolCall`
 * first splits MCP-style tool names and then relativises path arguments against
 * `cwd`. The supplied `now` function is exposed as-is.
 *
 * @param {object} evidence - Underlying evidence sink.
 * @param {string} cwd - Directory used to relativise tool-call paths.
 * @param {() => number} now - Clock exposed to event handlers.
 * @returns {object} An evidence facade with the same recording methods plus `now`.
 */
function evidenceWithRelativeToolPaths(evidence, cwd, now) {
  const addToolCall = (event) => {
    const call = normalizeMcpToolCall(event);
    const args = normalizeToolCallArgs(call.args, cwd);
    evidence.addToolCall({ ...call, args });
  };
  return {
    addCommand(event) {
      return evidence.addCommand(event);
    },
    addToolCall,
    setUsage(usage) {
      return evidence.setUsage(usage);
    },
    addUsage(usage) {
      return evidence.addUsage(usage);
    },
    now,
  };
}
842
/**
 * Map a usage record with snake_case, camelCase or short key names onto one
 * camelCase shape.
 *
 * For each field the first non-nullish spelling wins. `totalTokens` falls back
 * to the sum of whichever of input/output tokens are numbers.
 *
 * @param {Record<string, unknown>} usage - Raw usage object from an agent event.
 * @returns {{ inputTokens: unknown, outputTokens: unknown, totalTokens: unknown,
 *   cacheReadTokens: unknown, cacheWriteTokens: unknown }}
 */
function normalizeUsage(usage) {
  const normalized = {
    inputTokens: usage.input_tokens ?? usage.inputTokens ?? usage.input,
    outputTokens: usage.output_tokens ?? usage.outputTokens ?? usage.output,
  };
  normalized.totalTokens =
    usage.total_tokens ?? usage.totalTokens ?? addKnownNumbers(normalized.inputTokens, normalized.outputTokens);
  normalized.cacheReadTokens = usage.cache_read_input_tokens ?? usage.cacheReadTokens ?? usage.cacheRead;
  normalized.cacheWriteTokens = usage.cache_creation_input_tokens ?? usage.cacheWriteTokens ?? usage.cacheWrite;
  return normalized;
}
855
/**
 * Sum the arguments that are numbers, ignoring everything else.
 *
 * @param {...unknown} values - Candidate values.
 * @returns {number | undefined} The sum, or `undefined` when no argument is a number.
 */
function addKnownNumbers(...values) {
  let total;
  for (const value of values) {
    if (typeof value !== "number") continue;
    total = (total ?? 0) + value;
  }
  return total;
}
859
/**
 * Map the tool names "bash", "edit", "read" and "write" (any letter case) to
 * their capitalised spelling. Any other name is returned unchanged.
 *
 * @param {string} tool - Tool name as reported by the agent.
 * @returns {string} The capitalised name, or `tool` itself.
 */
function normalizePiToolName(tool) {
  const canonicalNames = new Map([
    ["bash", "Bash"],
    ["edit", "Edit"],
    ["read", "Read"],
    ["write", "Write"],
  ]);
  return canonicalNames.get(tool.toLowerCase()) ?? tool;
}
868
/**
 * Read the timestamp to stamp on a recorded event.
 *
 * @param {{ now?: () => number }} evidence - Evidence sink, optionally carrying a clock.
 * @returns {number} `evidence.now()` when available and non-nullish, otherwise 0.
 */
function evidenceStartedAt(evidence) {
  const reported = evidence.now?.();
  return reported === undefined || reported === null ? 0 : reported;
}
871
/**
 * Run an agent CLI that emits JSONL on stdout and project its events into evidence.
 *
 * The prompt is written to stdin by default, or injected into the argument list
 * when `options.promptDelivery` is "arg". Each stdout chunk is parsed
 * incrementally and every decoded event is handed to `onEvent` together with an
 * evidence facade and a callback that appends to the final output text. After
 * the process ends, the invocation itself is recorded via `evidence.addCommand`.
 *
 * @param {object} input - `{ command, args, cwd, prompt, evidence, timeoutMs, env }`.
 * @param {(evt: unknown, evidence: object, appendFinal: (text: string) => unknown) => void} onEvent
 *   Adapter-specific event handler.
 * @param {{ promptDelivery?: "stdin" | "arg" }} [options]
 * @returns An Effect yielding `{ output, exitCode, timedOut, error?, durationMs }`.
 */
function runJsonlAdapter(input, onEvent, options = {}) {
  const { command, args, cwd, prompt, evidence, timeoutMs, env } = input;
  const promptDelivery = options.promptDelivery ?? "stdin";
  const spawnArgs = promptDelivery === "arg" ? argsWithPrompt(args, prompt) : [...args];
  return Effect.gen(function* () {
    const runStartedAt = yield* Clock.currentTimeMillis;
    // Every event recorded during this run is stamped with the run start time.
    const adapterEvidence = evidenceWithRelativeToolPaths(evidence, cwd, () => runStartedAt);
    const parser = createJsonlEventParser();
    let finalText = "";
    const appendFinal = (text) => finalText += text;
    const dispatch = (events) => {
      for (const evt of events) onEvent(evt, adapterEvidence, appendFinal);
    };

    const result = yield* runCommandEffect(command, spawnArgs, {
      cwd,
      env,
      stdin: promptDelivery === "stdin" ? prompt : void 0,
      timeoutMs,
      stdoutLimit: 0,
      onStdout: (chunk) => {
        dispatch(parser.push(chunk));
      },
    });
    // Flush a final line that was not newline-terminated.
    dispatch(parser.finish());

    evidence.addCommand({
      command,
      args: [...spawnArgs],
      exitCode: result.exitCode,
      stderr: result.stderr.slice(0, 4096),
      startedAt: result.startedAt,
      durationMs: result.durationMs,
    });

    let error;
    if (result.error) {
      error = `adapter error: failed to start "${command}": ${result.error.message}`;
    } else if (result.timedOut) {
      error = `${command} timed out after ${timeoutMs ?? 0}ms`;
    } else if (result.exitCode !== 0) {
      const stderrText = result.stderr.trim();
      error = `${command} exited ${result.exitCode}${stderrText ? `: ${stderrText}` : ""}`;
    }
    // With no agent output at all, surface the failure as the output text.
    if (error && !finalText.trim()) finalText = error;

    return {
      output: finalText.trim(),
      exitCode: result.exitCode,
      timedOut: result.timedOut,
      ...(error ? { error } : {}),
      durationMs: result.durationMs,
    };
  });
}
911
/**
 * Adapter for Claude Code's stream-json output, i.e. the events emitted by
 * `claude -p ... --output-format stream-json`.
 *
 * Event handling lives in `handleClaudeEvent`; the shapes it looks at are
 * (see Claude Code docs):
 *   - { type: "system" | "assistant" | "user" | "result", ... }
 *   - tool_use blocks inside assistant content (Bash, Edit, Write, etc.)
 *
 * The prompt is delivered on stdin (the `runJsonlAdapter` default).
 */
const claudeCodeJsonAdapter = {
  id: "claude-code-json",
  run: (input) => runJsonlAdapter(input, handleClaudeEvent),
};
925
/**
 * Adapter registered as "codex-json". Events go through `handleCodexEvent`,
 * and the prompt is passed as a command-line argument rather than on stdin.
 */
const codexJsonAdapter = {
  id: "codex-json",
  run: (input) => runJsonlAdapter(input, handleCodexEvent, { promptDelivery: "arg" }),
};
931
/**
 * Adapter registered as "pi-json". Events go through `handlePiEvent`; the
 * prompt is delivered on stdin (the `runJsonlAdapter` default).
 */
const piJsonAdapter = {
  id: "pi-json",
  run: (input) => runJsonlAdapter(input, handlePiEvent),
};
937
/**
 * Adapter registered as "internal-test-json". It reuses `handleCodexEvent` but,
 * unlike "codex-json", delivers the prompt on stdin.
 */
const internalTestJsonAdapter = {
  id: "internal-test-json",
  run: (input) => runJsonlAdapter(input, handleCodexEvent),
};
943
/**
 * Project one decoded Claude Code stream-json event into evidence.
 *
 * - "result" events: a string `result` is appended to the final output, and an
 *   object `usage` replaces the recorded usage via `setUsage`.
 * - "assistant" events: an object `message.usage` is added via `addUsage`; text
 *   blocks are appended to the final output; `tool_use` blocks are recorded as
 *   tool calls.
 *
 * Non-object events are ignored.
 *
 * @param {unknown} evt - Decoded JSONL event.
 * @param {object} evidence - Evidence sink (`addToolCall`, `addUsage`, `setUsage`, optional `now`).
 * @param {(text: string) => unknown} appendFinal - Appends to the final output text.
 */
function handleClaudeEvent(evt, evidence, appendFinal) {
  const isObject = (value) => Boolean(value) && typeof value === "object";
  if (!isObject(evt)) return;

  const isResult = evt.type === "result";
  if (isResult && typeof evt.result === "string") appendFinal(evt.result);

  if (evt.type === "assistant" && isObject(evt.message)) {
    const { usage, content } = evt.message;
    if (isObject(usage)) evidence.addUsage(normalizeUsage(usage));
    const blocks = Array.isArray(content) ? content : [];
    for (const block of blocks) {
      if (!isObject(block)) continue;
      if (block.type === "text" && typeof block.text === "string") appendFinal(block.text);
      if (block.type === "tool_use" && typeof block.name === "string") {
        evidence.addToolCall({
          tool: block.name,
          provider: "claude-code-json",
          args: block.input,
          startedAt: evidenceStartedAt(evidence),
          durationMs: 0,
        });
      }
    }
  }

  // Usage on the result event is applied last and overrides earlier totals.
  if (isResult && isObject(evt.usage)) evidence.setUsage(normalizeUsage(evt.usage));
}
965
/**
 * Project one decoded "codex-json" event into evidence.
 *
 * The payload is `evt.item` when that is an object, otherwise the event itself.
 * Depending on the event/item type strings this records:
 *   - final output text (first string among message/text/content/output),
 *   - a tool call (when the combined type mentions tool/call/command),
 *   - one "Edit" tool call per entry of a `file_change` item,
 *   - an executed command (exec/command types carrying a string `command`),
 *   - token usage (object `evt.usage`).
 *
 * Non-object events are ignored.
 *
 * @param {unknown} evt - Decoded JSONL event.
 * @param {object} evidence - Evidence sink.
 * @param {(text: string) => unknown} appendFinal - Appends to the final output text.
 */
function handleCodexEvent(evt, evidence, appendFinal) {
  if (!evt || typeof evt !== "object") return;
  const firstOfType = (kind, ...values) => values.find((value) => typeof value === kind);
  const type = typeof evt.type === "string" ? evt.type : "";

  const text = firstOfType("string", evt.message, evt.text, evt.content, evt.output);
  if (text && /message|final|result|response|output/.test(type)) appendFinal(text);

  const item = evt.item && typeof evt.item === "object" ? evt.item : evt;
  const itemType = typeof item.type === "string" ? item.type : "";
  const looksLikeToolEvent = /tool|call|command/.test(`${type}:${itemType}`);

  let toolName;
  if (typeof item.tool === "string") toolName = item.tool;
  else if (typeof item.name === "string" && looksLikeToolEvent) toolName = item.name;

  if (toolName && looksLikeToolEvent) {
    evidence.addToolCall({
      tool: toolName,
      provider: "codex-json",
      server: typeof item.server === "string" ? item.server : void 0,
      args: parseJsonObjectString(item.args ?? item.input ?? item.arguments),
      result: item.result ?? item.output,
      startedAt: evidenceStartedAt(evidence),
      durationMs: 0,
    });
  }

  if (itemType === "file_change" && Array.isArray(item.changes)) {
    for (const change of item.changes) {
      if (!change || typeof change !== "object") continue;
      if (typeof change.path !== "string") continue;
      evidence.addToolCall({
        tool: "Edit",
        provider: "codex-json",
        args: {
          path: change.path,
          kind: typeof change.kind === "string" ? change.kind : void 0,
        },
        startedAt: evidenceStartedAt(evidence),
        durationMs: 0,
      });
    }
  }

  const isCommandEvent = type.includes("exec") || type.includes("command") || itemType.includes("command");
  if (isCommandEvent && typeof item.command === "string") {
    evidence.addCommand({
      command: item.command,
      args: Array.isArray(item.args) ? item.args.map(String) : [],
      // An exit code that is not reported is recorded as 0.
      exitCode: firstOfType("number", item.exit_code, item.exitCode) ?? 0,
      stdout: firstOfType("string", item.stdout, item.aggregated_output)?.slice(0, 4096),
      stderr: typeof item.stderr === "string" ? item.stderr.slice(0, 4096) : void 0,
      startedAt: evidenceStartedAt(evidence),
      durationMs: typeof item.durationMs === "number" ? item.durationMs : 0,
    });
  }

  if (evt.usage && typeof evt.usage === "object") evidence.setUsage(normalizeUsage(evt.usage));
}
1013
/**
 * Project one decoded "pi-json" event into evidence.
 *
 * Records, depending on `evt.type`:
 *   - usage from `message.usage` on message_end / turn_end / agent_end,
 *   - final output text (first string among message/text/content/output/result),
 *   - a tool call on tool_execution_start AND tool_execution_end,
 *   - an executed command on tool_execution_end when the tool name mentions
 *     bash/shell/command/exec and a command string is present,
 *   - usage from `evt.usage`, or from the event itself when type is "usage".
 *
 * Non-object events are ignored.
 *
 * @param {unknown} evt - Decoded JSONL event.
 * @param {object} evidence - Evidence sink.
 * @param {(text: string) => unknown} appendFinal - Appends to the final output text.
 */
function handlePiEvent(evt, evidence, appendFinal) {
  if (!evt || typeof evt !== "object") return;
  const isObject = (value) => Boolean(value) && typeof value === "object";
  const firstOfType = (kind, ...values) => values.find((value) => typeof value === kind);
  const type = typeof evt.type === "string" ? evt.type : "";

  const messageUsage = isObject(evt.message) && isObject(evt.message.usage) ? evt.message.usage : void 0;
  if (messageUsage && ["message_end", "turn_end", "agent_end"].includes(type)) {
    evidence.setUsage(normalizeUsage(messageUsage));
  }

  const text = firstOfType("string", evt.message, evt.text, evt.content, evt.output, evt.result);
  if (text && /assistant|message|final|result|response|output/.test(type)) appendFinal(text);

  const isToolEnd = type === "tool_execution_end";
  const rawTool = firstOfType("string", evt.tool, evt.name, evt.tool_name, evt.toolName);
  const durationMs = firstOfType("number", evt.duration_ms, evt.durationMs) ?? 0;

  if (type === "tool_execution_start" || isToolEnd) {
    const tool = rawTool ? normalizePiToolName(rawTool) : void 0;
    if (tool) {
      evidence.addToolCall({
        tool,
        provider: "pi-json",
        args: evt.args ?? evt.input ?? evt.arguments,
        result: isToolEnd ? evt.result ?? evt.output : void 0,
        startedAt: evidenceStartedAt(evidence),
        durationMs,
      });
    }
  }

  if (isToolEnd) {
    const hasArgsRecord = isObject(evt.args) && !Array.isArray(evt.args);
    const argsCommand = hasArgsRecord && typeof evt.args.command === "string" ? evt.args.command : void 0;
    const commandText = typeof evt.command === "string" ? evt.command : argsCommand;
    // The raw (un-normalised) tool name decides whether this was a shell run.
    if (commandText && /bash|shell|command|exec/i.test(rawTool ?? "")) {
      evidence.addCommand({
        command: commandText,
        args: [],
        exitCode: firstOfType("number", evt.exit_code, evt.exitCode) ?? 0,
        stdout: firstOfType("string", evt.stdout, evt.output)?.slice(0, 4096),
        stderr: typeof evt.stderr === "string" ? evt.stderr.slice(0, 4096) : void 0,
        startedAt: evidenceStartedAt(evidence),
        durationMs,
      });
    }
  }

  const usage = isObject(evt.usage) ? evt.usage : evt;
  if (type === "usage" || evt.usage) evidence.setUsage(normalizeUsage(usage));
}
1050
/** Lookup table from adapter id to adapter object for every built-in adapter. */
const adapterRegistry = new Map(
  [claudeCodeJsonAdapter, codexJsonAdapter, piJsonAdapter, internalTestJsonAdapter].map((adapter) => [
    adapter.id,
    adapter,
  ]),
);
1056
+ //#endregion
1057
+ //#region src/agent/adapter-catalog.ts
1058
/** Context tag under which the adapter lookup service is provided. */
var AdapterCatalog = class extends Context.Tag("agent-skill-evals/promptfoo/AdapterCatalog")() {};

/** Live catalog layer: resolves adapter ids against the built-in `adapterRegistry`. */
const AdapterCatalogLive = Layer.succeed(AdapterCatalog, {
  get(id) {
    return adapterRegistry.get(id);
  },
});
1060
/**
 * Look up an adapter by id through the `AdapterCatalog` service.
 *
 * @param {string} id - Adapter id, e.g. "codex-json".
 * @returns An Effect yielding whatever the catalog's `get(id)` returns.
 */
function getAdapter(id) {
  return Effect.gen(function* () {
    const catalog = yield* AdapterCatalog;
    return catalog.get(id);
  });
}
1063
+ //#endregion
1064
+ //#region src/runtime-checks/_files.ts
1065
/** Directory names that file walks never descend into. */
const SKIP_DIRS = new Set(["node_modules", ".git"]);

/**
 * Recursively visit every regular file below `root`.
 *
 * Does nothing when `root` cannot be stat'ed or is not a directory. Directories
 * named in `SKIP_DIRS` are not entered, and a directory that cannot be read is
 * treated as empty.
 *
 * @param {string} root - Directory to walk.
 * @param {(relativePath: string, absolutePath: string) => unknown} visit
 *   Returns an Effect that is run for each file; `relativePath` is relative to `root`.
 * @returns An Effect that completes once the whole tree has been visited.
 */
function walkFilesEffect(root, visit) {
  return Effect.gen(function* () {
    const fs = yield* FileSystem;
    const rootStat = yield* fs.stat(root).pipe(Effect.catchAll(() => Effect.succeed(null)));
    if (!rootStat?.isDirectory()) return;

    const walk = (dir) =>
      Effect.gen(function* () {
        const entries = yield* fs.readDirectory(dir).pipe(Effect.catchAll(() => Effect.succeed([])));
        for (const entry of entries) {
          const absolutePath = join(dir, entry.name);
          if (entry.isDirectory()) {
            if (!SKIP_DIRS.has(entry.name)) yield* walk(absolutePath);
          } else if (entry.isFile()) {
            yield* visit(relative(root, absolutePath), absolutePath);
          }
        }
      });

    yield* walk(root);
  });
}
1087
/**
 * Collect the root-relative paths of all files below `root` that match `glob`.
 *
 * @param {string} root - Directory to search.
 * @param {string} glob - Pattern as understood by `matchesGlob`.
 * @returns An Effect yielding the matching relative paths in walk order.
 */
function listMatchingFilesEffect(root, glob) {
  return Effect.gen(function* () {
    const found = [];
    const collect = (relativePath) =>
      Effect.sync(() => {
        if (matchesGlob(relativePath, glob)) found.push(relativePath);
      });
    yield* walkFilesEffect(root, collect);
    return found;
  });
}
1096
/**
 * Test a relative path against a glob.
 *
 * A glob without any "/" is matched against the file name only when the path
 * has directories; otherwise the whole path is matched.
 *
 * @param {string} relativePath - "/"-separated path relative to the search root.
 * @param {string} glob - Glob pattern.
 * @returns {boolean} Whether the path matches.
 */
function matchesGlob(relativePath, glob) {
  const matchFileNameOnly = !glob.includes("/") && relativePath.includes("/");
  const candidate = matchFileNameOnly ? (relativePath.split("/").at(-1) ?? relativePath) : relativePath;
  return globToRegExp(glob).test(candidate);
}
1100
/**
 * Compile a glob into an anchored regular expression.
 *
 * Supported syntax:
 *   - a leading "./" is ignored
 *   - "**" followed by "/" matches zero or more whole directories
 *   - any other "**" (e.g. a trailing "src/**") matches anything, including "/"
 *   - "*" matches any run of characters except "/"
 *   - "?" matches exactly one character except "/"
 *   - every other character matches itself literally
 *
 * The pattern is scanned token by token instead of using placeholder strings,
 * so "?" can no longer leak through as a regex quantifier and no literal text
 * in the glob can collide with an internal placeholder.
 *
 * @param {string} glob - Glob pattern.
 * @returns {RegExp} Expression matching complete "/"-separated relative paths.
 */
function globToRegExp(glob) {
  const source = glob.replace(/^\.\//, "");
  const regexSpecial = /[.+^${}()|[\]\\]/;
  let pattern = "";
  let index = 0;
  while (index < source.length) {
    const char = source[index];
    if (char === "*" && source[index + 1] === "*") {
      const followedBySlash = source[index + 2] === "/";
      pattern += followedBySlash ? "(?:.*/)?" : ".*";
      index += followedBySlash ? 3 : 2;
    } else if (char === "*") {
      pattern += "[^/]*";
      index += 1;
    } else if (char === "?") {
      pattern += "[^/]";
      index += 1;
    } else {
      pattern += regexSpecial.test(char) ? `\\${char}` : char;
      index += 1;
    }
  }
  return new RegExp(`^${pattern}$`);
}
1106
+ //#endregion
1107
+ //#region src/agent/world.ts
1108
/**
 * Create a fresh temporary run directory containing an empty "world" subdirectory.
 *
 * @returns An Effect yielding `{ runDir, worldPath }`, where `worldPath` is
 *   `<runDir>/world`.
 */
function createRunDirEffect() {
  return Effect.gen(function* () {
    const fs = yield* FileSystem;
    const runDir = yield* fs.makeTempDirectory("agent-skill-evals-run-");
    const paths = { runDir, worldPath: join(runDir, "world") };
    yield* fs.makeDirectory(paths.worldPath);
    return paths;
  });
}
1120
/**
 * Copy a fixture directory into the world directory.
 *
 * An absolute `input.fixturePath` is used as-is. A relative one is resolved
 * against the first defined of `input.testFileDir`, `input.baseDir`, and the
 * current working directory from the `Environment` service.
 *
 * @param {{ fixturePath: string, testFileDir?: string, baseDir?: string }} input
 * @param {string} worldPath - Destination directory.
 * @returns An Effect that completes when the copy is done.
 */
function copyFixtureEffect(input, worldPath) {
  return Effect.gen(function* () {
    const fs = yield* FileSystem;
    const environment = yield* Environment;
    const cwd = yield* environment.cwd;
    const { fixturePath } = input;
    let source = fixturePath;
    if (!isAbsolute(fixturePath)) {
      const anchor = input.testFileDir ?? input.baseDir ?? cwd;
      source = resolve(anchor, fixturePath);
    }
    yield* fs.copyDirectory(source, worldPath);
  });
}
1128
/**
 * Build the "world" handle used to inspect and act on a run's world directory.
 *
 * @param {string} worldPath - Root directory of the world.
 * @param {(event: object) => void} [recordCommand] - Optional callback invoked
 *   with an evidence record for every command run through `exec`.
 * @returns {object} Handle exposing:
 *   - `path`: the world directory
 *   - `readFile(rel)`: Effect yielding the file's text, or `null` on any read failure
 *   - `listFiles(glob)`: Effect yielding world-relative paths matching the glob
 *   - `exec(command, args?, opts?)`: Effect running a command with cwd = world,
 *     yielding `{ exitCode, stdout, stderr }`
 *   - `diff()`: Effect that currently always yields an empty string
 */
function makeWorldHandle(worldPath, recordCommand) {
  const readFile = (rel) =>
    Effect.gen(function* () {
      const fs = yield* FileSystem;
      return yield* fs.readText(join(worldPath, rel)).pipe(Effect.catchAll(() => Effect.succeed(null)));
    }).pipe(Effect.provide(NodeServicesLive));

  const listFiles = (glob) => listMatchingFilesEffect(worldPath, glob).pipe(Effect.provide(NodeServicesLive));

  const exec = (command, args = [], opts = {}) =>
    Effect.gen(function* () {
      const environment = yield* Environment;
      const inheritedEnv = yield* environment.env;
      const result = yield* runCommandEffect(command, args, {
        cwd: worldPath,
        // Caller-supplied variables override the inherited environment.
        env: { ...inheritedEnv, ...(opts.env ?? {}) },
        timeoutMs: opts.timeoutMs,
      });
      const { exitCode, stdout, stderr } = result;
      recordCommand?.({
        command,
        args: [...args],
        exitCode,
        // Evidence keeps only the first 4096 characters of each stream.
        stdout: stdout.slice(0, 4096),
        stderr: stderr.slice(0, 4096),
        startedAt: result.startedAt,
        durationMs: result.durationMs,
      });
      return { exitCode, stdout, stderr };
    }).pipe(Effect.provide(ProcessRunnerLive), Effect.provide(NodeServicesLive));

  return {
    path: worldPath,
    readFile,
    listFiles,
    exec,
    diff() {
      return Effect.succeed("");
    },
  };
}
1170
+ //#endregion
1171
+ //#region src/agent/file-watch.ts
1172
/**
 * Snapshot a directory tree as a map from root-relative file path to the
 * SHA-256 hex digest of the file's contents.
 *
 * Directories listed in `SKIP_DIRS` are not entered. Directories that cannot be
 * listed are treated as empty and files that cannot be read are left out. When
 * `root` cannot be stat'ed or is not a directory the snapshot is empty.
 *
 * @param {string} root - Directory to snapshot.
 * @returns An Effect yielding a `Map<string, string>` of path to content hash.
 */
function snapshotTreeEffect(root) {
  return Effect.gen(function* () {
    const hashes = /* @__PURE__ */ new Map();
    const fs = yield* FileSystem;
    // Use the stat result as a guard (it was previously fetched and discarded),
    // mirroring walkFilesEffect.
    const rootStat = yield* fs.stat(root).pipe(Effect.catchAll(() => Effect.succeed(null)));
    if (!rootStat?.isDirectory()) return hashes;

    const walk = (dir) =>
      Effect.gen(function* () {
        const entries = yield* fs.readDirectory(dir).pipe(Effect.catchAll(() => Effect.succeed([])));
        for (const entry of entries) {
          const entryPath = join(dir, entry.name);
          if (entry.isDirectory()) {
            if (!SKIP_DIRS.has(entry.name)) yield* walk(entryPath);
            continue;
          }
          if (!entry.isFile()) continue;
          const contents = yield* fs.readFile(entryPath).pipe(Effect.catchAll(() => Effect.succeed(null)));
          if (!contents) continue;
          hashes.set(relative(root, entryPath), createHash("sha256").update(contents).digest("hex"));
        }
      });

    yield* walk(root);
    return hashes;
  });
}
1198
/**
 * Compare two tree snapshots (path to content hash) and list the differences.
 *
 * Creations and modifications come first, in the iteration order of `after`,
 * followed by deletions in the iteration order of `before`.
 *
 * @param {Map<string, string>} before - Snapshot taken before the run.
 * @param {Map<string, string>} after - Snapshot taken after the run.
 * @returns {{ path: string, op: "create" | "modify" | "delete" }[]}
 */
function diffTrees(before, after) {
  const writes = [...after].flatMap(([path, hash]) => {
    const previous = before.get(path);
    if (previous === undefined) return [{ path, op: "create" }];
    return previous !== hash ? [{ path, op: "modify" }] : [];
  });
  const deletions = [...before.keys()]
    .filter((path) => !after.has(path))
    .map((path) => ({ path, op: "delete" }));
  return [...writes, ...deletions];
}
1217
+ //#endregion
1218
+ //#region src/agent/index.ts
1219
/**
 * Schema for the optional `skillEvidence` provider setting. All sections and
 * all fields are optional:
 *   - `mcpResource`: `uriArgPaths`, `uriPatterns` (string lists)
 *   - `mcpTool`: `toolPatterns` (string list)
 *   - `nativeArgs`: `whenArgs`, `whenAnyArgs`, `skillPathFlags` (string lists),
 *     `provider`, `source` (strings)
 */
const SkillEvidenceConfigSchema = (() => {
  const optionalStringList = () => Schema.optional(Schema.Array(Schema.String));
  const optionalString = () => Schema.optional(Schema.String);
  const optionalSection = (fields) => Schema.optional(Schema.Struct(fields));
  return Schema.Struct({
    mcpResource: optionalSection({
      uriArgPaths: optionalStringList(),
      uriPatterns: optionalStringList(),
    }),
    mcpTool: optionalSection({ toolPatterns: optionalStringList() }),
    nativeArgs: optionalSection({
      whenArgs: optionalStringList(),
      whenAnyArgs: optionalStringList(),
      skillPathFlags: optionalStringList(),
      provider: optionalString(),
      source: optionalString(),
    }),
  });
})();
1233
/**
 * Schema for the provider's `config` object. Every field is optional at the
 * schema level; required combinations are enforced later when the adapter runs.
 */
const ProviderConfigSchema = (() => {
  const fields = {
    adapter: Schema.optional(Schema.String),
    command: Schema.optional(Schema.String),
    args: Schema.optional(Schema.Array(Schema.String)),
    timeoutMs: Schema.optional(Schema.Number),
    baseDir: Schema.optional(Schema.String),
    isolatedHome: Schema.optional(Schema.Boolean),
    skillEvidence: Schema.optional(SkillEvidenceConfigSchema),
  };
  return Schema.Struct(fields);
})();
1242
/** Adapter ids listed in the "unknown adapter" error message. */
const DOCUMENTED_ADAPTERS = ["codex-json", "claude-code-json", "pi-json"];
1247
/** Combined live layer provided to every `callApi` invocation. */
const AgentLiveLayer = Layer.mergeAll(
  RuntimeCheckCatalogLive,
  AdapterCatalogLive,
  NodeServicesLive,
  ProcessRunnerLive,
);
1248
/**
 * Accept a value as a vars record only when it is a non-null, non-array object.
 *
 * @param {unknown} value - Candidate vars value.
 * @returns {object | undefined} `value` itself, or `undefined` when it is not a record.
 */
function asVars(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isRecord) return undefined;
  return value;
}
1251
/**
 * Extract the test variables from a call context.
 *
 * `context.vars` is preferred, then `context.test.vars`; each is used only
 * when it is a record. Falls back to an empty object.
 *
 * @param {{ vars?: unknown, test?: { vars?: unknown } }} context
 * @returns {object} The vars record, or `{}`.
 */
function varsFromContext(context) {
  const candidates = [context.vars, context.test?.vars];
  for (const candidate of candidates) {
    const vars = asVars(candidate);
    if (vars !== undefined) return vars;
  }
  return {};
}
1254
/**
 * Validate raw provider config against `ProviderConfigSchema`.
 *
 * A nullish input is treated as `{}`. All validation errors are collected
 * (`errors: "all"`) and formatted into one message.
 *
 * @param {unknown} input - Raw config value.
 * @returns {object} The decoded config, or `{ error }` describing why it is invalid.
 */
function decodeProviderConfig(input) {
  const decode = Schema.decodeUnknownEither(ProviderConfigSchema, { errors: "all" });
  const outcome = decode(input ?? {});
  if (Either.isLeft(outcome)) {
    const details = ParseResult.TreeFormatter.formatErrorSync(outcome.left);
    return { error: `agent-skill-evals-provider: invalid config: ${details}` };
  }
  return outcome.right;
}
1259
/**
 * Set up everything needed for one agent run: a fresh run directory, the
 * fixture copied into its world directory, an evidence collector, and a world
 * handle whose executed commands are recorded as evidence.
 *
 * Failures are reported as a value rather than as an Effect failure.
 *
 * @param {string} fixture - Fixture path from `vars.fixture`.
 * @param {object} config - Decoded provider config (`baseDir`, `skillEvidence` are read).
 * @returns An Effect yielding `{ runDir, worldPath, evidenceCollector, world }`
 *   or `{ error }`.
 */
function prepareRunEffect(fixture, config) {
  const describe = (cause) => (cause instanceof Error ? cause.message : String(cause));
  return Effect.gen(function* () {
    const created = yield* Effect.either(createRunDirEffect());
    if (Either.isLeft(created)) {
      return {
        error: `agent-skill-evals-provider: failed to create isolated world: ${describe(created.left)}`,
      };
    }
    const { runDir, worldPath } = created.right;

    const fixtureInput = { fixturePath: fixture, baseDir: config.baseDir };
    const copied = yield* Effect.either(copyFixtureEffect(fixtureInput, worldPath));
    if (Either.isLeft(copied)) {
      return {
        error: `agent-skill-evals-provider: failed to copy vars.fixture "${fixture}" into isolated world: ${describe(copied.left)}`,
      };
    }

    const evidenceCollector = new EvidenceCollector(config.skillEvidence);
    const world = makeWorldHandle(worldPath, (event) => evidenceCollector.addCommand(event));
    return { runDir, worldPath, evidenceCollector, world };
  });
}
1278
/**
 * Evaluate the preconditions declared in the test vars against the prepared world.
 *
 * Parse errors on the "preconditions" field and unknown check types are
 * recorded as failed results. Every known precondition is verified in
 * "precondition" mode; evaluation does not stop at the first failure.
 *
 * @param {object} vars - Test variables.
 * @param {{ world: object, evidenceCollector: object }} run - Prepared run.
 * @returns An Effect yielding `{ results, passed }`, where `passed` is true only
 *   when every recorded result passed.
 */
function runPreconditionsEffect(vars, run) {
  return Effect.gen(function* () {
    const results = [];
    let passed = true;
    const record = (outcome) => {
      results.push(outcome);
      if (!outcome.pass) passed = false;
    };
    const recordFailure = (reason) => record({ pass: false, score: 0, reason });

    const parsed = parseRuntimeTestFields(vars);
    const preconditionErrors = parsed.errors.filter((issue) => issue.field === "preconditions");
    for (const issue of preconditionErrors) {
      const location = issue.index === void 0 ? issue.field : `${issue.field}[${issue.index}]`;
      recordFailure(`precondition: ${location}: ${issue.reason}`);
    }

    for (const entry of parsed.preconditions) {
      const check = yield* getRuntimeCheck(entry.type);
      if (!check) {
        recordFailure(`precondition: unknown effect type "${entry.type}"`);
        continue;
      }
      const outcome = yield* check.verify({
        assertion: entry.args,
        world: run.world,
        evidence: evidenceFromSnapshot(run.evidenceCollector.toSnapshot()),
        mode: "precondition",
      });
      record(outcome);
    }

    return { results, passed };
  });
}
1318
/**
 * Resolve a configured command or argument that looks like a relative path.
 *
 * Values containing "=" are never touched. A value is resolved against
 * `baseDir` when it starts with "./" or "../", or when it is non-absolute and
 * contains a "/". Everything else is returned unchanged.
 *
 * @param {string} baseDir - Directory relative paths are resolved against.
 * @param {string} path - Configured value.
 * @returns {string} The resolved absolute path, or the original value.
 */
function resolveConfiguredPath(baseDir, path) {
  if (path.includes("=")) return path;
  const isExplicitRelative = path.startsWith("./") || path.startsWith("../");
  const isNestedRelative = !isAbsolute(path) && path.includes("/");
  if (isExplicitRelative || isNestedRelative) return resolve(baseDir, path);
  return path;
}
1322
/**
 * Expand `${NAME}` and `${NAME:-fallback}` references in a string.
 *
 * - `${NAME}` becomes the variable's value, or "" when it is not set.
 * - `${NAME:-fallback}` becomes the value, or `fallback` when the variable is
 *   unset OR empty, matching POSIX `:-` semantics.
 *
 * Only own properties of `env` are consulted, so names such as "constructor"
 * or "toString" cannot expand to inherited prototype members.
 *
 * @param {string} value - Text possibly containing variable references.
 * @param {Record<string, string | undefined>} env - Variable values.
 * @returns {string} The text with all references expanded.
 */
function expandEnvVars(value, env) {
  const reference = /\$\{([A-Za-z_][A-Za-z0-9_]*)(:-([^}]*))?\}/g;
  return value.replace(reference, (_match, name, fallbackPart, fallback) => {
    const current = Object.prototype.hasOwnProperty.call(env, name) ? env[name] : void 0;
    if (fallbackPart !== void 0) {
      return current === void 0 || current === null || current === "" ? fallback : current;
    }
    return current ?? "";
  });
}
1325
/**
 * Derive a skill name from a skill path passed on the command line.
 *
 * Trailing slashes are ignored. A path ending in "SKILL.md" yields the name of
 * its containing directory; otherwise the last path segment is used with a
 * trailing ".md" (any case) removed.
 *
 * @param {string} path - Skill file or directory path.
 * @returns {string | undefined} The skill name, or `undefined` when nothing is
 *   left after removing trailing slashes.
 */
function skillNameFromNativePath(path) {
  const trimmed = path.replace(/\/+$/, "");
  if (trimmed === "") return undefined;
  const leaf = basename(trimmed);
  if (leaf === "SKILL.md") return basename(dirname(trimmed));
  return leaf.replace(/\.md$/i, "");
}
1331
/**
 * Record "native" skill-load evidence for skills passed via configured CLI flags.
 *
 * Nothing is recorded unless a native-args config applies, every `whenArgs`
 * entry is present in `config.args`, and (when `whenAnyArgs` is non-empty) at
 * least one of its entries is present. Each occurrence of a skill-path flag
 * followed by a non-empty value then yields one skill-load record.
 *
 * @param {{ evidenceCollector: object }} run - Prepared run.
 * @param {object} config - Decoded provider config.
 * @param {number} startedAt - Timestamp stored on each record.
 */
function addNativeSkillEvidenceFromConfig(run, config, startedAt) {
  const nativeConfig = nativeSkillEvidenceConfig(config);
  if (!nativeConfig) return;
  const args = config.args ?? [];
  const hasArg = (arg) => args.includes(arg);
  if (!nativeConfig.whenArgs.every(hasArg)) return;
  if (nativeConfig.whenAnyArgs.length > 0 && !nativeConfig.whenAnyArgs.some(hasArg)) return;

  args.forEach((flag, position) => {
    if (!flag || !nativeConfig.skillPathFlags.includes(flag)) return;
    // The skill path is the argument that directly follows the flag.
    const skillPath = args[position + 1];
    if (!skillPath) return;
    const skill = skillNameFromNativePath(skillPath);
    if (!skill) return;
    run.evidenceCollector.addSkillLoad({
      skill,
      delivery: "native",
      provider: nativeConfig.provider,
      source: nativeConfig.source ?? flag,
      startedAt,
    });
  });
}
1355
/**
 * Resolve the effective native skill-evidence settings for a provider config.
 *
 * Explicit `config.skillEvidence.nativeArgs` wins, with defaults filled in
 * (`skillPathFlags` defaults to ["--skill"], `provider` to the adapter id or
 * "agent"). Without explicit settings, only the "pi-json" adapter has built-in
 * defaults; every other adapter yields `undefined`.
 *
 * @param {object} config - Decoded provider config.
 * @returns {{ whenArgs: string[], whenAnyArgs: string[], skillPathFlags: string[],
 *   provider: string, source: string | undefined } | undefined}
 */
function nativeSkillEvidenceConfig(config) {
  const explicit = config.skillEvidence?.nativeArgs;
  if (!explicit) {
    if (config.adapter !== "pi-json") return undefined;
    return {
      whenArgs: [],
      whenAnyArgs: ["--no-skills", "-ns"],
      skillPathFlags: ["--skill"],
      provider: "pi-json",
      source: "--skill",
    };
  }
  return {
    whenArgs: explicit.whenArgs ?? [],
    whenAnyArgs: explicit.whenAnyArgs ?? [],
    skillPathFlags: explicit.skillPathFlags ?? ["--skill"],
    provider: explicit.provider ?? config.adapter ?? "agent",
    source: explicit.source,
  };
}
1372
/**
 * Convert normalised usage into a token-usage object with the keys
 * `total`, `prompt`, `completion` and `cached`.
 *
 * Only fields that are not `undefined` are copied.
 *
 * @param {{ totalTokens?: number, inputTokens?: number, outputTokens?: number,
 *   cacheReadTokens?: number }} usage - Normalised usage.
 * @returns {object | undefined} The token-usage object, or `undefined` when no
 *   field was present.
 */
function promptfooTokenUsage(usage) {
  const fieldMap = [
    ["total", "totalTokens"],
    ["prompt", "inputTokens"],
    ["completion", "outputTokens"],
    ["cached", "cacheReadTokens"],
  ];
  const tokenUsage = {};
  for (const [target, source] of fieldMap) {
    if (usage[source] !== void 0) tokenUsage[target] = usage[source];
  }
  return Object.keys(tokenUsage).length > 0 ? tokenUsage : void 0;
}
1381
/**
 * Run the configured agent adapter inside the prepared world.
 *
 * Validates that an adapter id is configured, that it is known to the adapter
 * catalog, and that a command is configured. Each failure is returned as
 * `{ output: "", error }` rather than raised. `${VAR}` references in the command
 * and arguments are expanded from the environment and path-like values are
 * resolved against `config.baseDir` (default: current working directory).
 * With `config.isolatedHome`, HOME points at `<runDir>/agent-home`.
 *
 * @param {{ prompt: string, run: object, config: object }} input
 * @returns An Effect yielding `{ output, error? }`.
 */
function runConfiguredAdapterEffect(input) {
  const { config, run, prompt } = input;
  const fail = (error) => ({ output: "", error });
  return Effect.gen(function* () {
    const adapterId = config.adapter;
    if (!adapterId) {
      return fail("agent-skill-evals-provider: config.adapter is required. Use codex-json, claude-code-json, or pi-json.");
    }
    const adapter = yield* getAdapter(adapterId);
    if (!adapter) {
      return fail(`agent-skill-evals-provider: unknown adapter "${adapterId}". Supported adapters: ${DOCUMENTED_ADAPTERS.join(", ")}`);
    }
    if (!config.command) {
      return fail("agent-skill-evals-provider: config.command is required for dynamic agent runs");
    }

    const environment = yield* Environment;
    const cwd = yield* environment.cwd;
    const env = yield* environment.env;
    const baseDir = config.baseDir ?? cwd;
    const expandAndResolve = (value) => resolveConfiguredPath(baseDir, expandEnvVars(value, env));
    const defaultTimeoutMs = 5 * 6e4;
    const homeOverride = config.isolatedHome ? { HOME: join(run.runDir, "agent-home") } : {};

    const result = yield* adapter.run({
      command: expandAndResolve(config.command),
      args: (config.args ?? []).map((arg) => (typeof arg === "string" ? expandAndResolve(arg) : arg)),
      cwd: run.worldPath,
      prompt,
      evidence: run.evidenceCollector,
      timeoutMs: config.timeoutMs ?? defaultTimeoutMs,
      env: { ...env, ...homeOverride },
    });

    return {
      output: result.output,
      ...(result.error ? { error: result.error } : {}),
    };
  });
}
1420
/**
 * Snapshot the world after the run and record every difference from the
 * pre-run snapshot as a file-write evidence event.
 *
 * @param {{ worldPath: string, evidenceCollector: object }} run - Prepared run.
 * @param {Map<string, string>} preTree - Snapshot taken before the agent ran.
 * @returns An Effect that completes once all events have been recorded.
 */
function collectFileEvidenceEffect(run, preTree) {
  return Effect.gen(function* () {
    const postTree = yield* snapshotTreeEffect(run.worldPath);
    const changes = diffTrees(preTree, postTree);
    changes.forEach((change) => {
      run.evidenceCollector.addFileWrite(change);
    });
  });
}
1426
/**
 * Finalise a run: store the output and run details on the evidence collector,
 * write the evidence into the run directory, and write
 * "agent-skill-evals-meta.json" next to it.
 *
 * @param {object} input - `{ run, fixture, vars, output, preconditionResults,
 *   preconditionsPassed, startedAt }`.
 * @returns An Effect yielding the metadata object that was written.
 */
function persistMetadataEffect(input) {
  const { run, fixture, vars } = input;
  return Effect.gen(function* () {
    const fs = yield* FileSystem;
    const finishedAt = yield* Clock.currentTimeMillis;
    const durationMs = finishedAt - input.startedAt;
    const { runDir, worldPath, evidenceCollector } = run;

    evidenceCollector.setOutput(input.output);
    evidenceCollector.setRun({ runDir, worldPath, fixture, durationMs });
    const evidencePath = yield* writeEvidenceToEffect(evidenceCollector, runDir);

    // Key order is kept stable because this object is serialised to disk.
    const metadata = {
      runDir,
      worldPath,
      evidencePath,
      fixture,
      skill: vars.skill,
      kind: vars.kind,
      preconditionResults: input.preconditionResults,
      preconditionsPassed: input.preconditionsPassed,
      durationMs,
    };
    const metadataPath = join(runDir, "agent-skill-evals-meta.json");
    yield* fs.writeText(metadataPath, JSON.stringify(metadata, null, 2));
    return metadata;
  });
}
1453
/**
 * Provider that runs a configured agent CLI against an isolated copy of a
 * fixture directory and reports its output together with run metadata.
 *
 * An invalid config does not throw from the constructor; the error is stored
 * and returned from every `callApi` call instead.
 */
var AgentSkillEvalsProvider = class {
  /** Decoded provider config (`{}` when decoding failed). */
  config;
  /** Validation error message, set only when the config failed to decode. */
  configError;
  /** Function returning this provider's label. */
  id;

  /**
   * @param {{ config?: unknown, id?: string }} [options] - `id` defaults to
   *   "agent-skill-evals".
   */
  constructor(options = {}) {
    const decoded = decodeProviderConfig(options.config ?? {});
    const invalid = "error" in decoded;
    this.config = invalid ? {} : decoded;
    if (invalid) this.configError = decoded.error;
    const label = options.id ?? "agent-skill-evals";
    this.id = () => label;
  }

  /**
   * Promise-based entry point: runs `callApiEffect` with the live layer provided.
   *
   * @param {string} prompt - Prompt handed to the agent.
   * @param {object} [context] - Call context carrying the test vars.
   * @returns {Promise<object>} `{ output, error?, metadata?, tokenUsage? }`.
   */
  async callApi(prompt, context = {}) {
    const program = this.callApiEffect(prompt, context).pipe(Effect.provide(AgentLiveLayer));
    return Effect.runPromise(program);
  }

  /**
   * Effect-based implementation of one provider call.
   *
   * Steps: validate config and `vars.fixture`, prepare the isolated world, run
   * preconditions, snapshot the tree, run the adapter (only when all
   * preconditions passed), record file changes, persist evidence and metadata.
   *
   * @param {string} prompt - Prompt handed to the agent.
   * @param {object} [context] - Call context carrying the test vars.
   * @returns An Effect yielding `{ output, error?, metadata?, tokenUsage? }`.
   */
  callApiEffect(prompt, context = {}) {
    const provider = this;
    const fail = (error) => ({ output: "", error });
    return Effect.gen(function* () {
      if (provider.configError) return fail(provider.configError);

      const startedAt = yield* Clock.currentTimeMillis;
      const vars = varsFromContext(context);
      const { fixture } = vars;
      if (!fixture) {
        return fail("agent-skill-evals-provider: vars.fixture is required. Set vars.fixture to the fixture directory for this test case.");
      }

      const prepared = yield* prepareRunEffect(fixture, provider.config);
      if ("error" in prepared) return fail(prepared.error);

      const preconditions = yield* runPreconditionsEffect(vars, prepared);
      // Snapshot after the preconditions so only later changes count as agent writes.
      const preTree = yield* snapshotTreeEffect(prepared.worldPath);

      let output = "";
      let error;
      if (preconditions.passed) {
        addNativeSkillEvidenceFromConfig(prepared, provider.config, startedAt);
        const adapterResult = yield* runConfiguredAdapterEffect({
          prompt,
          run: prepared,
          config: provider.config,
        });
        ({ output, error } = adapterResult);
      }

      yield* collectFileEvidenceEffect(prepared, preTree);
      const metadata = yield* persistMetadataEffect({
        run: prepared,
        fixture,
        vars,
        output,
        preconditionResults: preconditions.results,
        preconditionsPassed: preconditions.passed,
        startedAt,
      }).pipe(Effect.orDie);

      const tokenUsage = promptfooTokenUsage(prepared.evidenceCollector.toSnapshot().usage);
      return {
        output,
        ...(error ? { error } : {}),
        metadata,
        ...(tokenUsage ? { tokenUsage } : {}),
      };
    });
  }
};
1522
+ //#endregion
1523
+ export { writeEvidenceToEffect as a, getRuntimeCheck as c, evidenceFromSnapshot as i, makeWorldHandle as n, decodeEvidenceSnapshotEither as o, EvidenceCollector as r, RuntimeCheckCatalogLive as s, AgentSkillEvalsProvider as t };
1524
+
1525
+ //# sourceMappingURL=agent-CM7fIL_C.mjs.map