vitest-evals 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -1
- package/dist/harness.d.mts +252 -1
- package/dist/harness.d.ts +252 -1
- package/dist/harness.js +298 -21
- package/dist/harness.js.map +1 -1
- package/dist/harness.mjs +289 -21
- package/dist/harness.mjs.map +1 -1
- package/dist/index.d.mts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +331 -21
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +321 -21
- package/dist/index.mjs.map +1 -1
- package/dist/internal/toolCallScorer.js +32 -12
- package/dist/internal/toolCallScorer.js.map +1 -1
- package/dist/internal/toolCallScorer.mjs +32 -12
- package/dist/internal/toolCallScorer.mjs.map +1 -1
- package/dist/judges/factualityJudge.js.map +1 -1
- package/dist/judges/factualityJudge.mjs.map +1 -1
- package/dist/judges/index.js +291 -23
- package/dist/judges/index.js.map +1 -1
- package/dist/judges/index.mjs +291 -23
- package/dist/judges/index.mjs.map +1 -1
- package/dist/judges/judgeHarness.js +291 -23
- package/dist/judges/judgeHarness.js.map +1 -1
- package/dist/judges/judgeHarness.mjs +291 -23
- package/dist/judges/judgeHarness.mjs.map +1 -1
- package/dist/judges/toolCallJudge.js +32 -12
- package/dist/judges/toolCallJudge.js.map +1 -1
- package/dist/judges/toolCallJudge.mjs +32 -12
- package/dist/judges/toolCallJudge.mjs.map +1 -1
- package/dist/legacy/scorers/index.js +32 -12
- package/dist/legacy/scorers/index.js.map +1 -1
- package/dist/legacy/scorers/index.mjs +32 -12
- package/dist/legacy/scorers/index.mjs.map +1 -1
- package/dist/legacy/scorers/toolCallScorer.js +32 -12
- package/dist/legacy/scorers/toolCallScorer.js.map +1 -1
- package/dist/legacy/scorers/toolCallScorer.mjs +32 -12
- package/dist/legacy/scorers/toolCallScorer.mjs.map +1 -1
- package/dist/legacy.js +33 -13
- package/dist/legacy.js.map +1 -1
- package/dist/legacy.mjs +33 -13
- package/dist/legacy.mjs.map +1 -1
- package/dist/reporter.d.mts +5 -0
- package/dist/reporter.d.ts +5 -0
- package/dist/reporter.js +26 -2
- package/dist/reporter.js.map +1 -1
- package/dist/reporter.mjs +26 -2
- package/dist/reporter.mjs.map +1 -1
- package/package.json +1 -1
package/dist/harness.js
CHANGED
|
@@ -22,7 +22,12 @@ var harness_exports = {};
|
|
|
22
22
|
__export(harness_exports, {
|
|
23
23
|
assistantMessages: () => assistantMessages,
|
|
24
24
|
attachHarnessRunToError: () => attachHarnessRunToError,
|
|
25
|
+
createFailedHarnessRun: () => createFailedHarnessRun,
|
|
26
|
+
createGenAiUsageAttributes: () => createGenAiUsageAttributes,
|
|
25
27
|
createHarness: () => createHarness,
|
|
28
|
+
createToolCallSpans: () => createToolCallSpans,
|
|
29
|
+
ensureRunTrace: () => ensureRunTrace,
|
|
30
|
+
failedSpans: () => failedSpans,
|
|
26
31
|
getHarnessRunFromError: () => getHarnessRunFromError,
|
|
27
32
|
hasCallableMethod: () => hasCallableMethod,
|
|
28
33
|
isHarnessRun: () => isHarnessRun,
|
|
@@ -33,8 +38,12 @@ __export(harness_exports, {
|
|
|
33
38
|
normalizeHarnessRun: () => normalizeHarnessRun,
|
|
34
39
|
normalizeMetadata: () => normalizeMetadata,
|
|
35
40
|
normalizeRecord: () => normalizeRecord,
|
|
41
|
+
normalizeSpanAttributes: () => normalizeSpanAttributes,
|
|
42
|
+
normalizeSpanError: () => normalizeSpanError,
|
|
36
43
|
resolveHarnessRunErrors: () => resolveHarnessRunErrors,
|
|
37
44
|
serializeError: () => serializeError,
|
|
45
|
+
spans: () => spans,
|
|
46
|
+
spansByKind: () => spansByKind,
|
|
38
47
|
systemMessages: () => systemMessages,
|
|
39
48
|
toJsonValue: () => toJsonValue,
|
|
40
49
|
toolCalls: () => toolCalls,
|
|
@@ -43,24 +52,38 @@ __export(harness_exports, {
|
|
|
43
52
|
});
|
|
44
53
|
module.exports = __toCommonJS(harness_exports);
|
|
45
54
|
function isJsonPrimitive(value) {
|
|
46
|
-
return value === null || typeof value === "string" || typeof value === "
|
|
55
|
+
return value === null || typeof value === "string" || typeof value === "boolean" || typeof value === "number" && Number.isFinite(value);
|
|
47
56
|
}
|
|
48
57
|
function isJsonRecord(value) {
|
|
49
58
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
50
59
|
}
|
|
51
|
-
function normalizeJsonArray(value) {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
60
|
+
function normalizeJsonArray(value, seen) {
|
|
61
|
+
if (seen.has(value)) {
|
|
62
|
+
return void 0;
|
|
63
|
+
}
|
|
64
|
+
seen.add(value);
|
|
65
|
+
const normalized = value.map((item) => {
|
|
66
|
+
const normalized2 = toJsonValueInternal(item, seen);
|
|
67
|
+
return normalized2 === void 0 ? null : normalized2;
|
|
55
68
|
});
|
|
69
|
+
seen.delete(value);
|
|
70
|
+
return normalized;
|
|
56
71
|
}
|
|
57
|
-
function normalizeJsonObject(value) {
|
|
72
|
+
function normalizeJsonObject(value, seen) {
|
|
58
73
|
const normalized = {};
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
74
|
+
if (seen.has(value)) {
|
|
75
|
+
return normalized;
|
|
76
|
+
}
|
|
77
|
+
seen.add(value);
|
|
78
|
+
try {
|
|
79
|
+
for (const [key, entryValue] of Object.entries(value)) {
|
|
80
|
+
const entry = toJsonValueInternal(entryValue, seen);
|
|
81
|
+
if (entry !== void 0) {
|
|
82
|
+
normalized[key] = entry;
|
|
83
|
+
}
|
|
63
84
|
}
|
|
85
|
+
} finally {
|
|
86
|
+
seen.delete(value);
|
|
64
87
|
}
|
|
65
88
|
return normalized;
|
|
66
89
|
}
|
|
@@ -68,19 +91,25 @@ function hasCallableMethod(value, methodName) {
|
|
|
68
91
|
return value !== null && (typeof value === "object" || typeof value === "function") && methodName in value && typeof value[methodName] === "function";
|
|
69
92
|
}
|
|
70
93
|
function toJsonValue(value) {
|
|
94
|
+
return toJsonValueInternal(value, /* @__PURE__ */ new WeakSet());
|
|
95
|
+
}
|
|
96
|
+
function toJsonValueInternal(value, seen) {
|
|
71
97
|
if (isJsonPrimitive(value)) {
|
|
72
98
|
return value;
|
|
73
99
|
}
|
|
100
|
+
if (value !== null && typeof value === "object" && seen.has(value)) {
|
|
101
|
+
return void 0;
|
|
102
|
+
}
|
|
74
103
|
if (Array.isArray(value)) {
|
|
75
|
-
return normalizeJsonArray(value);
|
|
104
|
+
return normalizeJsonArray(value, seen);
|
|
76
105
|
}
|
|
77
106
|
if (isJsonRecord(value)) {
|
|
78
|
-
return normalizeJsonObject(value);
|
|
107
|
+
return normalizeJsonObject(value, seen);
|
|
79
108
|
}
|
|
80
109
|
return void 0;
|
|
81
110
|
}
|
|
82
111
|
function normalizeRecord(value) {
|
|
83
|
-
return normalizeJsonObject(value);
|
|
112
|
+
return normalizeJsonObject(value, /* @__PURE__ */ new WeakSet());
|
|
84
113
|
}
|
|
85
114
|
function normalizeMetadata(value) {
|
|
86
115
|
const normalized = normalizeRecord(value);
|
|
@@ -94,14 +123,45 @@ function createHarness(options) {
|
|
|
94
123
|
const harness = {
|
|
95
124
|
name: options.name,
|
|
96
125
|
run: async (input, context) => {
|
|
97
|
-
const
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
126
|
+
const startedAt = /* @__PURE__ */ new Date();
|
|
127
|
+
try {
|
|
128
|
+
const result = await options.run({
|
|
129
|
+
input,
|
|
130
|
+
metadata: context.metadata,
|
|
131
|
+
signal: context.signal,
|
|
132
|
+
artifacts: context.artifacts,
|
|
133
|
+
setArtifact: context.setArtifact
|
|
134
|
+
});
|
|
135
|
+
const run = normalizeHarnessRun(input, result, context);
|
|
136
|
+
ensureRunTrace(run, {
|
|
137
|
+
name: options.name,
|
|
138
|
+
startedAt,
|
|
139
|
+
finishedAt: /* @__PURE__ */ new Date()
|
|
140
|
+
});
|
|
141
|
+
return run;
|
|
142
|
+
} catch (error) {
|
|
143
|
+
const partialRun = getHarnessRunFromError(error);
|
|
144
|
+
if (partialRun) {
|
|
145
|
+
if (Object.keys(context.artifacts).length > 0 && !partialRun.artifacts) {
|
|
146
|
+
partialRun.artifacts = context.artifacts;
|
|
147
|
+
}
|
|
148
|
+
ensureRunTrace(partialRun, {
|
|
149
|
+
name: options.name,
|
|
150
|
+
startedAt,
|
|
151
|
+
finishedAt: /* @__PURE__ */ new Date()
|
|
152
|
+
});
|
|
153
|
+
throw attachHarnessRunToError(error, partialRun);
|
|
154
|
+
}
|
|
155
|
+
const failedRun = createFailedHarnessRun(input, error, {
|
|
156
|
+
artifacts: context.artifacts
|
|
157
|
+
});
|
|
158
|
+
ensureRunTrace(failedRun, {
|
|
159
|
+
name: options.name,
|
|
160
|
+
startedAt,
|
|
161
|
+
finishedAt: /* @__PURE__ */ new Date()
|
|
162
|
+
});
|
|
163
|
+
throw attachHarnessRunToError(error, failedRun);
|
|
164
|
+
}
|
|
105
165
|
}
|
|
106
166
|
};
|
|
107
167
|
return harness;
|
|
@@ -129,6 +189,7 @@ function normalizeHarnessRun(input, result, context) {
|
|
|
129
189
|
context?.artifacts,
|
|
130
190
|
result.artifacts
|
|
131
191
|
);
|
|
192
|
+
const traces = normalizeSimpleTraces(result.traces);
|
|
132
193
|
return {
|
|
133
194
|
session: {
|
|
134
195
|
messages,
|
|
@@ -140,9 +201,26 @@ function normalizeHarnessRun(input, result, context) {
|
|
|
140
201
|
usage,
|
|
141
202
|
...result.timings ? { timings: result.timings } : {},
|
|
142
203
|
...artifacts ? { artifacts } : {},
|
|
204
|
+
...traces ? { traces } : {},
|
|
143
205
|
errors: normalizeSimpleErrors(result.errors)
|
|
144
206
|
};
|
|
145
207
|
}
|
|
208
|
+
function createFailedHarnessRun(input, error, options = {}) {
|
|
209
|
+
const artifacts = options.artifacts;
|
|
210
|
+
return {
|
|
211
|
+
session: {
|
|
212
|
+
messages: [
|
|
213
|
+
{
|
|
214
|
+
role: "user",
|
|
215
|
+
content: normalizeContent(input)
|
|
216
|
+
}
|
|
217
|
+
]
|
|
218
|
+
},
|
|
219
|
+
usage: {},
|
|
220
|
+
...artifacts && Object.keys(artifacts).length > 0 ? { artifacts } : {},
|
|
221
|
+
errors: [serializeError(error)]
|
|
222
|
+
};
|
|
223
|
+
}
|
|
146
224
|
function createDefaultSessionMessages({
|
|
147
225
|
input,
|
|
148
226
|
output,
|
|
@@ -220,9 +298,199 @@ function normalizeSimpleErrors(errors) {
|
|
|
220
298
|
return serializeError(error);
|
|
221
299
|
});
|
|
222
300
|
}
|
|
301
|
+
function normalizeSimpleTraces(traces) {
|
|
302
|
+
if (!Array.isArray(traces)) {
|
|
303
|
+
return void 0;
|
|
304
|
+
}
|
|
305
|
+
const normalized = traces.map(normalizeSimpleTrace).filter((trace) => Boolean(trace));
|
|
306
|
+
return normalized.length > 0 ? normalized : void 0;
|
|
307
|
+
}
|
|
308
|
+
function normalizeSimpleTrace(trace) {
|
|
309
|
+
if (!isJsonRecord(trace)) {
|
|
310
|
+
return void 0;
|
|
311
|
+
}
|
|
312
|
+
const {
|
|
313
|
+
metadata: rawMetadata,
|
|
314
|
+
spans: rawSpans,
|
|
315
|
+
...traceFields
|
|
316
|
+
} = trace;
|
|
317
|
+
const spans2 = (Array.isArray(rawSpans) ? rawSpans : []).map((span) => normalizeSimpleSpan(span)).filter((span) => Boolean(span));
|
|
318
|
+
const metadata = isJsonRecord(rawMetadata) ? normalizeMetadata(rawMetadata) : void 0;
|
|
319
|
+
if (spans2.length === 0 && !traceFields.id && !traceFields.name) {
|
|
320
|
+
return void 0;
|
|
321
|
+
}
|
|
322
|
+
return {
|
|
323
|
+
...traceFields,
|
|
324
|
+
...metadata ? { metadata } : {},
|
|
325
|
+
spans: spans2
|
|
326
|
+
};
|
|
327
|
+
}
|
|
328
|
+
function normalizeSimpleSpan(span) {
|
|
329
|
+
if (!isJsonRecord(span) || typeof span.name !== "string" || !span.name) {
|
|
330
|
+
return void 0;
|
|
331
|
+
}
|
|
332
|
+
const {
|
|
333
|
+
attributes: rawAttributes,
|
|
334
|
+
error: rawError,
|
|
335
|
+
events: rawEvents,
|
|
336
|
+
...spanFields
|
|
337
|
+
} = span;
|
|
338
|
+
const attributes = rawAttributes ? isJsonRecord(rawAttributes) ? normalizeMetadata(rawAttributes) : void 0 : void 0;
|
|
339
|
+
const error = normalizeSpanError(rawError);
|
|
340
|
+
const events = normalizeSimpleSpanEvents(rawEvents);
|
|
341
|
+
return {
|
|
342
|
+
...spanFields,
|
|
343
|
+
...attributes ? { attributes } : {},
|
|
344
|
+
...error ? { error } : {},
|
|
345
|
+
...events ? { events } : {}
|
|
346
|
+
};
|
|
347
|
+
}
|
|
348
|
+
function normalizeSimpleSpanEvents(events) {
|
|
349
|
+
if (!Array.isArray(events)) {
|
|
350
|
+
return void 0;
|
|
351
|
+
}
|
|
352
|
+
const normalized = events.map(normalizeSimpleSpanEvent).filter((event) => Boolean(event));
|
|
353
|
+
return normalized.length > 0 ? normalized : void 0;
|
|
354
|
+
}
|
|
355
|
+
function normalizeSimpleSpanEvent(event) {
|
|
356
|
+
if (!isJsonRecord(event) || typeof event.name !== "string" || !event.name) {
|
|
357
|
+
return void 0;
|
|
358
|
+
}
|
|
359
|
+
const { attributes: rawAttributes, ...eventFields } = event;
|
|
360
|
+
const attributes = rawAttributes ? isJsonRecord(rawAttributes) ? normalizeMetadata(rawAttributes) : void 0 : void 0;
|
|
361
|
+
return {
|
|
362
|
+
...eventFields,
|
|
363
|
+
...attributes ? { attributes } : {}
|
|
364
|
+
};
|
|
365
|
+
}
|
|
366
|
+
function normalizeSpanError(error) {
|
|
367
|
+
if (error === void 0) {
|
|
368
|
+
return void 0;
|
|
369
|
+
}
|
|
370
|
+
if (error instanceof Error) {
|
|
371
|
+
const details2 = normalizeMetadata(
|
|
372
|
+
error
|
|
373
|
+
);
|
|
374
|
+
return {
|
|
375
|
+
...details2 ?? {},
|
|
376
|
+
type: error.name,
|
|
377
|
+
message: error.message
|
|
378
|
+
};
|
|
379
|
+
}
|
|
380
|
+
if (error && typeof error === "object" && !Array.isArray(error) && typeof error.message === "string") {
|
|
381
|
+
const normalized = normalizeMetadata(error);
|
|
382
|
+
const { message: message2, type: type2, ...details2 } = normalized ?? {};
|
|
383
|
+
return {
|
|
384
|
+
...details2,
|
|
385
|
+
message: message2,
|
|
386
|
+
...typeof type2 === "string" ? { type: type2 } : {}
|
|
387
|
+
};
|
|
388
|
+
}
|
|
389
|
+
const serialized = serializeError(error);
|
|
390
|
+
const { message, type, ...details } = serialized;
|
|
391
|
+
return {
|
|
392
|
+
...details,
|
|
393
|
+
message: typeof message === "string" ? message : String(message),
|
|
394
|
+
...typeof type === "string" ? { type } : {}
|
|
395
|
+
};
|
|
396
|
+
}
|
|
397
|
+
function normalizeSpanAttributes(attributes) {
|
|
398
|
+
return normalizeMetadata(attributes);
|
|
399
|
+
}
|
|
400
|
+
function createGenAiUsageAttributes(usage, options = {}) {
|
|
401
|
+
return {
|
|
402
|
+
"gen_ai.provider.name": usage?.provider ?? options.provider,
|
|
403
|
+
"gen_ai.request.model": usage?.model,
|
|
404
|
+
"gen_ai.response.model": usage?.model,
|
|
405
|
+
"gen_ai.usage.input_tokens": usage?.inputTokens,
|
|
406
|
+
"gen_ai.usage.output_tokens": usage?.outputTokens,
|
|
407
|
+
"gen_ai.usage.reasoning.output_tokens": usage?.reasoningTokens
|
|
408
|
+
};
|
|
409
|
+
}
|
|
223
410
|
function toolCalls(session) {
|
|
224
411
|
return session.messages.flatMap((message) => message.toolCalls ?? []);
|
|
225
412
|
}
|
|
413
|
+
function createToolCallSpans(calls, options = {}) {
|
|
414
|
+
return calls.map((call, index) => {
|
|
415
|
+
const spanError = call.error ? normalizeSpanError(call.error) : void 0;
|
|
416
|
+
const spanId = options.spanIdPrefix ? `${options.spanIdPrefix}:${index + 1}` : call.id;
|
|
417
|
+
return {
|
|
418
|
+
...spanId ? { id: spanId } : {},
|
|
419
|
+
...options.traceId ? { traceId: options.traceId } : {},
|
|
420
|
+
...options.parentId ? { parentId: options.parentId } : {},
|
|
421
|
+
name: call.name,
|
|
422
|
+
kind: "tool",
|
|
423
|
+
...call.startedAt ? { startedAt: call.startedAt } : {},
|
|
424
|
+
...call.finishedAt ? { finishedAt: call.finishedAt } : {},
|
|
425
|
+
...call.durationMs !== void 0 ? { durationMs: call.durationMs } : {},
|
|
426
|
+
status: spanError ? "error" : "ok",
|
|
427
|
+
...spanError ? { error: spanError } : {},
|
|
428
|
+
attributes: normalizeSpanAttributes({
|
|
429
|
+
"gen_ai.operation.name": "execute_tool",
|
|
430
|
+
"gen_ai.tool.name": call.name,
|
|
431
|
+
"gen_ai.tool.type": "function",
|
|
432
|
+
...call.id ? { "gen_ai.tool.call.id": call.id } : {},
|
|
433
|
+
...call.arguments !== void 0 ? { "gen_ai.tool.call.arguments": call.arguments } : {},
|
|
434
|
+
...call.result !== void 0 ? { "gen_ai.tool.call.result": call.result } : {}
|
|
435
|
+
})
|
|
436
|
+
};
|
|
437
|
+
});
|
|
438
|
+
}
|
|
439
|
+
function ensureRunTrace(run, options) {
|
|
440
|
+
if (spans(run).length > 0) {
|
|
441
|
+
return void 0;
|
|
442
|
+
}
|
|
443
|
+
const traceId = options.id ?? createGeneratedTraceId();
|
|
444
|
+
const rootSpanId = `${traceId}:run`;
|
|
445
|
+
const durationMs = options.finishedAt.getTime() - options.startedAt.getTime();
|
|
446
|
+
const rootError = run.errors.length > 0 ? normalizeSpanError(run.errors[0]) : void 0;
|
|
447
|
+
const runSpan = {
|
|
448
|
+
id: rootSpanId,
|
|
449
|
+
traceId,
|
|
450
|
+
name: options.name,
|
|
451
|
+
kind: "run",
|
|
452
|
+
startedAt: options.startedAt.toISOString(),
|
|
453
|
+
finishedAt: options.finishedAt.toISOString(),
|
|
454
|
+
durationMs,
|
|
455
|
+
status: rootError ? "error" : "ok",
|
|
456
|
+
...rootError ? { error: rootError } : {},
|
|
457
|
+
attributes: normalizeSpanAttributes({
|
|
458
|
+
"gen_ai.operation.name": options.operationName ?? "invoke_workflow",
|
|
459
|
+
"gen_ai.workflow.name": options.name,
|
|
460
|
+
...createGenAiUsageAttributes(run.usage)
|
|
461
|
+
})
|
|
462
|
+
};
|
|
463
|
+
const toolSpans = createToolCallSpans(toolCalls(run.session), {
|
|
464
|
+
traceId,
|
|
465
|
+
parentId: rootSpanId,
|
|
466
|
+
spanIdPrefix: `${traceId}:tool`
|
|
467
|
+
});
|
|
468
|
+
const trace = {
|
|
469
|
+
id: traceId,
|
|
470
|
+
name: options.name,
|
|
471
|
+
startedAt: options.startedAt.toISOString(),
|
|
472
|
+
finishedAt: options.finishedAt.toISOString(),
|
|
473
|
+
durationMs,
|
|
474
|
+
...options.source ? { metadata: { source: options.source } } : {},
|
|
475
|
+
spans: [runSpan, ...toolSpans]
|
|
476
|
+
};
|
|
477
|
+
run.traces = [trace];
|
|
478
|
+
return trace;
|
|
479
|
+
}
|
|
480
|
+
var nextGeneratedTraceId = 0;
|
|
481
|
+
function createGeneratedTraceId() {
|
|
482
|
+
nextGeneratedTraceId += 1;
|
|
483
|
+
return `trace_${nextGeneratedTraceId}`;
|
|
484
|
+
}
|
|
485
|
+
function spans(run) {
|
|
486
|
+
return (run.traces ?? []).flatMap((trace) => trace.spans);
|
|
487
|
+
}
|
|
488
|
+
function spansByKind(run, kind) {
|
|
489
|
+
return spans(run).filter((span) => span.kind === kind);
|
|
490
|
+
}
|
|
491
|
+
function failedSpans(run) {
|
|
492
|
+
return spans(run).filter((span) => span.status === "error" || span.error);
|
|
493
|
+
}
|
|
226
494
|
function messagesByRole(session, role) {
|
|
227
495
|
return session.messages.filter((message) => message.role === role);
|
|
228
496
|
}
|
|
@@ -288,7 +556,12 @@ function serializeError(error) {
|
|
|
288
556
|
0 && (module.exports = {
|
|
289
557
|
assistantMessages,
|
|
290
558
|
attachHarnessRunToError,
|
|
559
|
+
createFailedHarnessRun,
|
|
560
|
+
createGenAiUsageAttributes,
|
|
291
561
|
createHarness,
|
|
562
|
+
createToolCallSpans,
|
|
563
|
+
ensureRunTrace,
|
|
564
|
+
failedSpans,
|
|
292
565
|
getHarnessRunFromError,
|
|
293
566
|
hasCallableMethod,
|
|
294
567
|
isHarnessRun,
|
|
@@ -299,8 +572,12 @@ function serializeError(error) {
|
|
|
299
572
|
normalizeHarnessRun,
|
|
300
573
|
normalizeMetadata,
|
|
301
574
|
normalizeRecord,
|
|
575
|
+
normalizeSpanAttributes,
|
|
576
|
+
normalizeSpanError,
|
|
302
577
|
resolveHarnessRunErrors,
|
|
303
578
|
serializeError,
|
|
579
|
+
spans,
|
|
580
|
+
spansByKind,
|
|
304
581
|
systemMessages,
|
|
305
582
|
toJsonValue,
|
|
306
583
|
toolCalls,
|
package/dist/harness.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/harness.ts"],"sourcesContent":["/** Primitive scalar values allowed in normalized JSON-safe eval data. */\nexport type JsonPrimitive = string | number | boolean | null;\n\n/** JSON-safe value shape used by normalized sessions, artifacts, and errors. */\nexport type JsonValue =\n | JsonPrimitive\n | JsonValue[]\n | { [key: string]: JsonValue };\n\n/**\n * Normalized record for one tool call observed during a harness run.\n *\n * @example\n * ```ts\n * const call: ToolCallRecord = {\n * name: \"lookupInvoice\",\n * arguments: { invoiceId: \"inv_123\" },\n * result: { refundable: true },\n * };\n * ```\n */\nexport type ToolCallRecord = {\n /** Provider or runtime tool-call id when one is available. */\n id?: string;\n /** Tool name as exposed to the agent or application runtime. */\n name: string;\n /** JSON-safe tool arguments after provider/runtime normalization. */\n arguments?: Record<string, JsonValue>;\n /** JSON-safe tool result returned by the application tool. */\n result?: JsonValue;\n /** Normalized tool error when execution failed. */\n error?: {\n message: string;\n type?: string;\n [key: string]: JsonValue | undefined;\n };\n /** ISO timestamp for the start of tool execution. */\n startedAt?: string;\n /** ISO timestamp for the end of tool execution. */\n finishedAt?: string;\n /** Tool execution duration in milliseconds. */\n durationMs?: number;\n /** Extra JSON-safe tool metadata for reporters and custom judges. */\n metadata?: Record<string, JsonValue>;\n};\n\n/**\n * Normalized message recorded in a harness session transcript.\n *\n * @example\n * ```ts\n * const message: NormalizedMessage = {\n * role: \"assistant\",\n * content: { status: \"approved\" },\n * toolCalls: [{ name: \"lookupInvoice\" }],\n * };\n * ```\n */\nexport type NormalizedMessage = {\n /** Transcript role for the normalized message. */\n role: \"system\" | \"user\" | \"assistant\" | \"tool\";\n /** JSON-safe message content. */\n content?: JsonValue;\n /** Tool calls associated with this message. */\n toolCalls?: ToolCallRecord[];\n /** Extra JSON-safe message metadata. */\n metadata?: Record<string, JsonValue>;\n};\n\n/**\n * Provider usage summary attached to a normalized harness run.\n *\n * @example\n * ```ts\n * const usage: UsageSummary = {\n * provider: \"openai\",\n * model: \"gpt-4o-mini\",\n * inputTokens: 212,\n * outputTokens: 48,\n * totalTokens: 260,\n * };\n * ```\n */\nexport type UsageSummary = {\n /** Provider that served the application run. */\n provider?: string;\n /** Model used for the application run. */\n model?: string;\n /** Input, prompt, or request tokens consumed by the run. */\n inputTokens?: number;\n /** Output or completion tokens produced by the run. */\n outputTokens?: number;\n /** Reasoning tokens reported by providers that expose them. */\n reasoningTokens?: number;\n /** Total token count reported by the provider or adapter. */\n totalTokens?: number;\n /** Count of tool calls observed during the run. */\n toolCalls?: number;\n /** Retry count observed during the run. */\n retries?: number;\n /** Provider-specific JSON-safe usage details. Cost estimates belong here. */\n metadata?: Record<string, JsonValue>;\n};\n\n/** Timing summary attached to a normalized harness run. */\nexport type TimingSummary = {\n /** End-to-end run duration in milliseconds. */\n totalMs?: number;\n /** Extra JSON-safe timing metadata. */\n metadata?: Record<string, JsonValue>;\n};\n\n/**\n * JSON-serializable transcript produced by the system under test.\n *\n * @example\n * ```ts\n * const session: NormalizedSession = {\n * provider: \"openai\",\n * model: \"gpt-4o-mini\",\n * messages: [\n * { role: \"user\", content: \"Refund invoice inv_123\" },\n * { role: \"assistant\", content: { status: \"approved\" } },\n * ],\n * };\n * ```\n */\nexport type NormalizedSession = {\n /** Ordered normalized transcript messages. */\n messages: NormalizedMessage[];\n /** Provider that produced the session when known. */\n provider?: string;\n /** Model that produced the session when known. */\n model?: string;\n /** Extra JSON-safe session metadata. */\n metadata?: Record<string, JsonValue>;\n};\n\ntype OutputField<TOutput extends JsonValue | undefined> =\n undefined extends TOutput ? { output?: TOutput } : { output: TOutput };\n\n/**\n * Normalized result returned by every harness execution.\n *\n * @example\n * ```ts\n * const run: HarnessRun<{ status: \"approved\" }> = {\n * output: { status: \"approved\" },\n * session: {\n * messages: [\n * { role: \"user\", content: \"Refund invoice inv_123\" },\n * { role: \"assistant\", content: { status: \"approved\" } },\n * ],\n * },\n * usage: { totalTokens: 260 },\n * errors: [],\n * };\n * ```\n */\nexport type HarnessRun<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n /** Normalized transcript and provider/session metadata. */\n session: NormalizedSession;\n /** Stable provider usage units such as tokens, tools, and retries. */\n usage: UsageSummary;\n /** Optional timing summary for the run. */\n timings?: TimingSummary;\n /** JSON-safe run artifacts captured by the harness or test context. */\n artifacts?: Record<string, JsonValue>;\n /** Normalized errors captured during execution. */\n errors: Array<Record<string, JsonValue>>;\n};\n\n/** Error value with an attached partial or complete normalized harness run. */\nexport type HarnessRunError = Error & {\n /** Attached normalized harness run recovered by `getHarnessRunFromError(...)`. */\n vitestEvalsRun: HarnessRun;\n};\n\n/** Per-run metadata shape accepted by harnesses and eval tests. */\nexport type HarnessMetadata = Record<string, unknown>;\n\n/**\n * Runtime context passed from the eval fixture into a harness run.\n *\n * @example\n * ```ts\n * const harness: Harness<string> = {\n * name: \"refund-agent\",\n * async run(input, context) {\n * context.setArtifact(\"inputLength\", input.length);\n *\n * return {\n * output: undefined,\n * session: { messages: [{ role: \"user\", content: input }] },\n * usage: {},\n * errors: [],\n * };\n * },\n * };\n * ```\n */\nexport type HarnessContext<\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n /** Per-run metadata passed through `run(input, { metadata })`. */\n metadata: Readonly<TMetadata>;\n /** Abort signal from Vitest when available. */\n signal?: AbortSignal;\n /** Mutable JSON-safe artifact bag shared with the harness. */\n artifacts: Record<string, JsonValue>;\n /** Stores one JSON-safe artifact on the current run. */\n setArtifact: (name: string, value: JsonValue) => void;\n};\n\n/**\n * Adapter that executes the system under test and returns a normalized run.\n *\n * @example\n * ```ts\n * const harness: Harness<string, { status: \"approved\" | \"denied\" }> = {\n * name: \"refund-agent\",\n * async run(input, context) {\n * return normalizeHarnessRun(input, await runRefundFlow(input), context);\n * },\n * };\n * ```\n */\nexport type Harness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n /** Stable harness name used in reports. */\n name: string;\n /** Executes the system under test and returns a normalized run. */\n run: (\n input: TInput,\n context: HarnessContext<TMetadata>,\n ) => Promise<HarnessRun<TOutput>>;\n};\n\n/** Value or promise accepted by lightweight harness callbacks. */\nexport type MaybePromise<T> = T | Promise<T>;\n\n/** Lightweight tool-call record accepted by `createHarness(...)` results. */\nexport type SimpleToolCallRecord = Omit<\n ToolCallRecord,\n \"arguments\" | \"result\" | \"error\" | \"metadata\"\n> & {\n /** Raw tool arguments accepted by `createHarness(...)` before normalization. */\n arguments?: unknown;\n /** Raw tool result accepted by `createHarness(...)` before normalization. */\n result?: unknown;\n /** Raw tool error accepted by `createHarness(...)` before normalization. */\n error?: unknown;\n /** Raw tool metadata accepted by `createHarness(...)` before normalization. */\n metadata?: Record<string, unknown>;\n};\n\n/**\n * Lightweight result shape normalized by `createHarness(...)`.\n *\n * @example\n * ```ts\n * const result: SimpleHarnessResult<{ status: \"approved\" }> = {\n * output: { status: \"approved\" },\n * toolCalls: [{ name: \"lookupInvoice\", arguments: { invoiceId: \"inv_123\" } }],\n * usage: { totalTokens: 260 },\n * };\n * ```\n */\nexport type SimpleHarnessResult<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n /** Pre-normalized transcript messages. When omitted, a default user/assistant transcript is created. */\n messages?: NormalizedMessage[];\n /** Lightweight tool-call records to normalize into the session. */\n toolCalls?: SimpleToolCallRecord[];\n /** Usage summary to attach to the run. */\n usage?: UsageSummary;\n /** Timing summary to attach to the run. */\n timings?: TimingSummary;\n /** Raw artifact values to normalize and merge into the run. */\n artifacts?: Record<string, unknown>;\n /** Raw session metadata to normalize into the session. */\n metadata?: Record<string, unknown>;\n /** Raw errors to normalize into the run. */\n errors?: unknown[];\n};\n\n/** Either a complete normalized run or a lightweight result to normalize. */\nexport type HarnessResultLike<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;\n\n/** Arguments passed to the `createHarness(...)` convenience callback. */\nexport type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {\n /** Original input passed to `run(input)`. */\n input: TInput;\n /** Read-only metadata passed to `run(input, { metadata })`. */\n metadata: Readonly<TMetadata>;\n /** Abort signal from Vitest when available. */\n signal?: AbortSignal;\n /** Mutable run artifact bag. */\n artifacts: HarnessContext<TMetadata>[\"artifacts\"];\n /** Stores one JSON-safe artifact on the current run. */\n setArtifact: HarnessContext<TMetadata>[\"setArtifact\"];\n};\n\n/**\n * Options for creating a lightweight custom application harness.\n *\n * @example\n * ```ts\n * const options: CreateHarnessOptions<string, { status: \"approved\" }> = {\n * name: \"refund-agent\",\n * run: async ({ input }) => ({\n * output: await classifyRefund(input),\n * }),\n * };\n * ```\n */\nexport type CreateHarnessOptions<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n /** Stable harness name used in reports. */\n name: string;\n /** Executes application code and returns either a lightweight result or full `HarnessRun`. */\n run: (\n args: CreateHarnessRunArgs<TInput, TMetadata>,\n ) => MaybePromise<HarnessResultLike<TOutput>>;\n};\n\nfunction isJsonPrimitive(value: unknown): value is JsonPrimitive {\n return (\n value === null ||\n typeof value === \"string\" ||\n typeof value === \"number\" ||\n typeof value === \"boolean\"\n );\n}\n\nfunction isJsonRecord(value: unknown): value is Record<string, unknown> {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\nfunction normalizeJsonArray(value: unknown[]): JsonValue[] {\n return value.map((item) => {\n const normalized = toJsonValue(item);\n return normalized === undefined ? null : normalized;\n });\n}\n\nfunction normalizeJsonObject(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n const normalized: Record<string, JsonValue> = {};\n\n for (const [key, entryValue] of Object.entries(value)) {\n const entry = toJsonValue(entryValue);\n if (entry !== undefined) {\n normalized[key] = entry;\n }\n }\n\n return normalized;\n}\n\n/** Returns true when a value exposes a callable method with the given name. */\nexport function hasCallableMethod(value: unknown, methodName: string) {\n return (\n value !== null &&\n (typeof value === \"object\" || typeof value === \"function\") &&\n methodName in value &&\n typeof (value as Record<string, unknown>)[methodName] === \"function\"\n );\n}\n\n/** Normalizes an unknown value into the JSON-safe shape used by harness runs. */\nexport function toJsonValue(value: unknown): JsonValue | undefined {\n if (isJsonPrimitive(value)) {\n return value;\n }\n\n if (Array.isArray(value)) {\n return normalizeJsonArray(value);\n }\n\n if (isJsonRecord(value)) {\n return normalizeJsonObject(value);\n }\n\n return undefined;\n}\n\n/** Drops non-JSON properties from a record while preserving valid values. */\nexport function normalizeRecord(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n return normalizeJsonObject(value);\n}\n\n/** Normalizes metadata and omits the field entirely when nothing survives. */\nexport function normalizeMetadata(\n value: Record<string, unknown>,\n): Record<string, JsonValue> | undefined {\n const normalized = normalizeRecord(value);\n return Object.keys(normalized).length > 0 ? normalized : undefined;\n}\n\n/** Converts arbitrary content into the JSON-safe message content shape. */\nexport function normalizeContent(value: unknown): JsonValue {\n const normalized = toJsonValue(value);\n return normalized !== undefined ? normalized : String(value);\n}\n\n/**\n * Creates a harness from the common \"run app code and return output\" shape.\n *\n * @param options - Harness name plus the callback that executes app code.\n *\n * @example\n * ```ts\n * import { createHarness } from \"vitest-evals\";\n *\n * export const refundHarness = createHarness<\n * string,\n * { status: \"approved\" | \"denied\" },\n * { expected: { status: \"approved\" | \"denied\" } }\n * >({\n * name: \"refund-agent\",\n * run: async ({ input, metadata, setArtifact }) => {\n * const result = await runRefundFlow(input, metadata);\n * const output = { status: result.status };\n *\n * setArtifact(\"case\", { expected: metadata.expected.status });\n *\n * return {\n * output,\n * toolCalls: result.toolCalls,\n * usage: { provider: \"openai\", model: \"gpt-4o-mini\" },\n * };\n * },\n * });\n * ```\n */\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata>;\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata> {\n const harness: Harness<TInput, TOutput, TMetadata> = {\n name: options.name,\n run: async (input, context) => {\n const result = await options.run({\n input,\n metadata: context.metadata,\n signal: context.signal,\n artifacts: context.artifacts,\n setArtifact: context.setArtifact,\n });\n\n return normalizeHarnessRun(input, result, context);\n },\n };\n\n return harness;\n}\n\n/**\n * Normalizes a lightweight harness result into the reporter-facing run shape.\n *\n * @param input - Original input passed to the harness.\n * @param result - Lightweight result or pre-normalized harness run.\n * @param context - Optional per-run context used to merge artifacts.\n *\n * @example\n * ```ts\n * const run = normalizeHarnessRun(\"Refund invoice inv_123\", {\n * output: { status: \"approved\" },\n * toolCalls: [{ name: \"lookupInvoice\", arguments: { invoiceId: \"inv_123\" } }],\n * usage: { provider: \"openai\", model: \"gpt-4o-mini\" },\n * });\n *\n * expect(toolCalls(run.session)).toHaveLength(1);\n * ```\n */\nexport function normalizeHarnessRun<\n TInput = unknown,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n>(\n input: TInput,\n result: HarnessResultLike<TOutput>,\n context?: HarnessContext<TMetadata>,\n): HarnessRun<TOutput> {\n if (isHarnessRun(result)) {\n if (\n context &&\n Object.keys(context.artifacts).length > 0 &&\n !result.artifacts\n ) {\n return {\n ...result,\n artifacts: context.artifacts,\n };\n }\n\n return result;\n }\n\n const output = result.output;\n const toolCalls = normalizeSimpleToolCalls(result.toolCalls);\n const usage = result.usage ?? {};\n const messages =\n result.messages ??\n createDefaultSessionMessages({\n input,\n output,\n toolCalls,\n });\n const metadata = result.metadata\n ? normalizeMetadata(result.metadata)\n : undefined;\n const artifacts = normalizeMergedArtifacts(\n context?.artifacts,\n result.artifacts,\n );\n\n return {\n session: {\n messages,\n ...(usage.provider ? { provider: usage.provider } : {}),\n ...(usage.model ? { model: usage.model } : {}),\n ...(metadata ? { metadata } : {}),\n },\n ...(output !== undefined ? { output } : {}),\n usage,\n ...(result.timings ? { timings: result.timings } : {}),\n ...(artifacts ? { artifacts } : {}),\n errors: normalizeSimpleErrors(result.errors),\n } as HarnessRun<TOutput>;\n}\n\nfunction createDefaultSessionMessages<TInput>({\n input,\n output,\n toolCalls: normalizedToolCalls,\n}: {\n input: TInput;\n output: JsonValue | undefined;\n toolCalls: ToolCallRecord[];\n}): NormalizedMessage[] {\n const messages: NormalizedMessage[] = [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ];\n\n if (output !== undefined || normalizedToolCalls.length > 0) {\n messages.push({\n role: \"assistant\",\n ...(output !== undefined ? { content: normalizeContent(output) } : {}),\n ...(normalizedToolCalls.length > 0\n ? { toolCalls: normalizedToolCalls }\n : {}),\n });\n }\n\n return messages;\n}\n\nfunction normalizeSimpleToolCalls(\n calls: SimpleToolCallRecord[] | undefined,\n): ToolCallRecord[] {\n return (calls ?? []).map((call) => {\n const {\n arguments: rawArguments,\n result: rawResult,\n error: rawError,\n metadata: rawMetadata,\n ...toolCall\n } = call;\n const args = normalizeToolCallArguments(rawArguments);\n const result = toJsonValue(rawResult);\n const error = normalizeToolCallError(rawError);\n const metadata = rawMetadata ? normalizeMetadata(rawMetadata) : undefined;\n\n return {\n ...toolCall,\n ...(args ? { arguments: args } : {}),\n ...(result !== undefined ? { result } : {}),\n ...(error ? { error } : {}),\n ...(metadata ? { metadata } : {}),\n };\n });\n}\n\nfunction normalizeToolCallArguments(\n value: unknown,\n): Record<string, JsonValue> | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const normalized = toJsonValue(value);\n return normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized)\n ? normalized\n : undefined;\n}\n\nfunction normalizeToolCallError(\n value: unknown,\n): ToolCallRecord[\"error\"] | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const serialized = serializeError(value);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\nfunction normalizeMergedArtifacts(\n contextArtifacts: Record<string, JsonValue> | undefined,\n resultArtifacts: Record<string, unknown> | undefined,\n) {\n const artifacts = {\n ...(contextArtifacts ?? {}),\n ...(resultArtifacts ? normalizeRecord(resultArtifacts) : {}),\n };\n\n return Object.keys(artifacts).length > 0 ? artifacts : undefined;\n}\n\nfunction normalizeSimpleErrors(\n errors: unknown[] | undefined,\n): Array<Record<string, JsonValue>> {\n return (errors ?? []).map((error) => {\n const normalized = toJsonValue(error);\n\n if (\n normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized) &&\n Object.keys(normalized).length > 0\n ) {\n return normalized;\n }\n\n return serializeError(error);\n });\n}\n\n/**\n * Flattens every recorded tool call from a normalized session.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const names = toolCalls(result.session).map((call) => call.name);\n *\n * expect(names).toEqual([\"lookupInvoice\", \"createRefund\"]);\n * ```\n */\nexport function toolCalls(session: NormalizedSession): ToolCallRecord[] {\n return session.messages.flatMap((message) => message.toolCalls ?? []);\n}\n\n/**\n * Filters normalized session messages by role.\n *\n * @param session - Normalized session produced by a harness run.\n * @param role - Message role to keep.\n *\n * @example\n * ```ts\n * const assistantText = messagesByRole(result.session, \"assistant\")\n * .map((message) => message.content)\n * .join(\"\\n\");\n * ```\n */\nexport function messagesByRole(\n session: NormalizedSession,\n role: NormalizedMessage[\"role\"],\n): NormalizedMessage[] {\n return session.messages.filter((message) => message.role === role);\n}\n\nfunction hasNonEmptyMessageContent(message: NormalizedMessage) {\n return (\n message.content !== undefined &&\n (typeof message.content !== \"string\" || message.content.trim().length > 0)\n );\n}\n\n/**\n * Returns every normalized system message from a session.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const systemPrompts = systemMessages(result.session);\n * ```\n */\nexport function systemMessages(session: NormalizedSession) {\n return messagesByRole(session, \"system\");\n}\n\n/**\n * Returns every normalized user message from a session.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const firstPrompt = userMessages(result.session)[0]?.content;\n * ```\n */\nexport function userMessages(session: NormalizedSession) {\n return messagesByRole(session, \"user\");\n}\n\n/**\n * Returns every normalized assistant message from a session.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const finalAnswer = assistantMessages(result.session).at(-1)?.content;\n * ```\n */\nexport function assistantMessages(session: NormalizedSession) {\n return messagesByRole(session, \"assistant\");\n}\n\n/**\n * Returns the latest assistant message content, ignoring empty text messages.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const finalAnswer = latestAssistantMessageContent(result.session);\n * ```\n */\nexport function latestAssistantMessageContent(session: NormalizedSession) {\n return [...assistantMessages(session)]\n .reverse()\n .find(hasNonEmptyMessageContent)?.content;\n}\n\n/**\n * Returns every normalized tool message from a session.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const toolOutputs = toolMessages(result.session).map((message) => message.content);\n * ```\n */\nexport function toolMessages(session: NormalizedSession) {\n return messagesByRole(session, \"tool\");\n}\n\n/**\n * Attaches a partial or complete harness run to an arbitrary thrown error.\n *\n * @param error - Thrown value to wrap.\n * @param run - Partial or complete normalized harness run to preserve.\n *\n * @example\n * ```ts\n * try {\n * return await runAgent(input);\n * } catch (error) {\n * throw attachHarnessRunToError(error, partialRun);\n * }\n * ```\n */\nexport function attachHarnessRunToError(\n error: unknown,\n run: HarnessRun,\n): HarnessRunError {\n const baseError =\n error instanceof Error\n ? error\n : new Error(String(error ?? \"Unknown error\"));\n return Object.assign(baseError, {\n vitestEvalsRun: run,\n });\n}\n\n/**\n * Reads an attached harness run back off a previously wrapped error value.\n *\n * @param error - Unknown thrown value that may contain a harness run.\n *\n * @example\n * ```ts\n * const partialRun = getHarnessRunFromError(error);\n *\n * if (partialRun) {\n * console.log(toolCalls(partialRun.session));\n * }\n * ```\n */\nexport function getHarnessRunFromError(error: unknown): HarnessRun | undefined {\n if (\n error &&\n typeof error === \"object\" &&\n \"vitestEvalsRun\" in error &&\n isHarnessRun((error as { vitestEvalsRun?: unknown }).vitestEvalsRun)\n ) {\n return (error as { vitestEvalsRun: HarnessRun }).vitestEvalsRun;\n }\n\n return undefined;\n}\n\n/** Returns true when a value matches the normalized `HarnessRun` contract. */\nexport function isHarnessRun(value: unknown): value is HarnessRun {\n if (!value || typeof value !== \"object\") {\n return false;\n }\n\n const candidate = value as {\n session?: unknown;\n usage?: unknown;\n errors?: unknown;\n };\n\n return (\n isNormalizedSession(candidate.session) &&\n Boolean(candidate.usage) &&\n typeof candidate.usage === \"object\" &&\n !Array.isArray(candidate.usage) &&\n Array.isArray(candidate.errors)\n );\n}\n\n/** Returns true when a value matches the normalized session contract. */\nexport function isNormalizedSession(\n value: unknown,\n): value is NormalizedSession {\n return (\n Boolean(value) &&\n typeof value === \"object\" &&\n value !== null &&\n \"messages\" in value &&\n Array.isArray((value as { messages?: unknown }).messages)\n );\n}\n\n/** Reuses pre-normalized harness errors when a runtime already returns them. */\nexport function resolveHarnessRunErrors(\n result: unknown,\n): Array<Record<string, JsonValue>> {\n if (\n result &&\n typeof result === \"object\" &&\n Array.isArray((result as Record<string, unknown>).errors)\n ) {\n return (result as { errors: Array<Record<string, JsonValue>> }).errors;\n }\n\n return [];\n}\n\n/** Serializes an arbitrary thrown value into the normalized error shape. */\nexport function serializeError(error: unknown): Record<string, JsonValue> {\n if (error instanceof Error) {\n return {\n type: error.name,\n message: error.message,\n };\n }\n\n return {\n type: \"Error\",\n message: String(error),\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAiVA,SAAS,gBAAgB,OAAwC;AAC/D,SACE,UAAU,QACV,OAAO,UAAU,YACjB,OAAO,UAAU,YACjB,OAAO,UAAU;AAErB;AAEA,SAAS,aAAa,OAAkD;AACtE,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAEA,SAAS,mBAAmB,OAA+B;AACzD,SAAO,MAAM,IAAI,CAAC,SAAS;AACzB,UAAM,aAAa,YAAY,IAAI;AACnC,WAAO,eAAe,SAAY,OAAO;AAAA,EAC3C,CAAC;AACH;AAEA,SAAS,oBACP,OAC2B;AAC3B,QAAM,aAAwC,CAAC;AAE/C,aAAW,CAAC,KAAK,UAAU,KAAK,OAAO,QAAQ,KAAK,GAAG;AACrD,UAAM,QAAQ,YAAY,UAAU;AACpC,QAAI,UAAU,QAAW;AACvB,iBAAW,GAAG,IAAI;AAAA,IACpB;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,kBAAkB,OAAgB,YAAoB;AACpE,SACE,UAAU,SACT,OAAO,UAAU,YAAY,OAAO,UAAU,eAC/C,cAAc,SACd,OAAQ,MAAkC,UAAU,MAAM;AAE9D;AAGO,SAAS,YAAY,OAAuC;AACjE,MAAI,gBAAgB,KAAK,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,WAAO,mBAAmB,KAAK;AAAA,EACjC;AAEA,MAAI,aAAa,KAAK,GAAG;AACvB,WAAO,oBAAoB,KAAK;AAAA,EAClC;AAEA,SAAO;AACT;AAGO,SAAS,gBACd,OAC2B;AAC3B,SAAO,oBAAoB,KAAK;AAClC;AAGO,SAAS,kBACd,OACuC;AACvC,QAAM,aAAa,gBAAgB,KAAK;AACxC,SAAO,OAAO,KAAK,UAAU,EAAE,SAAS,IAAI,aAAa;AAC3D;AAGO,SAAS,iBAAiB,OAA2B;AAC1D,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,eAAe,SAAY,aAAa,OAAO,KAAK;AAC7D;AAuCO,SAAS,cAKd,SACqC;AACrC,QAAM,UAA+C;AAAA,IACnD,MAAM,QAAQ;AAAA,IACd,KAAK,OAAO,OAAO,YAAY;AAC7B,YAAM,SAAS,MAAM,QAAQ,IAAI;AAAA,QAC/B;AAAA,QACA,UAAU,QAAQ;AAAA,QAClB,QAAQ,QAAQ;AAAA,QAChB,WAAW,QAAQ;AAAA,QACnB,aAAa,QAAQ;AAAA,MACvB,CAAC;AAED,aAAO,oBAAoB,OAAO,QAAQ,OAAO;AAAA,IACnD;AAAA,EACF;AAEA,SAAO;AACT;AAoBO,SAAS,oBAKd,OACA,QACA,SACqB;AACrB,MAAI,aAAa,MAAM,GAAG;AACxB,QACE,WACA,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,OAAO,WACR;AACA,aAAO;AAAA,QACL,GAAG;AAAA,QACH,WAAW,QAAQ;AAAA,MACrB;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,OAAO;AACtB,QAAMA,aAAY,yBAAyB,OAAO,SAAS;AAC3D,QAAM,QAAQ,OAAO,SAAS,CAAC;AAC/B,QAAM,WACJ,OAAO,YACP,6BAA6B;AAAA,IAC3B;AAAA,IACA;AAAA,IACA,WAAAA;AAAA,EACF,CAAC;AACH,QAAM,WAAW,OAAO,WACpB,kBAAkB,OAAO,QAAQ,IACjC;AACJ,QAAM,YAAY;AAAA,IAChB,SAAS;AAAA,IACT,OAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,SAAS;AAAA,MACP;AAAA,MACA,GAAI,MAAM,WAAW,EAAE,UAAU,MAAM,SAAS,IAAI,CAAC;AAAA,MACrD,GAAI,MAAM,QAAQ,EAAE,OAAO,MAAM,MAAM,IAAI,CAAC;AAAA,MAC5C,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,IACA,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,IACzC;AAAA,IACA,GAAI,OAAO,UAAU,EAAE,SAAS,OAAO,QAAQ,IAAI,CAAC;AAAA,IACpD,GAAI,YAAY,EAAE,UAAU,IAAI,CAAC;AAAA,IACjC,QAAQ,sBAAsB,OAAO,MAAM;AAAA,EAC7C;AACF;AAEA,SAAS,6BAAqC;AAAA,EAC5C;AAAA,EACA;AAAA,EACA,WAAW;AACb,GAIwB;AACtB,QAAM,WAAgC;AAAA,IACpC;AAAA,MACE,MAAM;AAAA,MACN,SAAS,iBAAiB,KAAK;AAAA,IACjC;AAAA,EACF;AAEA,MAAI,WAAW,UAAa,oBAAoB,SAAS,GAAG;AAC1D,aAAS,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,GAAI,WAAW,SAAY,EAAE,SAAS,iBAAiB,MAAM,EAAE,IAAI,CAAC;AAAA,MACpE,GAAI,oBAAoB,SAAS,IAC7B,EAAE,WAAW,oBAAoB,IACjC,CAAC;AAAA,IACP,CAAC;AAAA,EACH;AAEA,SAAO;AACT;AAEA,SAAS,yBACP,OACkB;AAClB,UAAQ,SAAS,CAAC,GAAG,IAAI,CAAC,SAAS;AACjC,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,QAAQ;AAAA,MACR,OAAO;AAAA,MACP,UAAU;AAAA,MACV,GAAG;AAAA,IACL,IAAI;AACJ,UAAM,OAAO,2BAA2B,YAAY;AACpD,UAAM,SAAS,YAAY,SAAS;AACpC,UAAM,QAAQ,uBAAuB,QAAQ;AAC7C,UAAM,WAAW,cAAc,kBAAkB,WAAW,IAAI;AAEhE,WAAO;AAAA,MACL,GAAG;AAAA,MACH,GAAI,OAAO,EAAE,WAAW,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,MACzC,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,MACzB,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,EACF,CAAC;AACH;AAEA,SAAS,2BACP,OACuC;AACvC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,cACL,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,IACvB,aACA;AACN;AAEA,SAAS,uBACP,OACqC;AACrC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAEA,SAAS,yBACP,kBACA,iBACA;AACA,QAAM,YAAY;AAAA,IAChB,GAAI,oBAAoB,CAAC;AAAA,IACzB,GAAI,kBAAkB,gBAAgB,eAAe,IAAI,CAAC;AAAA,EAC5D;AAEA,SAAO,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,YAAY;AACzD;AAEA,SAAS,sBACP,QACkC;AAClC,UAAQ,UAAU,CAAC,GAAG,IAAI,CAAC,UAAU;AACnC,UAAM,aAAa,YAAY,KAAK;AAEpC,QACE,cACA,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,KACzB,OAAO,KAAK,UAAU,EAAE,SAAS,GACjC;AACA,aAAO;AAAA,IACT;AAEA,WAAO,eAAe,KAAK;AAAA,EAC7B,CAAC;AACH;AAcO,SAAS,UAAU,SAA8C;AACtE,SAAO,QAAQ,SAAS,QAAQ,CAAC,YAAY,QAAQ,aAAa,CAAC,CAAC;AACtE;AAeO,SAAS,eACd,SACA,MACqB;AACrB,SAAO,QAAQ,SAAS,OAAO,CAAC,YAAY,QAAQ,SAAS,IAAI;AACnE;AAEA,SAAS,0BAA0B,SAA4B;AAC7D,SACE,QAAQ,YAAY,WACnB,OAAO,QAAQ,YAAY,YAAY,QAAQ,QAAQ,KAAK,EAAE,SAAS;AAE5E;AAYO,SAAS,eAAe,SAA4B;AACzD,SAAO,eAAe,SAAS,QAAQ;AACzC;AAYO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAYO,SAAS,kBAAkB,SAA4B;AAC5D,SAAO,eAAe,SAAS,WAAW;AAC5C;AAYO,SAAS,8BAA8B,SAA4B;AACxE,SAAO,CAAC,GAAG,kBAAkB,OAAO,CAAC,EAClC,QAAQ,EACR,KAAK,yBAAyB,GAAG;AACtC;AAYO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAiBO,SAAS,wBACd,OACA,KACiB;AACjB,QAAM,YACJ,iBAAiB,QACb,QACA,IAAI,MAAM,OAAO,SAAS,eAAe,CAAC;AAChD,SAAO,OAAO,OAAO,WAAW;AAAA,IAC9B,gBAAgB;AAAA,EAClB,CAAC;AACH;AAgBO,SAAS,uBAAuB,OAAwC;AAC7E,MACE,SACA,OAAO,UAAU,YACjB,oBAAoB,SACpB,aAAc,MAAuC,cAAc,GACnE;AACA,WAAQ,MAAyC;AAAA,EACnD;AAEA,SAAO;AACT;AAGO,SAAS,aAAa,OAAqC;AAChE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,WAAO;AAAA,EACT;AAEA,QAAM,YAAY;AAMlB,SACE,oBAAoB,UAAU,OAAO,KACrC,QAAQ,UAAU,KAAK,KACvB,OAAO,UAAU,UAAU,YAC3B,CAAC,MAAM,QAAQ,UAAU,KAAK,KAC9B,MAAM,QAAQ,UAAU,MAAM;AAElC;AAGO,SAAS,oBACd,OAC4B;AAC5B,SACE,QAAQ,KAAK,KACb,OAAO,UAAU,YACjB,UAAU,QACV,cAAc,SACd,MAAM,QAAS,MAAiC,QAAQ;AAE5D;AAGO,SAAS,wBACd,QACkC;AAClC,MACE,UACA,OAAO,WAAW,YAClB,MAAM,QAAS,OAAmC,MAAM,GACxD;AACA,WAAQ,OAAwD;AAAA,EAClE;AAEA,SAAO,CAAC;AACV;AAGO,SAAS,eAAe,OAA2C;AACxE,MAAI,iBAAiB,OAAO;AAC1B,WAAO;AAAA,MACL,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,SAAS,OAAO,KAAK;AAAA,EACvB;AACF;","names":["toolCalls"]}
|
|
1
|
+
{"version":3,"sources":["../src/harness.ts"],"sourcesContent":["/** Primitive scalar values allowed in normalized JSON-safe eval data. */\nexport type JsonPrimitive = string | number | boolean | null;\n\n/** JSON-safe value shape used by normalized sessions, artifacts, and errors. */\nexport type JsonValue =\n | JsonPrimitive\n | JsonValue[]\n | { [key: string]: JsonValue };\n\n/** Well-known OpenTelemetry GenAI operation names. */\nexport type GenAiOperationName =\n | \"chat\"\n | \"create_agent\"\n | \"embeddings\"\n | \"execute_tool\"\n | \"generate_content\"\n | \"invoke_agent\"\n | \"invoke_workflow\"\n | \"retrieval\"\n | \"text_completion\"\n | (string & {});\n\n/** Well-known OpenTelemetry GenAI output content types. */\nexport type GenAiOutputType =\n | \"image\"\n | \"json\"\n | \"speech\"\n | \"text\"\n | (string & {});\n\n/** Well-known OpenTelemetry GenAI provider names. */\nexport type GenAiProviderName =\n | \"anthropic\"\n | \"aws.bedrock\"\n | \"azure.ai.inference\"\n | \"azure.ai.openai\"\n | \"cohere\"\n | \"deepseek\"\n | \"gcp.gemini\"\n | \"gcp.gen_ai\"\n | \"gcp.vertex_ai\"\n | \"groq\"\n | \"ibm.watsonx.ai\"\n | \"mistral_ai\"\n | \"openai\"\n | \"perplexity\"\n | \"x_ai\"\n | (string & {});\n\n/** Well-known OpenTelemetry GenAI token types. */\nexport type GenAiTokenType = \"input\" | \"output\" | (string & {});\n\n/** Well-known OpenTelemetry GenAI tool execution types. */\nexport type GenAiToolType =\n | \"datastore\"\n | \"extension\"\n | \"function\"\n | (string & {});\n\n/** Typed subset of OpenTelemetry GenAI semantic attributes. */\nexport type GenAiSemanticAttributes = {\n \"gen_ai.agent.description\"?: string;\n \"gen_ai.agent.id\"?: string;\n \"gen_ai.agent.name\"?: string;\n \"gen_ai.agent.version\"?: string;\n \"gen_ai.conversation.id\"?: string;\n \"gen_ai.data_source.id\"?: string;\n \"gen_ai.embeddings.dimension.count\"?: number;\n \"gen_ai.evaluation.explanation\"?: string;\n \"gen_ai.evaluation.name\"?: string;\n \"gen_ai.evaluation.score.label\"?: string;\n \"gen_ai.evaluation.score.value\"?: number;\n \"gen_ai.input.messages\"?: JsonValue;\n \"gen_ai.operation.name\"?: GenAiOperationName;\n \"gen_ai.output.messages\"?: JsonValue;\n \"gen_ai.output.type\"?: GenAiOutputType;\n \"gen_ai.prompt.name\"?: string;\n \"gen_ai.provider.name\"?: GenAiProviderName;\n \"gen_ai.request.choice.count\"?: number;\n \"gen_ai.request.encoding_formats\"?: string[];\n \"gen_ai.request.frequency_penalty\"?: number;\n \"gen_ai.request.max_tokens\"?: number;\n \"gen_ai.request.model\"?: string;\n \"gen_ai.request.presence_penalty\"?: number;\n \"gen_ai.request.seed\"?: number;\n \"gen_ai.request.stop_sequences\"?: string[];\n \"gen_ai.request.stream\"?: boolean;\n \"gen_ai.request.temperature\"?: number;\n \"gen_ai.request.top_k\"?: number;\n \"gen_ai.request.top_p\"?: number;\n \"gen_ai.response.finish_reasons\"?: string[];\n \"gen_ai.response.id\"?: string;\n \"gen_ai.response.model\"?: string;\n \"gen_ai.response.time_to_first_chunk\"?: number;\n \"gen_ai.retrieval.documents\"?: JsonValue;\n \"gen_ai.retrieval.query.text\"?: string;\n \"gen_ai.system_instructions\"?: JsonValue;\n \"gen_ai.token.type\"?: GenAiTokenType;\n \"gen_ai.tool.call.arguments\"?: JsonValue;\n \"gen_ai.tool.call.id\"?: string;\n \"gen_ai.tool.call.result\"?: JsonValue;\n \"gen_ai.tool.definitions\"?: JsonValue;\n \"gen_ai.tool.description\"?: string;\n \"gen_ai.tool.name\"?: string;\n \"gen_ai.tool.type\"?: GenAiToolType;\n \"gen_ai.usage.cache_creation.input_tokens\"?: number;\n \"gen_ai.usage.cache_read.input_tokens\"?: number;\n \"gen_ai.usage.input_tokens\"?: number;\n \"gen_ai.usage.output_tokens\"?: number;\n \"gen_ai.usage.reasoning.output_tokens\"?: number;\n \"gen_ai.workflow.name\"?: string;\n};\n\n/** Attribute keys defined by the OpenTelemetry GenAI semantic conventions. */\nexport type GenAiSemanticAttributeKey = keyof GenAiSemanticAttributes;\n\n/** Typed OpenTelemetry semantic attributes accepted on normalized spans. */\nexport type OpenTelemetrySemanticAttributes = GenAiSemanticAttributes & {\n \"error.type\"?: string;\n \"server.address\"?: string;\n \"server.port\"?: number;\n};\n\n/** Known OpenTelemetry semantic attribute keys accepted on normalized spans. */\nexport type OpenTelemetrySemanticAttributeKey =\n keyof OpenTelemetrySemanticAttributes;\n\n/** Attribute keys accepted on normalized spans. */\nexport type NormalizedSpanAttributeKey =\n | OpenTelemetrySemanticAttributeKey\n | (string & {});\n\n/**\n * JSON-safe span attributes. Known OpenTelemetry GenAI keys are typed while\n * custom provider and application keys remain allowed.\n */\nexport type NormalizedSpanAttributes = OpenTelemetrySemanticAttributes & {\n [key: string]: JsonValue | undefined;\n};\n\n/** Event attached to one normalized span. */\nexport type NormalizedSpanEvent = {\n /** Event name emitted by the runtime or harness. */\n name: string;\n /** ISO timestamp for the event when available. */\n timestamp?: string;\n /** JSON-safe event attributes. */\n attributes?: NormalizedSpanAttributes;\n};\n\n/** Normalized operation span captured during a harness run. */\nexport type NormalizedSpan = {\n /** Runtime or provider span id when one is available. */\n id?: string;\n /** Trace id this span belongs to. */\n traceId?: string;\n /** Parent span id when the runtime exposes hierarchy. */\n parentId?: string;\n /** Human-readable operation name. */\n name: string;\n /** Coarse operation kind used by reporters and judges. */\n kind?:\n | \"run\"\n | \"agent\"\n | \"model\"\n | \"tool\"\n | \"guardrail\"\n | \"handoff\"\n | \"custom\";\n /** ISO timestamp for the start of the span. */\n startedAt?: string;\n /** ISO timestamp for the end of the span. */\n finishedAt?: string;\n /** Span duration in milliseconds. */\n durationMs?: number;\n /** Success or failure status for the span. */\n status?: \"ok\" | \"error\";\n /** Normalized error when the span failed. */\n error?: {\n message: string;\n type?: string;\n [key: string]: JsonValue | undefined;\n };\n /** JSON-safe operation attributes. */\n attributes?: NormalizedSpanAttributes;\n /** Events observed inside this span. */\n events?: NormalizedSpanEvent[];\n};\n\n/** Normalized trace captured during a harness run. */\nexport type NormalizedTrace = {\n /** Runtime or provider trace id when one is available. */\n id?: string;\n /** Human-readable trace or workflow name. */\n name?: string;\n /** ISO timestamp for the start of the trace. */\n startedAt?: string;\n /** ISO timestamp for the end of the trace. */\n finishedAt?: string;\n /** Trace duration in milliseconds. */\n durationMs?: number;\n /** Extra JSON-safe trace metadata. */\n metadata?: Record<string, JsonValue>;\n /** Spans that make up this trace. */\n spans: NormalizedSpan[];\n};\n\n/** Options for converting normalized tool calls into trace spans. */\nexport type CreateToolCallSpansOptions = {\n /** Trace id to attach to each generated tool span. */\n traceId?: string;\n /** Parent span id to attach to each generated tool span. */\n parentId?: string;\n /** Prefix used to create internal span ids instead of reusing tool-call ids. */\n spanIdPrefix?: string;\n};\n\n/** Options for attaching a fallback run trace to a harness result. */\nexport type EnsureRunTraceOptions = {\n /** Human-readable run or harness name. */\n name: string;\n /** Wall-clock start time for the harness run. */\n startedAt: Date;\n /** Wall-clock finish time for the harness run. */\n finishedAt: Date;\n /** Optional trace id. A generated id is used when omitted. */\n id?: string;\n /** GenAI operation name to place on the root run span. */\n operationName?: GenAiOperationName;\n /** Optional JSON-safe source marker for the trace metadata. */\n source?: string;\n};\n\n/**\n * Normalized record for one tool call observed during a harness run.\n *\n * @example\n * ```ts\n * const call: ToolCallRecord = {\n * name: \"lookupInvoice\",\n * arguments: { invoiceId: \"inv_123\" },\n * result: { refundable: true },\n * };\n * ```\n */\nexport type ToolCallRecord = {\n /** Provider or runtime tool-call id when one is available. */\n id?: string;\n /** Tool name as exposed to the agent or application runtime. */\n name: string;\n /** JSON-safe tool arguments after provider/runtime normalization. */\n arguments?: Record<string, JsonValue>;\n /** JSON-safe tool result returned by the application tool. */\n result?: JsonValue;\n /** Normalized tool error when execution failed. */\n error?: {\n message: string;\n type?: string;\n [key: string]: JsonValue | undefined;\n };\n /** ISO timestamp for the start of tool execution. */\n startedAt?: string;\n /** ISO timestamp for the end of tool execution. */\n finishedAt?: string;\n /** Tool execution duration in milliseconds. */\n durationMs?: number;\n /** Extra JSON-safe tool metadata for reporters and custom judges. */\n metadata?: Record<string, JsonValue>;\n};\n\n/**\n * Normalized message recorded in a harness session transcript.\n *\n * @example\n * ```ts\n * const message: NormalizedMessage = {\n * role: \"assistant\",\n * content: { status: \"approved\" },\n * toolCalls: [{ name: \"lookupInvoice\" }],\n * };\n * ```\n */\nexport type NormalizedMessage = {\n /** Transcript role for the normalized message. */\n role: \"system\" | \"user\" | \"assistant\" | \"tool\";\n /** JSON-safe message content. */\n content?: JsonValue;\n /** Tool calls associated with this message. */\n toolCalls?: ToolCallRecord[];\n /** Extra JSON-safe message metadata. */\n metadata?: Record<string, JsonValue>;\n};\n\n/**\n * Provider usage summary attached to a normalized harness run.\n *\n * @example\n * ```ts\n * const usage: UsageSummary = {\n * provider: \"openai\",\n * model: \"gpt-4o-mini\",\n * inputTokens: 212,\n * outputTokens: 48,\n * totalTokens: 260,\n * };\n * ```\n */\nexport type UsageSummary = {\n /** Provider that served the application run. */\n provider?: string;\n /** Model used for the application run. */\n model?: string;\n /** Input, prompt, or request tokens consumed by the run. */\n inputTokens?: number;\n /** Output or completion tokens produced by the run. */\n outputTokens?: number;\n /** Reasoning tokens reported by providers that expose them. */\n reasoningTokens?: number;\n /** Total token count reported by the provider or adapter. */\n totalTokens?: number;\n /** Count of tool calls observed during the run. */\n toolCalls?: number;\n /** Retry count observed during the run. */\n retries?: number;\n /** Provider-specific JSON-safe usage details. Cost estimates belong here. */\n metadata?: Record<string, JsonValue>;\n};\n\n/** Timing summary attached to a normalized harness run. */\nexport type TimingSummary = {\n /** End-to-end run duration in milliseconds. */\n totalMs?: number;\n /** Extra JSON-safe timing metadata. */\n metadata?: Record<string, JsonValue>;\n};\n\n/**\n * JSON-serializable transcript produced by the system under test.\n *\n * @example\n * ```ts\n * const session: NormalizedSession = {\n * provider: \"openai\",\n * model: \"gpt-4o-mini\",\n * messages: [\n * { role: \"user\", content: \"Refund invoice inv_123\" },\n * { role: \"assistant\", content: { status: \"approved\" } },\n * ],\n * };\n * ```\n */\nexport type NormalizedSession = {\n /** Ordered normalized transcript messages. */\n messages: NormalizedMessage[];\n /** Provider that produced the session when known. */\n provider?: string;\n /** Model that produced the session when known. */\n model?: string;\n /** Extra JSON-safe session metadata. */\n metadata?: Record<string, JsonValue>;\n};\n\ntype OutputField<TOutput extends JsonValue | undefined> =\n undefined extends TOutput ? { output?: TOutput } : { output: TOutput };\n\n/**\n * Normalized result returned by every harness execution.\n *\n * @example\n * ```ts\n * const run: HarnessRun<{ status: \"approved\" }> = {\n * output: { status: \"approved\" },\n * session: {\n * messages: [\n * { role: \"user\", content: \"Refund invoice inv_123\" },\n * { role: \"assistant\", content: { status: \"approved\" } },\n * ],\n * },\n * usage: { totalTokens: 260 },\n * errors: [],\n * };\n * ```\n */\nexport type HarnessRun<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n /** Normalized transcript and provider/session metadata. */\n session: NormalizedSession;\n /** Stable provider usage units such as tokens, tools, and retries. */\n usage: UsageSummary;\n /** Optional timing summary for the run. */\n timings?: TimingSummary;\n /** JSON-safe run artifacts captured by the harness or test context. */\n artifacts?: Record<string, JsonValue>;\n /** Normalized traces and spans captured during execution. */\n traces?: NormalizedTrace[];\n /** Normalized errors captured during execution. */\n errors: Array<Record<string, JsonValue>>;\n};\n\n/** Error value with an attached partial or complete normalized harness run. */\nexport type HarnessRunError = Error & {\n /** Attached normalized harness run recovered by `getHarnessRunFromError(...)`. */\n vitestEvalsRun: HarnessRun;\n};\n\n/** Per-run metadata shape accepted by harnesses and eval tests. */\nexport type HarnessMetadata = Record<string, unknown>;\n\n/**\n * Runtime context passed from the eval fixture into a harness run.\n *\n * @example\n * ```ts\n * const harness: Harness<string> = {\n * name: \"refund-agent\",\n * async run(input, context) {\n * context.setArtifact(\"inputLength\", input.length);\n *\n * return {\n * output: undefined,\n * session: { messages: [{ role: \"user\", content: input }] },\n * usage: {},\n * errors: [],\n * };\n * },\n * };\n * ```\n */\nexport type HarnessContext<\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n /** Per-run metadata passed through `run(input, { metadata })`. */\n metadata: Readonly<TMetadata>;\n /** Abort signal from Vitest when available. */\n signal?: AbortSignal;\n /** Mutable JSON-safe artifact bag shared with the harness. */\n artifacts: Record<string, JsonValue>;\n /** Stores one JSON-safe artifact on the current run. */\n setArtifact: (name: string, value: JsonValue) => void;\n};\n\n/**\n * Adapter that executes the system under test and returns a normalized run.\n *\n * @example\n * ```ts\n * const harness: Harness<string, { status: \"approved\" | \"denied\" }> = {\n * name: \"refund-agent\",\n * async run(input, context) {\n * return normalizeHarnessRun(input, await runRefundFlow(input), context);\n * },\n * };\n * ```\n */\nexport type Harness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n /** Stable harness name used in reports. */\n name: string;\n /** Executes the system under test and returns a normalized run. */\n run: (\n input: TInput,\n context: HarnessContext<TMetadata>,\n ) => Promise<HarnessRun<TOutput>>;\n};\n\n/** Value or promise accepted by lightweight harness callbacks. */\nexport type MaybePromise<T> = T | Promise<T>;\n\n/** Lightweight tool-call record accepted by `createHarness(...)` results. */\nexport type SimpleToolCallRecord = Omit<\n ToolCallRecord,\n \"arguments\" | \"result\" | \"error\" | \"metadata\"\n> & {\n /** Raw tool arguments accepted by `createHarness(...)` before normalization. */\n arguments?: unknown;\n /** Raw tool result accepted by `createHarness(...)` before normalization. */\n result?: unknown;\n /** Raw tool error accepted by `createHarness(...)` before normalization. */\n error?: unknown;\n /** Raw tool metadata accepted by `createHarness(...)` before normalization. */\n metadata?: Record<string, unknown>;\n};\n\n/** Lightweight span event accepted by `createHarness(...)` results. */\nexport type SimpleSpanEvent = Omit<NormalizedSpanEvent, \"attributes\"> & {\n /** Raw event attributes accepted by `createHarness(...)` before normalization. */\n attributes?: Record<string, unknown>;\n};\n\n/** Lightweight span record accepted by `createHarness(...)` results. */\nexport type SimpleSpanRecord = Omit<\n NormalizedSpan,\n \"attributes\" | \"error\" | \"events\"\n> & {\n /** Raw span attributes accepted by `createHarness(...)` before normalization. */\n attributes?: Record<string, unknown>;\n /** Raw span error accepted by `createHarness(...)` before normalization. */\n error?: unknown;\n /** Raw span events accepted by `createHarness(...)` before normalization. */\n events?: SimpleSpanEvent[];\n};\n\n/** Lightweight trace record accepted by `createHarness(...)` results. */\nexport type SimpleTraceRecord = Omit<NormalizedTrace, \"metadata\" | \"spans\"> & {\n /** Raw trace metadata accepted by `createHarness(...)` before normalization. */\n metadata?: Record<string, unknown>;\n /** Lightweight spans to normalize into the trace. */\n spans: SimpleSpanRecord[];\n};\n\n/**\n * Lightweight result shape normalized by `createHarness(...)`.\n *\n * @example\n * ```ts\n * const result: SimpleHarnessResult<{ status: \"approved\" }> = {\n * output: { status: \"approved\" },\n * toolCalls: [{ name: \"lookupInvoice\", arguments: { invoiceId: \"inv_123\" } }],\n * usage: { totalTokens: 260 },\n * };\n * ```\n */\nexport type SimpleHarnessResult<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n /** Pre-normalized transcript messages. When omitted, a default user/assistant transcript is created. */\n messages?: NormalizedMessage[];\n /** Lightweight tool-call records to normalize into the session. */\n toolCalls?: SimpleToolCallRecord[];\n /** Usage summary to attach to the run. */\n usage?: UsageSummary;\n /** Timing summary to attach to the run. */\n timings?: TimingSummary;\n /** Raw artifact values to normalize and merge into the run. */\n artifacts?: Record<string, unknown>;\n /** Lightweight traces and spans to normalize into the run. */\n traces?: SimpleTraceRecord[];\n /** Raw session metadata to normalize into the session. */\n metadata?: Record<string, unknown>;\n /** Raw errors to normalize into the run. */\n errors?: unknown[];\n};\n\n/** Either a complete normalized run or a lightweight result to normalize. */\nexport type HarnessResultLike<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;\n\n/** Arguments passed to the `createHarness(...)` convenience callback. */\nexport type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {\n /** Original input passed to `run(input)`. */\n input: TInput;\n /** Read-only metadata passed to `run(input, { metadata })`. */\n metadata: Readonly<TMetadata>;\n /** Abort signal from Vitest when available. */\n signal?: AbortSignal;\n /** Mutable run artifact bag. */\n artifacts: HarnessContext<TMetadata>[\"artifacts\"];\n /** Stores one JSON-safe artifact on the current run. */\n setArtifact: HarnessContext<TMetadata>[\"setArtifact\"];\n};\n\n/**\n * Options for creating a lightweight custom application harness.\n *\n * @example\n * ```ts\n * const options: CreateHarnessOptions<string, { status: \"approved\" }> = {\n * name: \"refund-agent\",\n * run: async ({ input }) => ({\n * output: await classifyRefund(input),\n * }),\n * };\n * ```\n */\nexport type CreateHarnessOptions<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n /** Stable harness name used in reports. */\n name: string;\n /** Executes application code and returns either a lightweight result or full `HarnessRun`. */\n run: (\n args: CreateHarnessRunArgs<TInput, TMetadata>,\n ) => MaybePromise<HarnessResultLike<TOutput>>;\n};\n\nfunction isJsonPrimitive(value: unknown): value is JsonPrimitive {\n return (\n value === null ||\n typeof value === \"string\" ||\n typeof value === \"boolean\" ||\n (typeof value === \"number\" && Number.isFinite(value))\n );\n}\n\nfunction isJsonRecord(value: unknown): value is Record<string, unknown> {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\nfunction normalizeJsonArray(value: unknown[], seen: WeakSet<object>) {\n if (seen.has(value)) {\n return undefined;\n }\n\n seen.add(value);\n const normalized = value.map((item) => {\n const normalized = toJsonValueInternal(item, seen);\n return normalized === undefined ? null : normalized;\n });\n seen.delete(value);\n\n return normalized;\n}\n\nfunction normalizeJsonObject(\n value: Record<string, unknown>,\n seen: WeakSet<object>,\n): Record<string, JsonValue> {\n const normalized: Record<string, JsonValue> = {};\n\n if (seen.has(value)) {\n return normalized;\n }\n\n seen.add(value);\n try {\n for (const [key, entryValue] of Object.entries(value)) {\n const entry = toJsonValueInternal(entryValue, seen);\n if (entry !== undefined) {\n normalized[key] = entry;\n }\n }\n } finally {\n seen.delete(value);\n }\n\n return normalized;\n}\n\n/** Returns true when a value exposes a callable method with the given name. */\nexport function hasCallableMethod(value: unknown, methodName: string) {\n return (\n value !== null &&\n (typeof value === \"object\" || typeof value === \"function\") &&\n methodName in value &&\n typeof (value as Record<string, unknown>)[methodName] === \"function\"\n );\n}\n\n/** Normalizes an unknown value into the JSON-safe shape used by harness runs. */\nexport function toJsonValue(value: unknown): JsonValue | undefined {\n return toJsonValueInternal(value, new WeakSet());\n}\n\nfunction toJsonValueInternal(\n value: unknown,\n seen: WeakSet<object>,\n): JsonValue | undefined {\n if (isJsonPrimitive(value)) {\n return value;\n }\n\n if (\n value !== null &&\n typeof value === \"object\" &&\n seen.has(value as object)\n ) {\n return undefined;\n }\n\n if (Array.isArray(value)) {\n return normalizeJsonArray(value, seen);\n }\n\n if (isJsonRecord(value)) {\n return normalizeJsonObject(value, seen);\n }\n\n return undefined;\n}\n\n/** Drops non-JSON properties from a record while preserving valid values. */\nexport function normalizeRecord(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n return normalizeJsonObject(value, new WeakSet());\n}\n\n/** Normalizes metadata and omits the field entirely when nothing survives. */\nexport function normalizeMetadata(\n value: Record<string, unknown>,\n): Record<string, JsonValue> | undefined {\n const normalized = normalizeRecord(value);\n return Object.keys(normalized).length > 0 ? normalized : undefined;\n}\n\n/** Converts arbitrary content into the JSON-safe message content shape. */\nexport function normalizeContent(value: unknown): JsonValue {\n const normalized = toJsonValue(value);\n return normalized !== undefined ? normalized : String(value);\n}\n\n/**\n * Creates a harness from the common \"run app code and return output\" shape.\n *\n * @param options - Harness name plus the callback that executes app code.\n *\n * @example\n * ```ts\n * import { createHarness } from \"vitest-evals\";\n *\n * export const refundHarness = createHarness<\n * string,\n * { status: \"approved\" | \"denied\" },\n * { expected: { status: \"approved\" | \"denied\" } }\n * >({\n * name: \"refund-agent\",\n * run: async ({ input, metadata, setArtifact }) => {\n * const result = await runRefundFlow(input, metadata);\n * const output = { status: result.status };\n *\n * setArtifact(\"case\", { expected: metadata.expected.status });\n *\n * return {\n * output,\n * toolCalls: result.toolCalls,\n * usage: { provider: \"openai\", model: \"gpt-4o-mini\" },\n * };\n * },\n * });\n * ```\n */\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata>;\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata> {\n const harness: Harness<TInput, TOutput, TMetadata> = {\n name: options.name,\n run: async (input, context) => {\n const startedAt = new Date();\n\n try {\n const result = await options.run({\n input,\n metadata: context.metadata,\n signal: context.signal,\n artifacts: context.artifacts,\n setArtifact: context.setArtifact,\n });\n const run = normalizeHarnessRun(input, result, context);\n ensureRunTrace(run, {\n name: options.name,\n startedAt,\n finishedAt: new Date(),\n });\n\n return run;\n } catch (error) {\n const partialRun = getHarnessRunFromError(error);\n if (partialRun) {\n if (\n Object.keys(context.artifacts).length > 0 &&\n !partialRun.artifacts\n ) {\n partialRun.artifacts = context.artifacts;\n }\n ensureRunTrace(partialRun, {\n name: options.name,\n startedAt,\n finishedAt: new Date(),\n });\n throw attachHarnessRunToError(error, partialRun);\n }\n\n const failedRun = createFailedHarnessRun(input, error, {\n artifacts: context.artifacts,\n });\n ensureRunTrace(failedRun, {\n name: options.name,\n startedAt,\n finishedAt: new Date(),\n });\n\n throw attachHarnessRunToError(error, failedRun);\n }\n },\n };\n\n return harness;\n}\n\n/**\n * Normalizes a lightweight harness result into the reporter-facing run shape.\n *\n * @param input - Original input passed to the harness.\n * @param result - Lightweight result or pre-normalized harness run.\n * @param context - Optional per-run context used to merge artifacts.\n *\n * @example\n * ```ts\n * const run = normalizeHarnessRun(\"Refund invoice inv_123\", {\n * output: { status: \"approved\" },\n * toolCalls: [{ name: \"lookupInvoice\", arguments: { invoiceId: \"inv_123\" } }],\n * usage: { provider: \"openai\", model: \"gpt-4o-mini\" },\n * });\n *\n * expect(toolCalls(run.session)).toHaveLength(1);\n * ```\n */\nexport function normalizeHarnessRun<\n TInput = unknown,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n>(\n input: TInput,\n result: HarnessResultLike<TOutput>,\n context?: HarnessContext<TMetadata>,\n): HarnessRun<TOutput> {\n if (isHarnessRun(result)) {\n if (\n context &&\n Object.keys(context.artifacts).length > 0 &&\n !result.artifacts\n ) {\n return {\n ...result,\n artifacts: context.artifacts,\n };\n }\n\n return result;\n }\n\n const output = result.output;\n const toolCalls = normalizeSimpleToolCalls(result.toolCalls);\n const usage = result.usage ?? {};\n const messages =\n result.messages ??\n createDefaultSessionMessages({\n input,\n output,\n toolCalls,\n });\n const metadata = result.metadata\n ? normalizeMetadata(result.metadata)\n : undefined;\n const artifacts = normalizeMergedArtifacts(\n context?.artifacts,\n result.artifacts,\n );\n const traces = normalizeSimpleTraces(result.traces);\n\n return {\n session: {\n messages,\n ...(usage.provider ? { provider: usage.provider } : {}),\n ...(usage.model ? { model: usage.model } : {}),\n ...(metadata ? { metadata } : {}),\n },\n ...(output !== undefined ? { output } : {}),\n usage,\n ...(result.timings ? { timings: result.timings } : {}),\n ...(artifacts ? { artifacts } : {}),\n ...(traces ? { traces } : {}),\n errors: normalizeSimpleErrors(result.errors),\n } as HarnessRun<TOutput>;\n}\n\n/**\n * Builds a JSON-safe failed run for errors that happen before a harness can return.\n *\n * @param input - Original input passed to the harness.\n * @param error - Error thrown by setup or execution.\n * @param options - Optional artifacts to preserve on the failed run.\n */\nexport function createFailedHarnessRun(\n input: unknown,\n error: unknown,\n options: { artifacts?: Record<string, JsonValue> } = {},\n): HarnessRun {\n const artifacts = options.artifacts;\n\n return {\n session: {\n messages: [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ],\n },\n usage: {},\n ...(artifacts && Object.keys(artifacts).length > 0 ? { artifacts } : {}),\n errors: [serializeError(error)],\n };\n}\n\nfunction createDefaultSessionMessages<TInput>({\n input,\n output,\n toolCalls: normalizedToolCalls,\n}: {\n input: TInput;\n output: JsonValue | undefined;\n toolCalls: ToolCallRecord[];\n}): NormalizedMessage[] {\n const messages: NormalizedMessage[] = [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ];\n\n if (output !== undefined || normalizedToolCalls.length > 0) {\n messages.push({\n role: \"assistant\",\n ...(output !== undefined ? { content: normalizeContent(output) } : {}),\n ...(normalizedToolCalls.length > 0\n ? { toolCalls: normalizedToolCalls }\n : {}),\n });\n }\n\n return messages;\n}\n\nfunction normalizeSimpleToolCalls(\n calls: SimpleToolCallRecord[] | undefined,\n): ToolCallRecord[] {\n return (calls ?? []).map((call) => {\n const {\n arguments: rawArguments,\n result: rawResult,\n error: rawError,\n metadata: rawMetadata,\n ...toolCall\n } = call;\n const args = normalizeToolCallArguments(rawArguments);\n const result = toJsonValue(rawResult);\n const error = normalizeToolCallError(rawError);\n const metadata = rawMetadata ? normalizeMetadata(rawMetadata) : undefined;\n\n return {\n ...toolCall,\n ...(args ? { arguments: args } : {}),\n ...(result !== undefined ? { result } : {}),\n ...(error ? { error } : {}),\n ...(metadata ? { metadata } : {}),\n };\n });\n}\n\nfunction normalizeToolCallArguments(\n value: unknown,\n): Record<string, JsonValue> | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const normalized = toJsonValue(value);\n return normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized)\n ? normalized\n : undefined;\n}\n\nfunction normalizeToolCallError(\n value: unknown,\n): ToolCallRecord[\"error\"] | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const serialized = serializeError(value);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\nfunction normalizeMergedArtifacts(\n contextArtifacts: Record<string, JsonValue> | undefined,\n resultArtifacts: Record<string, unknown> | undefined,\n) {\n const artifacts = {\n ...(contextArtifacts ?? {}),\n ...(resultArtifacts ? normalizeRecord(resultArtifacts) : {}),\n };\n\n return Object.keys(artifacts).length > 0 ? artifacts : undefined;\n}\n\nfunction normalizeSimpleErrors(\n errors: unknown[] | undefined,\n): Array<Record<string, JsonValue>> {\n return (errors ?? []).map((error) => {\n const normalized = toJsonValue(error);\n\n if (\n normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized) &&\n Object.keys(normalized).length > 0\n ) {\n return normalized;\n }\n\n return serializeError(error);\n });\n}\n\nfunction normalizeSimpleTraces(\n traces: SimpleTraceRecord[] | undefined,\n): NormalizedTrace[] | undefined {\n if (!Array.isArray(traces)) {\n return undefined;\n }\n\n const normalized = traces\n .map(normalizeSimpleTrace)\n .filter((trace): trace is NormalizedTrace => Boolean(trace));\n\n return normalized.length > 0 ? normalized : undefined;\n}\n\nfunction normalizeSimpleTrace(trace: unknown): NormalizedTrace | undefined {\n if (!isJsonRecord(trace)) {\n return undefined;\n }\n\n const {\n metadata: rawMetadata,\n spans: rawSpans,\n ...traceFields\n } = trace as Partial<SimpleTraceRecord>;\n const spans = (Array.isArray(rawSpans) ? rawSpans : [])\n .map((span) => normalizeSimpleSpan(span))\n .filter((span): span is NormalizedSpan => Boolean(span));\n const metadata = isJsonRecord(rawMetadata)\n ? normalizeMetadata(rawMetadata)\n : undefined;\n\n if (spans.length === 0 && !traceFields.id && !traceFields.name) {\n return undefined;\n }\n\n return {\n ...traceFields,\n ...(metadata ? { metadata } : {}),\n spans,\n };\n}\n\nfunction normalizeSimpleSpan(span: unknown): NormalizedSpan | undefined {\n if (!isJsonRecord(span) || typeof span.name !== \"string\" || !span.name) {\n return undefined;\n }\n\n const {\n attributes: rawAttributes,\n error: rawError,\n events: rawEvents,\n ...spanFields\n } = span as Partial<SimpleSpanRecord> & { name: string };\n const attributes = rawAttributes\n ? isJsonRecord(rawAttributes)\n ? normalizeMetadata(rawAttributes)\n : undefined\n : undefined;\n const error = normalizeSpanError(rawError);\n const events = normalizeSimpleSpanEvents(rawEvents);\n\n return {\n ...spanFields,\n ...(attributes\n ? { attributes: attributes as NormalizedSpanAttributes }\n : {}),\n ...(error ? { error } : {}),\n ...(events ? { events } : {}),\n };\n}\n\nfunction normalizeSimpleSpanEvents(\n events: unknown,\n): NormalizedSpanEvent[] | undefined {\n if (!Array.isArray(events)) {\n return undefined;\n }\n\n const normalized = events\n .map(normalizeSimpleSpanEvent)\n .filter((event): event is NormalizedSpanEvent => Boolean(event));\n\n return normalized.length > 0 ? normalized : undefined;\n}\n\nfunction normalizeSimpleSpanEvent(\n event: unknown,\n): NormalizedSpanEvent | undefined {\n if (!isJsonRecord(event) || typeof event.name !== \"string\" || !event.name) {\n return undefined;\n }\n\n const { attributes: rawAttributes, ...eventFields } =\n event as Partial<SimpleSpanEvent> & { name: string };\n const attributes = rawAttributes\n ? isJsonRecord(rawAttributes)\n ? normalizeMetadata(rawAttributes)\n : undefined\n : undefined;\n\n return {\n ...eventFields,\n ...(attributes\n ? { attributes: attributes as NormalizedSpanAttributes }\n : {}),\n };\n}\n\n/** Normalizes arbitrary span errors while preserving object-shaped messages. */\nexport function normalizeSpanError(\n error: unknown,\n): NormalizedSpan[\"error\"] | undefined {\n if (error === undefined) {\n return undefined;\n }\n\n if (error instanceof Error) {\n const details = normalizeMetadata(\n error as unknown as Record<string, unknown>,\n );\n\n return {\n ...(details ?? {}),\n type: error.name,\n message: error.message,\n };\n }\n\n if (\n error &&\n typeof error === \"object\" &&\n !Array.isArray(error) &&\n typeof (error as { message?: unknown }).message === \"string\"\n ) {\n const normalized = normalizeMetadata(error as Record<string, unknown>);\n const { message, type, ...details } = normalized ?? {};\n\n return {\n ...details,\n message: message as string,\n ...(typeof type === \"string\" ? { type } : {}),\n };\n }\n\n const serialized = serializeError(error);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\n/** Normalizes raw span attributes into the JSON-safe span attribute shape. */\nexport function normalizeSpanAttributes(\n attributes: Record<string, unknown>,\n): NormalizedSpanAttributes | undefined {\n return normalizeMetadata(attributes) as NormalizedSpanAttributes | undefined;\n}\n\n/** Builds common OpenTelemetry GenAI usage attributes from a usage summary. */\nexport function createGenAiUsageAttributes(\n usage: UsageSummary | undefined,\n options: { provider?: string } = {},\n) {\n return {\n \"gen_ai.provider.name\": usage?.provider ?? options.provider,\n \"gen_ai.request.model\": usage?.model,\n \"gen_ai.response.model\": usage?.model,\n \"gen_ai.usage.input_tokens\": usage?.inputTokens,\n \"gen_ai.usage.output_tokens\": usage?.outputTokens,\n \"gen_ai.usage.reasoning.output_tokens\": usage?.reasoningTokens,\n } satisfies Record<string, unknown>;\n}\n\n/**\n * Flattens every recorded tool call from a normalized session.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const names = toolCalls(result.session).map((call) => call.name);\n *\n * expect(names).toEqual([\"lookupInvoice\", \"createRefund\"]);\n * ```\n */\nexport function toolCalls(session: NormalizedSession): ToolCallRecord[] {\n return session.messages.flatMap((message) => message.toolCalls ?? []);\n}\n\n/**\n * Converts normalized tool-call records into trace spans.\n *\n * Tool-call ids are preserved as GenAI attributes. Pass `spanIdPrefix` when the\n * spans belong to a known trace so span ids stay internally unique.\n */\nexport function createToolCallSpans(\n calls: ToolCallRecord[],\n options: CreateToolCallSpansOptions = {},\n): NormalizedSpan[] {\n return calls.map((call, index) => {\n const spanError = call.error ? normalizeSpanError(call.error) : undefined;\n const spanId = options.spanIdPrefix\n ? `${options.spanIdPrefix}:${index + 1}`\n : call.id;\n\n return {\n ...(spanId ? { id: spanId } : {}),\n ...(options.traceId ? { traceId: options.traceId } : {}),\n ...(options.parentId ? { parentId: options.parentId } : {}),\n name: call.name,\n kind: \"tool\",\n ...(call.startedAt ? { startedAt: call.startedAt } : {}),\n ...(call.finishedAt ? { finishedAt: call.finishedAt } : {}),\n ...(call.durationMs !== undefined ? { durationMs: call.durationMs } : {}),\n status: spanError ? \"error\" : \"ok\",\n ...(spanError ? { error: spanError } : {}),\n attributes: normalizeSpanAttributes({\n \"gen_ai.operation.name\": \"execute_tool\",\n \"gen_ai.tool.name\": call.name,\n \"gen_ai.tool.type\": \"function\",\n ...(call.id ? { \"gen_ai.tool.call.id\": call.id } : {}),\n ...(call.arguments !== undefined\n ? { \"gen_ai.tool.call.arguments\": call.arguments }\n : {}),\n ...(call.result !== undefined\n ? { \"gen_ai.tool.call.result\": call.result }\n : {}),\n }),\n } satisfies NormalizedSpan;\n });\n}\n\n/**\n * Attaches a fallback run trace when a harness result does not already contain spans.\n *\n * This keeps custom harnesses inspectable while first-party harness packages\n * remain free to attach richer native traces.\n */\nexport function ensureRunTrace(\n run: HarnessRun,\n options: EnsureRunTraceOptions,\n): NormalizedTrace | undefined {\n if (spans(run).length > 0) {\n return undefined;\n }\n\n const traceId = options.id ?? createGeneratedTraceId();\n const rootSpanId = `${traceId}:run`;\n const durationMs = options.finishedAt.getTime() - options.startedAt.getTime();\n const rootError =\n run.errors.length > 0 ? normalizeSpanError(run.errors[0]) : undefined;\n const runSpan: NormalizedSpan = {\n id: rootSpanId,\n traceId,\n name: options.name,\n kind: \"run\",\n startedAt: options.startedAt.toISOString(),\n finishedAt: options.finishedAt.toISOString(),\n durationMs,\n status: rootError ? \"error\" : \"ok\",\n ...(rootError ? { error: rootError } : {}),\n attributes: normalizeSpanAttributes({\n \"gen_ai.operation.name\": options.operationName ?? \"invoke_workflow\",\n \"gen_ai.workflow.name\": options.name,\n ...createGenAiUsageAttributes(run.usage),\n }),\n };\n const toolSpans = createToolCallSpans(toolCalls(run.session), {\n traceId,\n parentId: rootSpanId,\n spanIdPrefix: `${traceId}:tool`,\n });\n const trace: NormalizedTrace = {\n id: traceId,\n name: options.name,\n startedAt: options.startedAt.toISOString(),\n finishedAt: options.finishedAt.toISOString(),\n durationMs,\n ...(options.source ? { metadata: { source: options.source } } : {}),\n spans: [runSpan, ...toolSpans],\n };\n\n run.traces = [trace];\n return trace;\n}\n\nlet nextGeneratedTraceId = 0;\n\nfunction createGeneratedTraceId() {\n nextGeneratedTraceId += 1;\n return `trace_${nextGeneratedTraceId}`;\n}\n\n/**\n * Flattens every recorded span from a normalized harness run.\n *\n * @param run - Normalized harness run produced by a harness.\n *\n * @example\n * ```ts\n * const modelSpans = spans(result).filter((span) => span.kind === \"model\");\n * ```\n */\nexport function spans(run: HarnessRun): NormalizedSpan[] {\n return (run.traces ?? []).flatMap((trace) => trace.spans);\n}\n\n/**\n * Returns spans of one coarse operation kind from a normalized run.\n *\n * @param run - Normalized harness run produced by a harness.\n * @param kind - Span kind to keep.\n */\nexport function spansByKind(\n run: HarnessRun,\n kind: NonNullable<NormalizedSpan[\"kind\"]>,\n): NormalizedSpan[] {\n return spans(run).filter((span) => span.kind === kind);\n}\n\n/**\n * Returns every span that explicitly failed or carries a normalized error.\n *\n * @param run - Normalized harness run produced by a harness.\n */\nexport function failedSpans(run: HarnessRun): NormalizedSpan[] {\n return spans(run).filter((span) => span.status === \"error\" || span.error);\n}\n\n/**\n * Filters normalized session messages by role.\n *\n * @param session - Normalized session produced by a harness run.\n * @param role - Message role to keep.\n *\n * @example\n * ```ts\n * const assistantText = messagesByRole(result.session, \"assistant\")\n * .map((message) => message.content)\n * .join(\"\\n\");\n * ```\n */\nexport function messagesByRole(\n session: NormalizedSession,\n role: NormalizedMessage[\"role\"],\n): NormalizedMessage[] {\n return session.messages.filter((message) => message.role === role);\n}\n\nfunction hasNonEmptyMessageContent(message: NormalizedMessage) {\n return (\n message.content !== undefined &&\n (typeof message.content !== \"string\" || message.content.trim().length > 0)\n );\n}\n\n/**\n * Returns every normalized system message from a session.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const systemPrompts = systemMessages(result.session);\n * ```\n */\nexport function systemMessages(session: NormalizedSession) {\n return messagesByRole(session, \"system\");\n}\n\n/**\n * Returns every normalized user message from a session.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const firstPrompt = userMessages(result.session)[0]?.content;\n * ```\n */\nexport function userMessages(session: NormalizedSession) {\n return messagesByRole(session, \"user\");\n}\n\n/**\n * Returns every normalized assistant message from a session.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const finalAnswer = assistantMessages(result.session).at(-1)?.content;\n * ```\n */\nexport function assistantMessages(session: NormalizedSession) {\n return messagesByRole(session, \"assistant\");\n}\n\n/**\n * Returns the latest assistant message content, ignoring empty text messages.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const finalAnswer = latestAssistantMessageContent(result.session);\n * ```\n */\nexport function latestAssistantMessageContent(session: NormalizedSession) {\n return [...assistantMessages(session)]\n .reverse()\n .find(hasNonEmptyMessageContent)?.content;\n}\n\n/**\n * Returns every normalized tool message from a session.\n *\n * @param session - Normalized session produced by a harness run.\n *\n * @example\n * ```ts\n * const toolOutputs = toolMessages(result.session).map((message) => message.content);\n * ```\n */\nexport function toolMessages(session: NormalizedSession) {\n return messagesByRole(session, \"tool\");\n}\n\n/**\n * Attaches a partial or complete harness run to an arbitrary thrown error.\n *\n * @param error - Thrown value to wrap.\n * @param run - Partial or complete normalized harness run to preserve.\n *\n * @example\n * ```ts\n * try {\n * return await runAgent(input);\n * } catch (error) {\n * throw attachHarnessRunToError(error, partialRun);\n * }\n * ```\n */\nexport function attachHarnessRunToError(\n error: unknown,\n run: HarnessRun,\n): HarnessRunError {\n const baseError =\n error instanceof Error\n ? error\n : new Error(String(error ?? \"Unknown error\"));\n return Object.assign(baseError, {\n vitestEvalsRun: run,\n });\n}\n\n/**\n * Reads an attached harness run back off a previously wrapped error value.\n *\n * @param error - Unknown thrown value that may contain a harness run.\n *\n * @example\n * ```ts\n * const partialRun = getHarnessRunFromError(error);\n *\n * if (partialRun) {\n * console.log(toolCalls(partialRun.session));\n * }\n * ```\n */\nexport function getHarnessRunFromError(error: unknown): HarnessRun | undefined {\n if (\n error &&\n typeof error === \"object\" &&\n \"vitestEvalsRun\" in error &&\n isHarnessRun((error as { vitestEvalsRun?: unknown }).vitestEvalsRun)\n ) {\n return (error as { vitestEvalsRun: HarnessRun }).vitestEvalsRun;\n }\n\n return undefined;\n}\n\n/** Returns true when a value matches the normalized `HarnessRun` contract. */\nexport function isHarnessRun(value: unknown): value is HarnessRun {\n if (!value || typeof value !== \"object\") {\n return false;\n }\n\n const candidate = value as {\n session?: unknown;\n usage?: unknown;\n errors?: unknown;\n };\n\n return (\n isNormalizedSession(candidate.session) &&\n Boolean(candidate.usage) &&\n typeof candidate.usage === \"object\" &&\n !Array.isArray(candidate.usage) &&\n Array.isArray(candidate.errors)\n );\n}\n\n/** Returns true when a value matches the normalized session contract. */\nexport function isNormalizedSession(\n value: unknown,\n): value is NormalizedSession {\n return (\n Boolean(value) &&\n typeof value === \"object\" &&\n value !== null &&\n \"messages\" in value &&\n Array.isArray((value as { messages?: unknown }).messages)\n );\n}\n\n/** Reuses pre-normalized harness errors when a runtime already returns them. */\nexport function resolveHarnessRunErrors(\n result: unknown,\n): Array<Record<string, JsonValue>> {\n if (\n result &&\n typeof result === \"object\" &&\n Array.isArray((result as Record<string, unknown>).errors)\n ) {\n return (result as { errors: Array<Record<string, JsonValue>> }).errors;\n }\n\n return [];\n}\n\n/** Serializes an arbitrary thrown value into the normalized error shape. */\nexport function serializeError(error: unknown): Record<string, JsonValue> {\n if (error instanceof Error) {\n return {\n type: error.name,\n message: error.message,\n };\n }\n\n return {\n type: \"Error\",\n message: String(error),\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAglBA,SAAS,gBAAgB,OAAwC;AAC/D,SACE,UAAU,QACV,OAAO,UAAU,YACjB,OAAO,UAAU,aAChB,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK;AAEvD;AAEA,SAAS,aAAa,OAAkD;AACtE,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAEA,SAAS,mBAAmB,OAAkB,MAAuB;AACnE,MAAI,KAAK,IAAI,KAAK,GAAG;AACnB,WAAO;AAAA,EACT;AAEA,OAAK,IAAI,KAAK;AACd,QAAM,aAAa,MAAM,IAAI,CAAC,SAAS;AACrC,UAAMA,cAAa,oBAAoB,MAAM,IAAI;AACjD,WAAOA,gBAAe,SAAY,OAAOA;AAAA,EAC3C,CAAC;AACD,OAAK,OAAO,KAAK;AAEjB,SAAO;AACT;AAEA,SAAS,oBACP,OACA,MAC2B;AAC3B,QAAM,aAAwC,CAAC;AAE/C,MAAI,KAAK,IAAI,KAAK,GAAG;AACnB,WAAO;AAAA,EACT;AAEA,OAAK,IAAI,KAAK;AACd,MAAI;AACF,eAAW,CAAC,KAAK,UAAU,KAAK,OAAO,QAAQ,KAAK,GAAG;AACrD,YAAM,QAAQ,oBAAoB,YAAY,IAAI;AAClD,UAAI,UAAU,QAAW;AACvB,mBAAW,GAAG,IAAI;AAAA,MACpB;AAAA,IACF;AAAA,EACF,UAAE;AACA,SAAK,OAAO,KAAK;AAAA,EACnB;AAEA,SAAO;AACT;AAGO,SAAS,kBAAkB,OAAgB,YAAoB;AACpE,SACE,UAAU,SACT,OAAO,UAAU,YAAY,OAAO,UAAU,eAC/C,cAAc,SACd,OAAQ,MAAkC,UAAU,MAAM;AAE9D;AAGO,SAAS,YAAY,OAAuC;AACjE,SAAO,oBAAoB,OAAO,oBAAI,QAAQ,CAAC;AACjD;AAEA,SAAS,oBACP,OACA,MACuB;AACvB,MAAI,gBAAgB,KAAK,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,MACE,UAAU,QACV,OAAO,UAAU,YACjB,KAAK,IAAI,KAAe,GACxB;AACA,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,WAAO,mBAAmB,OAAO,IAAI;AAAA,EACvC;AAEA,MAAI,aAAa,KAAK,GAAG;AACvB,WAAO,oBAAoB,OAAO,IAAI;AAAA,EACxC;AAEA,SAAO;AACT;AAGO,SAAS,gBACd,OAC2B;AAC3B,SAAO,oBAAoB,OAAO,oBAAI,QAAQ,CAAC;AACjD;AAGO,SAAS,kBACd,OACuC;AACvC,QAAM,aAAa,gBAAgB,KAAK;AACxC,SAAO,OAAO,KAAK,UAAU,EAAE,SAAS,IAAI,aAAa;AAC3D;AAGO,SAAS,iBAAiB,OAA2B;AAC1D,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,eAAe,SAAY,aAAa,OAAO,KAAK;AAC7D;AAuCO,SAAS,cAKd,SACqC;AACrC,QAAM,UAA+C;AAAA,IACnD,MAAM,QAAQ;AAAA,IACd,KAAK,OAAO,OAAO,YAAY;AAC7B,YAAM,YAAY,oBAAI,KAAK;AAE3B,UAAI;AACF,cAAM,SAAS,MAAM,QAAQ,IAAI;AAAA,UAC/B;AAAA,UACA,UAAU,QAAQ;AAAA,UAClB,QAAQ,QAAQ;AAAA,UAChB,WAAW,QAAQ;AAAA,UACnB,aAAa,QAAQ;AAAA,QACvB,CAAC;AACD,cAAM,MAAM,oBAAoB,OAAO,QAAQ,OAAO;AACtD,uBAAe,KAAK;AAAA,UAClB,MAAM,QAAQ;AAAA,UACd;AAAA,UACA,YAAY,oBAAI,KAAK;AAAA,QACvB,CAAC;AAED,eAAO;AAAA,MACT,SAAS,OAAO;AACd,cAAM,aAAa,uBAAuB,KAAK;AAC/C,YAAI,YAAY;AACd,cACE,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,WAAW,WACZ;AACA,uBAAW,YAAY,QAAQ;AAAA,UACjC;AACA,yBAAe,YAAY;AAAA,YACzB,MAAM,QAAQ;AAAA,YACd;AAAA,YACA,YAAY,oBAAI,KAAK;AAAA,UACvB,CAAC;AACD,gBAAM,wBAAwB,OAAO,UAAU;AAAA,QACjD;AAEA,cAAM,YAAY,uBAAuB,OAAO,OAAO;AAAA,UACrD,WAAW,QAAQ;AAAA,QACrB,CAAC;AACD,uBAAe,WAAW;AAAA,UACxB,MAAM,QAAQ;AAAA,UACd;AAAA,UACA,YAAY,oBAAI,KAAK;AAAA,QACvB,CAAC;AAED,cAAM,wBAAwB,OAAO,SAAS;AAAA,MAChD;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAoBO,SAAS,oBAKd,OACA,QACA,SACqB;AACrB,MAAI,aAAa,MAAM,GAAG;AACxB,QACE,WACA,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,OAAO,WACR;AACA,aAAO;AAAA,QACL,GAAG;AAAA,QACH,WAAW,QAAQ;AAAA,MACrB;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,OAAO;AACtB,QAAMC,aAAY,yBAAyB,OAAO,SAAS;AAC3D,QAAM,QAAQ,OAAO,SAAS,CAAC;AAC/B,QAAM,WACJ,OAAO,YACP,6BAA6B;AAAA,IAC3B;AAAA,IACA;AAAA,IACA,WAAAA;AAAA,EACF,CAAC;AACH,QAAM,WAAW,OAAO,WACpB,kBAAkB,OAAO,QAAQ,IACjC;AACJ,QAAM,YAAY;AAAA,IAChB,SAAS;AAAA,IACT,OAAO;AAAA,EACT;AACA,QAAM,SAAS,sBAAsB,OAAO,MAAM;AAElD,SAAO;AAAA,IACL,SAAS;AAAA,MACP;AAAA,MACA,GAAI,MAAM,WAAW,EAAE,UAAU,MAAM,SAAS,IAAI,CAAC;AAAA,MACrD,GAAI,MAAM,QAAQ,EAAE,OAAO,MAAM,MAAM,IAAI,CAAC;AAAA,MAC5C,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,IACA,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,IACzC;AAAA,IACA,GAAI,OAAO,UAAU,EAAE,SAAS,OAAO,QAAQ,IAAI,CAAC;AAAA,IACpD,GAAI,YAAY,EAAE,UAAU,IAAI,CAAC;AAAA,IACjC,GAAI,SAAS,EAAE,OAAO,IAAI,CAAC;AAAA,IAC3B,QAAQ,sBAAsB,OAAO,MAAM;AAAA,EAC7C;AACF;AASO,SAAS,uBACd,OACA,OACA,UAAqD,CAAC,GAC1C;AACZ,QAAM,YAAY,QAAQ;AAE1B,SAAO;AAAA,IACL,SAAS;AAAA,MACP,UAAU;AAAA,QACR;AAAA,UACE,MAAM;AAAA,UACN,SAAS,iBAAiB,KAAK;AAAA,QACjC;AAAA,MACF;AAAA,IACF;AAAA,IACA,OAAO,CAAC;AAAA,IACR,GAAI,aAAa,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,EAAE,UAAU,IAAI,CAAC;AAAA,IACtE,QAAQ,CAAC,eAAe,KAAK,CAAC;AAAA,EAChC;AACF;AAEA,SAAS,6BAAqC;AAAA,EAC5C;AAAA,EACA;AAAA,EACA,WAAW;AACb,GAIwB;AACtB,QAAM,WAAgC;AAAA,IACpC;AAAA,MACE,MAAM;AAAA,MACN,SAAS,iBAAiB,KAAK;AAAA,IACjC;AAAA,EACF;AAEA,MAAI,WAAW,UAAa,oBAAoB,SAAS,GAAG;AAC1D,aAAS,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,GAAI,WAAW,SAAY,EAAE,SAAS,iBAAiB,MAAM,EAAE,IAAI,CAAC;AAAA,MACpE,GAAI,oBAAoB,SAAS,IAC7B,EAAE,WAAW,oBAAoB,IACjC,CAAC;AAAA,IACP,CAAC;AAAA,EACH;AAEA,SAAO;AACT;AAEA,SAAS,yBACP,OACkB;AAClB,UAAQ,SAAS,CAAC,GAAG,IAAI,CAAC,SAAS;AACjC,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,QAAQ;AAAA,MACR,OAAO;AAAA,MACP,UAAU;AAAA,MACV,GAAG;AAAA,IACL,IAAI;AACJ,UAAM,OAAO,2BAA2B,YAAY;AACpD,UAAM,SAAS,YAAY,SAAS;AACpC,UAAM,QAAQ,uBAAuB,QAAQ;AAC7C,UAAM,WAAW,cAAc,kBAAkB,WAAW,IAAI;AAEhE,WAAO;AAAA,MACL,GAAG;AAAA,MACH,GAAI,OAAO,EAAE,WAAW,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,MACzC,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,MACzB,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,EACF,CAAC;AACH;AAEA,SAAS,2BACP,OACuC;AACvC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,cACL,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,IACvB,aACA;AACN;AAEA,SAAS,uBACP,OACqC;AACrC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAEA,SAAS,yBACP,kBACA,iBACA;AACA,QAAM,YAAY;AAAA,IAChB,GAAI,oBAAoB,CAAC;AAAA,IACzB,GAAI,kBAAkB,gBAAgB,eAAe,IAAI,CAAC;AAAA,EAC5D;AAEA,SAAO,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,YAAY;AACzD;AAEA,SAAS,sBACP,QACkC;AAClC,UAAQ,UAAU,CAAC,GAAG,IAAI,CAAC,UAAU;AACnC,UAAM,aAAa,YAAY,KAAK;AAEpC,QACE,cACA,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,KACzB,OAAO,KAAK,UAAU,EAAE,SAAS,GACjC;AACA,aAAO;AAAA,IACT;AAEA,WAAO,eAAe,KAAK;AAAA,EAC7B,CAAC;AACH;AAEA,SAAS,sBACP,QAC+B;AAC/B,MAAI,CAAC,MAAM,QAAQ,MAAM,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,OAChB,IAAI,oBAAoB,EACxB,OAAO,CAAC,UAAoC,QAAQ,KAAK,CAAC;AAE7D,SAAO,WAAW,SAAS,IAAI,aAAa;AAC9C;AAEA,SAAS,qBAAqB,OAA6C;AACzE,MAAI,CAAC,aAAa,KAAK,GAAG;AACxB,WAAO;AAAA,EACT;AAEA,QAAM;AAAA,IACJ,UAAU;AAAA,IACV,OAAO;AAAA,IACP,GAAG;AAAA,EACL,IAAI;AACJ,QAAMC,UAAS,MAAM,QAAQ,QAAQ,IAAI,WAAW,CAAC,GAClD,IAAI,CAAC,SAAS,oBAAoB,IAAI,CAAC,EACvC,OAAO,CAAC,SAAiC,QAAQ,IAAI,CAAC;AACzD,QAAM,WAAW,aAAa,WAAW,IACrC,kBAAkB,WAAW,IAC7B;AAEJ,MAAIA,OAAM,WAAW,KAAK,CAAC,YAAY,MAAM,CAAC,YAAY,MAAM;AAC9D,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IAC/B,OAAAA;AAAA,EACF;AACF;AAEA,SAAS,oBAAoB,MAA2C;AACtE,MAAI,CAAC,aAAa,IAAI,KAAK,OAAO,KAAK,SAAS,YAAY,CAAC,KAAK,MAAM;AACtE,WAAO;AAAA,EACT;AAEA,QAAM;AAAA,IACJ,YAAY;AAAA,IACZ,OAAO;AAAA,IACP,QAAQ;AAAA,IACR,GAAG;AAAA,EACL,IAAI;AACJ,QAAM,aAAa,gBACf,aAAa,aAAa,IACxB,kBAAkB,aAAa,IAC/B,SACF;AACJ,QAAM,QAAQ,mBAAmB,QAAQ;AACzC,QAAM,SAAS,0BAA0B,SAAS;AAElD,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAI,aACA,EAAE,WAAmD,IACrD,CAAC;AAAA,IACL,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,IACzB,GAAI,SAAS,EAAE,OAAO,IAAI,CAAC;AAAA,EAC7B;AACF;AAEA,SAAS,0BACP,QACmC;AACnC,MAAI,CAAC,MAAM,QAAQ,MAAM,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,OAChB,IAAI,wBAAwB,EAC5B,OAAO,CAAC,UAAwC,QAAQ,KAAK,CAAC;AAEjE,SAAO,WAAW,SAAS,IAAI,aAAa;AAC9C;AAEA,SAAS,yBACP,OACiC;AACjC,MAAI,CAAC,aAAa,KAAK,KAAK,OAAO,MAAM,SAAS,YAAY,CAAC,MAAM,MAAM;AACzE,WAAO;AAAA,EACT;AAEA,QAAM,EAAE,YAAY,eAAe,GAAG,YAAY,IAChD;AACF,QAAM,aAAa,gBACf,aAAa,aAAa,IACxB,kBAAkB,aAAa,IAC/B,SACF;AAEJ,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAI,aACA,EAAE,WAAmD,IACrD,CAAC;AAAA,EACP;AACF;AAGO,SAAS,mBACd,OACqC;AACrC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,MAAI,iBAAiB,OAAO;AAC1B,UAAMC,WAAU;AAAA,MACd;AAAA,IACF;AAEA,WAAO;AAAA,MACL,GAAIA,YAAW,CAAC;AAAA,MAChB,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,MACE,SACA,OAAO,UAAU,YACjB,CAAC,MAAM,QAAQ,KAAK,KACpB,OAAQ,MAAgC,YAAY,UACpD;AACA,UAAM,aAAa,kBAAkB,KAAgC;AACrE,UAAM,EAAE,SAAAC,UAAS,MAAAC,OAAM,GAAGF,SAAQ,IAAI,cAAc,CAAC;AAErD,WAAO;AAAA,MACL,GAAGA;AAAA,MACH,SAASC;AAAA,MACT,GAAI,OAAOC,UAAS,WAAW,EAAE,MAAAA,MAAK,IAAI,CAAC;AAAA,IAC7C;AAAA,EACF;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAGO,SAAS,wBACd,YACsC;AACtC,SAAO,kBAAkB,UAAU;AACrC;AAGO,SAAS,2BACd,OACA,UAAiC,CAAC,GAClC;AACA,SAAO;AAAA,IACL,wBAAwB,OAAO,YAAY,QAAQ;AAAA,IACnD,wBAAwB,OAAO;AAAA,IAC/B,yBAAyB,OAAO;AAAA,IAChC,6BAA6B,OAAO;AAAA,IACpC,8BAA8B,OAAO;AAAA,IACrC,wCAAwC,OAAO;AAAA,EACjD;AACF;AAcO,SAAS,UAAU,SAA8C;AACtE,SAAO,QAAQ,SAAS,QAAQ,CAAC,YAAY,QAAQ,aAAa,CAAC,CAAC;AACtE;AAQO,SAAS,oBACd,OACA,UAAsC,CAAC,GACrB;AAClB,SAAO,MAAM,IAAI,CAAC,MAAM,UAAU;AAChC,UAAM,YAAY,KAAK,QAAQ,mBAAmB,KAAK,KAAK,IAAI;AAChE,UAAM,SAAS,QAAQ,eACnB,GAAG,QAAQ,YAAY,IAAI,QAAQ,CAAC,KACpC,KAAK;AAET,WAAO;AAAA,MACL,GAAI,SAAS,EAAE,IAAI,OAAO,IAAI,CAAC;AAAA,MAC/B,GAAI,QAAQ,UAAU,EAAE,SAAS,QAAQ,QAAQ,IAAI,CAAC;AAAA,MACtD,GAAI,QAAQ,WAAW,EAAE,UAAU,QAAQ,SAAS,IAAI,CAAC;AAAA,MACzD,MAAM,KAAK;AAAA,MACX,MAAM;AAAA,MACN,GAAI,KAAK,YAAY,EAAE,WAAW,KAAK,UAAU,IAAI,CAAC;AAAA,MACtD,GAAI,KAAK,aAAa,EAAE,YAAY,KAAK,WAAW,IAAI,CAAC;AAAA,MACzD,GAAI,KAAK,eAAe,SAAY,EAAE,YAAY,KAAK,WAAW,IAAI,CAAC;AAAA,MACvE,QAAQ,YAAY,UAAU;AAAA,MAC9B,GAAI,YAAY,EAAE,OAAO,UAAU,IAAI,CAAC;AAAA,MACxC,YAAY,wBAAwB;AAAA,QAClC,yBAAyB;AAAA,QACzB,oBAAoB,KAAK;AAAA,QACzB,oBAAoB;AAAA,QACpB,GAAI,KAAK,KAAK,EAAE,uBAAuB,KAAK,GAAG,IAAI,CAAC;AAAA,QACpD,GAAI,KAAK,cAAc,SACnB,EAAE,8BAA8B,KAAK,UAAU,IAC/C,CAAC;AAAA,QACL,GAAI,KAAK,WAAW,SAChB,EAAE,2BAA2B,KAAK,OAAO,IACzC,CAAC;AAAA,MACP,CAAC;AAAA,IACH;AAAA,EACF,CAAC;AACH;AAQO,SAAS,eACd,KACA,SAC6B;AAC7B,MAAI,MAAM,GAAG,EAAE,SAAS,GAAG;AACzB,WAAO;AAAA,EACT;AAEA,QAAM,UAAU,QAAQ,MAAM,uBAAuB;AACrD,QAAM,aAAa,GAAG,OAAO;AAC7B,QAAM,aAAa,QAAQ,WAAW,QAAQ,IAAI,QAAQ,UAAU,QAAQ;AAC5E,QAAM,YACJ,IAAI,OAAO,SAAS,IAAI,mBAAmB,IAAI,OAAO,CAAC,CAAC,IAAI;AAC9D,QAAM,UAA0B;AAAA,IAC9B,IAAI;AAAA,IACJ;AAAA,IACA,MAAM,QAAQ;AAAA,IACd,MAAM;AAAA,IACN,WAAW,QAAQ,UAAU,YAAY;AAAA,IACzC,YAAY,QAAQ,WAAW,YAAY;AAAA,IAC3C;AAAA,IACA,QAAQ,YAAY,UAAU;AAAA,IAC9B,GAAI,YAAY,EAAE,OAAO,UAAU,IAAI,CAAC;AAAA,IACxC,YAAY,wBAAwB;AAAA,MAClC,yBAAyB,QAAQ,iBAAiB;AAAA,MAClD,wBAAwB,QAAQ;AAAA,MAChC,GAAG,2BAA2B,IAAI,KAAK;AAAA,IACzC,CAAC;AAAA,EACH;AACA,QAAM,YAAY,oBAAoB,UAAU,IAAI,OAAO,GAAG;AAAA,IAC5D;AAAA,IACA,UAAU;AAAA,IACV,cAAc,GAAG,OAAO;AAAA,EAC1B,CAAC;AACD,QAAM,QAAyB;AAAA,IAC7B,IAAI;AAAA,IACJ,MAAM,QAAQ;AAAA,IACd,WAAW,QAAQ,UAAU,YAAY;AAAA,IACzC,YAAY,QAAQ,WAAW,YAAY;AAAA,IAC3C;AAAA,IACA,GAAI,QAAQ,SAAS,EAAE,UAAU,EAAE,QAAQ,QAAQ,OAAO,EAAE,IAAI,CAAC;AAAA,IACjE,OAAO,CAAC,SAAS,GAAG,SAAS;AAAA,EAC/B;AAEA,MAAI,SAAS,CAAC,KAAK;AACnB,SAAO;AACT;AAEA,IAAI,uBAAuB;AAE3B,SAAS,yBAAyB;AAChC,0BAAwB;AACxB,SAAO,SAAS,oBAAoB;AACtC;AAYO,SAAS,MAAM,KAAmC;AACvD,UAAQ,IAAI,UAAU,CAAC,GAAG,QAAQ,CAAC,UAAU,MAAM,KAAK;AAC1D;AAQO,SAAS,YACd,KACA,MACkB;AAClB,SAAO,MAAM,GAAG,EAAE,OAAO,CAAC,SAAS,KAAK,SAAS,IAAI;AACvD;AAOO,SAAS,YAAY,KAAmC;AAC7D,SAAO,MAAM,GAAG,EAAE,OAAO,CAAC,SAAS,KAAK,WAAW,WAAW,KAAK,KAAK;AAC1E;AAeO,SAAS,eACd,SACA,MACqB;AACrB,SAAO,QAAQ,SAAS,OAAO,CAAC,YAAY,QAAQ,SAAS,IAAI;AACnE;AAEA,SAAS,0BAA0B,SAA4B;AAC7D,SACE,QAAQ,YAAY,WACnB,OAAO,QAAQ,YAAY,YAAY,QAAQ,QAAQ,KAAK,EAAE,SAAS;AAE5E;AAYO,SAAS,eAAe,SAA4B;AACzD,SAAO,eAAe,SAAS,QAAQ;AACzC;AAYO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAYO,SAAS,kBAAkB,SAA4B;AAC5D,SAAO,eAAe,SAAS,WAAW;AAC5C;AAYO,SAAS,8BAA8B,SAA4B;AACxE,SAAO,CAAC,GAAG,kBAAkB,OAAO,CAAC,EAClC,QAAQ,EACR,KAAK,yBAAyB,GAAG;AACtC;AAYO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAiBO,SAAS,wBACd,OACA,KACiB;AACjB,QAAM,YACJ,iBAAiB,QACb,QACA,IAAI,MAAM,OAAO,SAAS,eAAe,CAAC;AAChD,SAAO,OAAO,OAAO,WAAW;AAAA,IAC9B,gBAAgB;AAAA,EAClB,CAAC;AACH;AAgBO,SAAS,uBAAuB,OAAwC;AAC7E,MACE,SACA,OAAO,UAAU,YACjB,oBAAoB,SACpB,aAAc,MAAuC,cAAc,GACnE;AACA,WAAQ,MAAyC;AAAA,EACnD;AAEA,SAAO;AACT;AAGO,SAAS,aAAa,OAAqC;AAChE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,WAAO;AAAA,EACT;AAEA,QAAM,YAAY;AAMlB,SACE,oBAAoB,UAAU,OAAO,KACrC,QAAQ,UAAU,KAAK,KACvB,OAAO,UAAU,UAAU,YAC3B,CAAC,MAAM,QAAQ,UAAU,KAAK,KAC9B,MAAM,QAAQ,UAAU,MAAM;AAElC;AAGO,SAAS,oBACd,OAC4B;AAC5B,SACE,QAAQ,KAAK,KACb,OAAO,UAAU,YACjB,UAAU,QACV,cAAc,SACd,MAAM,QAAS,MAAiC,QAAQ;AAE5D;AAGO,SAAS,wBACd,QACkC;AAClC,MACE,UACA,OAAO,WAAW,YAClB,MAAM,QAAS,OAAmC,MAAM,GACxD;AACA,WAAQ,OAAwD;AAAA,EAClE;AAEA,SAAO,CAAC;AACV;AAGO,SAAS,eAAe,OAA2C;AACxE,MAAI,iBAAiB,OAAO;AAC1B,WAAO;AAAA,MACL,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,SAAS,OAAO,KAAK;AAAA,EACvB;AACF;","names":["normalized","toolCalls","spans","details","message","type"]}
|