vitest-evals 0.9.0-beta.6 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +1 -14
  2. package/dist/harness.d.mts +329 -19
  3. package/dist/harness.d.ts +329 -19
  4. package/dist/harness.js.map +1 -1
  5. package/dist/harness.mjs.map +1 -1
  6. package/dist/index.d.mts +155 -12
  7. package/dist/index.d.ts +155 -12
  8. package/dist/index.js.map +1 -1
  9. package/dist/index.mjs.map +1 -1
  10. package/dist/internal/matchers.d.mts +41 -3
  11. package/dist/internal/matchers.d.ts +41 -3
  12. package/dist/internal/matchers.js.map +1 -1
  13. package/dist/internal/matchers.mjs.map +1 -1
  14. package/dist/internal/structuredOutputScorer.d.mts +4 -0
  15. package/dist/internal/structuredOutputScorer.d.ts +4 -0
  16. package/dist/internal/structuredOutputScorer.js.map +1 -1
  17. package/dist/internal/structuredOutputScorer.mjs.map +1 -1
  18. package/dist/internal/toolCallScorer.d.mts +6 -0
  19. package/dist/internal/toolCallScorer.d.ts +6 -0
  20. package/dist/internal/toolCallScorer.js.map +1 -1
  21. package/dist/internal/toolCallScorer.mjs.map +1 -1
  22. package/dist/judges/index.d.mts +2 -2
  23. package/dist/judges/index.d.ts +2 -2
  24. package/dist/judges/index.js.map +1 -1
  25. package/dist/judges/index.mjs.map +1 -1
  26. package/dist/judges/structuredOutputJudge.d.mts +54 -4
  27. package/dist/judges/structuredOutputJudge.d.ts +54 -4
  28. package/dist/judges/structuredOutputJudge.js.map +1 -1
  29. package/dist/judges/structuredOutputJudge.mjs.map +1 -1
  30. package/dist/judges/toolCallJudge.d.mts +56 -6
  31. package/dist/judges/toolCallJudge.d.ts +56 -6
  32. package/dist/judges/toolCallJudge.js.map +1 -1
  33. package/dist/judges/toolCallJudge.mjs.map +1 -1
  34. package/dist/judges/types.d.mts +68 -3
  35. package/dist/judges/types.d.ts +68 -3
  36. package/dist/judges/types.js.map +1 -1
  37. package/dist/legacy/scorers/index.js.map +1 -1
  38. package/dist/legacy/scorers/index.mjs.map +1 -1
  39. package/dist/legacy/scorers/structuredOutputScorer.js.map +1 -1
  40. package/dist/legacy/scorers/structuredOutputScorer.mjs.map +1 -1
  41. package/dist/legacy/scorers/toolCallScorer.js.map +1 -1
  42. package/dist/legacy/scorers/toolCallScorer.mjs.map +1 -1
  43. package/dist/legacy/scorers/utils.js.map +1 -1
  44. package/dist/legacy/scorers/utils.mjs.map +1 -1
  45. package/dist/legacy.js.map +1 -1
  46. package/dist/legacy.mjs.map +1 -1
  47. package/dist/reporter.js.map +1 -1
  48. package/dist/reporter.mjs.map +1 -1
  49. package/package.json +13 -1
package/README.md CHANGED
@@ -153,7 +153,7 @@ Use Vitest JSON as the eval report artifact. It preserves the `meta` field that
153
153
  contains eval scores and normalized harness runs.
154
154
 
155
155
  ```sh
156
- vitest run evals \
156
+ vitest run --config vitest.evals.config.ts \
157
157
  --reporter=vitest-evals/reporter \
158
158
  --reporter=json \
159
159
  --outputFile.json=vitest-results.json
@@ -389,16 +389,3 @@ When you only need deterministic contract checks, built-ins such as
389
389
  `StructuredOutputJudge()` and `ToolCallJudge()` are still available. The primary
390
390
  documentation examples intentionally use factuality/rubric judges because those
391
391
  match the product's LLM-as-a-judge direction.
392
-
393
- ## Legacy Compatibility
394
-
395
- The root package is harness-first and judge-first. Legacy scorer-first suites
396
- and `evaluate(...)` live under `vitest-evals/legacy`.
397
-
398
- ```ts
399
- import {
400
- describeEval,
401
- StructuredOutputScorer,
402
- ToolCallScorer,
403
- } from "vitest-evals/legacy";
404
- ```
package/dist/harness.d.mts CHANGED
@@ -4,51 +4,128 @@ type JsonPrimitive = string | number | boolean | null;
4
4
  type JsonValue = JsonPrimitive | JsonValue[] | {
5
5
  [key: string]: JsonValue;
6
6
  };
7
- /** Normalized record for one tool call observed during a harness run. */
7
+ /**
8
+ * Normalized record for one tool call observed during a harness run.
9
+ *
10
+ * @example
11
+ * ```ts
12
+ * const call: ToolCallRecord = {
13
+ * name: "lookupInvoice",
14
+ * arguments: { invoiceId: "inv_123" },
15
+ * result: { refundable: true },
16
+ * };
17
+ * ```
18
+ */
8
19
  type ToolCallRecord = {
20
+ /** Provider or runtime tool-call id when one is available. */
9
21
  id?: string;
22
+ /** Tool name as exposed to the agent or application runtime. */
10
23
  name: string;
24
+ /** JSON-safe tool arguments after provider/runtime normalization. */
11
25
  arguments?: Record<string, JsonValue>;
26
+ /** JSON-safe tool result returned by the application tool. */
12
27
  result?: JsonValue;
28
+ /** Normalized tool error when execution failed. */
13
29
  error?: {
14
30
  message: string;
15
31
  type?: string;
16
32
  [key: string]: JsonValue | undefined;
17
33
  };
34
+ /** ISO timestamp for the start of tool execution. */
18
35
  startedAt?: string;
36
+ /** ISO timestamp for the end of tool execution. */
19
37
  finishedAt?: string;
38
+ /** Tool execution duration in milliseconds. */
20
39
  durationMs?: number;
40
+ /** Extra JSON-safe tool metadata for reporters and custom judges. */
21
41
  metadata?: Record<string, JsonValue>;
22
42
  };
23
- /** Normalized message recorded in a harness session transcript. */
43
+ /**
44
+ * Normalized message recorded in a harness session transcript.
45
+ *
46
+ * @example
47
+ * ```ts
48
+ * const message: NormalizedMessage = {
49
+ * role: "assistant",
50
+ * content: { status: "approved" },
51
+ * toolCalls: [{ name: "lookupInvoice" }],
52
+ * };
53
+ * ```
54
+ */
24
55
  type NormalizedMessage = {
56
+ /** Transcript role for the normalized message. */
25
57
  role: "system" | "user" | "assistant" | "tool";
58
+ /** JSON-safe message content. */
26
59
  content?: JsonValue;
60
+ /** Tool calls associated with this message. */
27
61
  toolCalls?: ToolCallRecord[];
62
+ /** Extra JSON-safe message metadata. */
28
63
  metadata?: Record<string, JsonValue>;
29
64
  };
30
- /** Provider usage summary attached to a normalized harness run. */
65
+ /**
66
+ * Provider usage summary attached to a normalized harness run.
67
+ *
68
+ * @example
69
+ * ```ts
70
+ * const usage: UsageSummary = {
71
+ * provider: "openai",
72
+ * model: "gpt-4o-mini",
73
+ * inputTokens: 212,
74
+ * outputTokens: 48,
75
+ * totalTokens: 260,
76
+ * };
77
+ * ```
78
+ */
31
79
  type UsageSummary = {
80
+ /** Provider that served the application run. */
32
81
  provider?: string;
82
+ /** Model used for the application run. */
33
83
  model?: string;
84
+ /** Input, prompt, or request tokens consumed by the run. */
34
85
  inputTokens?: number;
86
+ /** Output or completion tokens produced by the run. */
35
87
  outputTokens?: number;
88
+ /** Reasoning tokens reported by providers that expose them. */
36
89
  reasoningTokens?: number;
90
+ /** Total token count reported by the provider or adapter. */
37
91
  totalTokens?: number;
92
+ /** Count of tool calls observed during the run. */
38
93
  toolCalls?: number;
94
+ /** Retry count observed during the run. */
39
95
  retries?: number;
96
+ /** Provider-specific JSON-safe usage details. Cost estimates belong here. */
40
97
  metadata?: Record<string, JsonValue>;
41
98
  };
42
99
  /** Timing summary attached to a normalized harness run. */
43
100
  type TimingSummary = {
101
+ /** End-to-end run duration in milliseconds. */
44
102
  totalMs?: number;
103
+ /** Extra JSON-safe timing metadata. */
45
104
  metadata?: Record<string, JsonValue>;
46
105
  };
47
- /** JSON-serializable transcript produced by the system under test. */
106
+ /**
107
+ * JSON-serializable transcript produced by the system under test.
108
+ *
109
+ * @example
110
+ * ```ts
111
+ * const session: NormalizedSession = {
112
+ * provider: "openai",
113
+ * model: "gpt-4o-mini",
114
+ * messages: [
115
+ * { role: "user", content: "Refund invoice inv_123" },
116
+ * { role: "assistant", content: { status: "approved" } },
117
+ * ],
118
+ * };
119
+ * ```
120
+ */
48
121
  type NormalizedSession = {
122
+ /** Ordered normalized transcript messages. */
49
123
  messages: NormalizedMessage[];
124
+ /** Provider that produced the session when known. */
50
125
  provider?: string;
126
+ /** Model that produced the session when known. */
51
127
  model?: string;
128
+ /** Extra JSON-safe session metadata. */
52
129
  metadata?: Record<string, JsonValue>;
53
130
  };
54
131
  type OutputField<TOutput extends JsonValue | undefined> = undefined extends TOutput ? {
@@ -56,64 +133,165 @@ type OutputField<TOutput extends JsonValue | undefined> = undefined extends TOut
56
133
  } : {
57
134
  output: TOutput;
58
135
  };
59
- /** Normalized result returned by every harness execution. */
136
+ /**
137
+ * Normalized result returned by every harness execution.
138
+ *
139
+ * @example
140
+ * ```ts
141
+ * const run: HarnessRun<{ status: "approved" }> = {
142
+ * output: { status: "approved" },
143
+ * session: {
144
+ * messages: [
145
+ * { role: "user", content: "Refund invoice inv_123" },
146
+ * { role: "assistant", content: { status: "approved" } },
147
+ * ],
148
+ * },
149
+ * usage: { totalTokens: 260 },
150
+ * errors: [],
151
+ * };
152
+ * ```
153
+ */
60
154
  type HarnessRun<TOutput extends JsonValue | undefined = JsonValue | undefined> = OutputField<TOutput> & {
155
+ /** Normalized transcript and provider/session metadata. */
61
156
  session: NormalizedSession;
157
+ /** Stable provider usage units such as tokens, tools, and retries. */
62
158
  usage: UsageSummary;
159
+ /** Optional timing summary for the run. */
63
160
  timings?: TimingSummary;
161
+ /** JSON-safe run artifacts captured by the harness or test context. */
64
162
  artifacts?: Record<string, JsonValue>;
163
+ /** Normalized errors captured during execution. */
65
164
  errors: Array<Record<string, JsonValue>>;
66
165
  };
67
166
  /** Error value with an attached partial or complete normalized harness run. */
68
167
  type HarnessRunError = Error & {
168
+ /** Attached normalized harness run recovered by `getHarnessRunFromError(...)`. */
69
169
  vitestEvalsRun: HarnessRun;
70
170
  };
71
171
  /** Per-run metadata shape accepted by harnesses and eval tests. */
72
172
  type HarnessMetadata = Record<string, unknown>;
73
- /** Runtime context passed from the eval fixture into a harness run. */
173
+ /**
174
+ * Runtime context passed from the eval fixture into a harness run.
175
+ *
176
+ * @example
177
+ * ```ts
178
+ * const harness: Harness<string> = {
179
+ * name: "refund-agent",
180
+ * async run(input, context) {
181
+ * context.setArtifact("inputLength", input.length);
182
+ *
183
+ * return {
184
+ * output: undefined,
185
+ * session: { messages: [{ role: "user", content: input }] },
186
+ * usage: {},
187
+ * errors: [],
188
+ * };
189
+ * },
190
+ * };
191
+ * ```
192
+ */
74
193
  type HarnessContext<TMetadata extends HarnessMetadata = HarnessMetadata> = {
194
+ /** Per-run metadata passed through `run(input, { metadata })`. */
75
195
  metadata: Readonly<TMetadata>;
196
+ /** Abort signal from Vitest when available. */
76
197
  signal?: AbortSignal;
198
+ /** Mutable JSON-safe artifact bag shared with the harness. */
77
199
  artifacts: Record<string, JsonValue>;
200
+ /** Stores one JSON-safe artifact on the current run. */
78
201
  setArtifact: (name: string, value: JsonValue) => void;
79
202
  };
80
- /** Adapter that executes the system under test and returns a normalized run. */
203
+ /**
204
+ * Adapter that executes the system under test and returns a normalized run.
205
+ *
206
+ * @example
207
+ * ```ts
208
+ * const harness: Harness<string, { status: "approved" | "denied" }> = {
209
+ * name: "refund-agent",
210
+ * async run(input, context) {
211
+ * return normalizeHarnessRun(input, await runRefundFlow(input), context);
212
+ * },
213
+ * };
214
+ * ```
215
+ */
81
216
  type Harness<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined, TMetadata extends HarnessMetadata = HarnessMetadata> = {
217
+ /** Stable harness name used in reports. */
82
218
  name: string;
219
+ /** Executes the system under test and returns a normalized run. */
83
220
  run: (input: TInput, context: HarnessContext<TMetadata>) => Promise<HarnessRun<TOutput>>;
84
221
  };
85
222
  /** Value or promise accepted by lightweight harness callbacks. */
86
223
  type MaybePromise<T> = T | Promise<T>;
87
224
  /** Lightweight tool-call record accepted by `createHarness(...)` results. */
88
225
  type SimpleToolCallRecord = Omit<ToolCallRecord, "arguments" | "result" | "error" | "metadata"> & {
226
+ /** Raw tool arguments accepted by `createHarness(...)` before normalization. */
89
227
  arguments?: unknown;
228
+ /** Raw tool result accepted by `createHarness(...)` before normalization. */
90
229
  result?: unknown;
230
+ /** Raw tool error accepted by `createHarness(...)` before normalization. */
91
231
  error?: unknown;
232
+ /** Raw tool metadata accepted by `createHarness(...)` before normalization. */
92
233
  metadata?: Record<string, unknown>;
93
234
  };
94
- /** Lightweight result shape normalized by `createHarness(...)`. */
235
+ /**
236
+ * Lightweight result shape normalized by `createHarness(...)`.
237
+ *
238
+ * @example
239
+ * ```ts
240
+ * const result: SimpleHarnessResult<{ status: "approved" }> = {
241
+ * output: { status: "approved" },
242
+ * toolCalls: [{ name: "lookupInvoice", arguments: { invoiceId: "inv_123" } }],
243
+ * usage: { totalTokens: 260 },
244
+ * };
245
+ * ```
246
+ */
95
247
  type SimpleHarnessResult<TOutput extends JsonValue | undefined = JsonValue | undefined> = OutputField<TOutput> & {
248
+ /** Pre-normalized transcript messages. When omitted, a default user/assistant transcript is created. */
96
249
  messages?: NormalizedMessage[];
250
+ /** Lightweight tool-call records to normalize into the session. */
97
251
  toolCalls?: SimpleToolCallRecord[];
252
+ /** Usage summary to attach to the run. */
98
253
  usage?: UsageSummary;
254
+ /** Timing summary to attach to the run. */
99
255
  timings?: TimingSummary;
256
+ /** Raw artifact values to normalize and merge into the run. */
100
257
  artifacts?: Record<string, unknown>;
258
+ /** Raw session metadata to normalize into the session. */
101
259
  metadata?: Record<string, unknown>;
260
+ /** Raw errors to normalize into the run. */
102
261
  errors?: unknown[];
103
262
  };
104
263
  /** Either a complete normalized run or a lightweight result to normalize. */
105
264
  type HarnessResultLike<TOutput extends JsonValue | undefined = JsonValue | undefined> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;
106
265
  /** Arguments passed to the `createHarness(...)` convenience callback. */
107
266
  type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {
267
+ /** Original input passed to `run(input)`. */
108
268
  input: TInput;
269
+ /** Read-only metadata passed to `run(input, { metadata })`. */
109
270
  metadata: Readonly<TMetadata>;
271
+ /** Abort signal from Vitest when available. */
110
272
  signal?: AbortSignal;
273
+ /** Mutable run artifact bag. */
111
274
  artifacts: HarnessContext<TMetadata>["artifacts"];
275
+ /** Stores one JSON-safe artifact on the current run. */
112
276
  setArtifact: HarnessContext<TMetadata>["setArtifact"];
113
277
  };
114
- /** Options for creating a lightweight custom application harness. */
278
+ /**
279
+ * Options for creating a lightweight custom application harness.
280
+ *
281
+ * @example
282
+ * ```ts
283
+ * const options: CreateHarnessOptions<string, { status: "approved" }> = {
284
+ * name: "refund-agent",
285
+ * run: async ({ input }) => ({
286
+ * output: await classifyRefund(input),
287
+ * }),
288
+ * };
289
+ * ```
290
+ */
115
291
  type CreateHarnessOptions<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined, TMetadata extends HarnessMetadata = HarnessMetadata> = {
292
+ /** Stable harness name used in reports. */
116
293
  name: string;
294
+ /** Executes application code and returns either a lightweight result or full `HarnessRun`. */
117
295
  run: (args: CreateHarnessRunArgs<TInput, TMetadata>) => MaybePromise<HarnessResultLike<TOutput>>;
118
296
  };
119
297
  /** Returns true when a value exposes a callable method with the given name. */
@@ -126,25 +304,157 @@ declare function normalizeRecord(value: Record<string, unknown>): Record<string,
126
304
  declare function normalizeMetadata(value: Record<string, unknown>): Record<string, JsonValue> | undefined;
127
305
  /** Converts arbitrary content into the JSON-safe message content shape. */
128
306
  declare function normalizeContent(value: unknown): JsonValue;
129
- /** Creates a harness from the common "run app code and return output" shape. */
307
+ /**
308
+ * Creates a harness from the common "run app code and return output" shape.
309
+ *
310
+ * @param options - Harness name plus the callback that executes app code.
311
+ *
312
+ * @example
313
+ * ```ts
314
+ * import { createHarness } from "vitest-evals";
315
+ *
316
+ * export const refundHarness = createHarness<
317
+ * string,
318
+ * { status: "approved" | "denied" },
319
+ * { expected: { status: "approved" | "denied" } }
320
+ * >({
321
+ * name: "refund-agent",
322
+ * run: async ({ input, metadata, setArtifact }) => {
323
+ * const result = await runRefundFlow(input, metadata);
324
+ * const output = { status: result.status };
325
+ *
326
+ * setArtifact("case", { expected: metadata.expected.status });
327
+ *
328
+ * return {
329
+ * output,
330
+ * toolCalls: result.toolCalls,
331
+ * usage: { provider: "openai", model: "gpt-4o-mini" },
332
+ * };
333
+ * },
334
+ * });
335
+ * ```
336
+ */
130
337
  declare function createHarness<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined, TMetadata extends HarnessMetadata = HarnessMetadata>(options: CreateHarnessOptions<TInput, TOutput, TMetadata>): Harness<TInput, TOutput, TMetadata>;
131
- /** Normalizes a lightweight harness result into the reporter-facing run shape. */
338
+ /**
339
+ * Normalizes a lightweight harness result into the reporter-facing run shape.
340
+ *
341
+ * @param input - Original input passed to the harness.
342
+ * @param result - Lightweight result or pre-normalized harness run.
343
+ * @param context - Optional per-run context used to merge artifacts.
344
+ *
345
+ * @example
346
+ * ```ts
347
+ * const run = normalizeHarnessRun("Refund invoice inv_123", {
348
+ * output: { status: "approved" },
349
+ * toolCalls: [{ name: "lookupInvoice", arguments: { invoiceId: "inv_123" } }],
350
+ * usage: { provider: "openai", model: "gpt-4o-mini" },
351
+ * });
352
+ *
353
+ * expect(toolCalls(run.session)).toHaveLength(1);
354
+ * ```
355
+ */
132
356
  declare function normalizeHarnessRun<TInput = unknown, TMetadata extends HarnessMetadata = HarnessMetadata, TOutput extends JsonValue | undefined = JsonValue | undefined>(input: TInput, result: HarnessResultLike<TOutput>, context?: HarnessContext<TMetadata>): HarnessRun<TOutput>;
133
- /** Flattens every recorded tool call from a normalized session. */
357
+ /**
358
+ * Flattens every recorded tool call from a normalized session.
359
+ *
360
+ * @param session - Normalized session produced by a harness run.
361
+ *
362
+ * @example
363
+ * ```ts
364
+ * const names = toolCalls(result.session).map((call) => call.name);
365
+ *
366
+ * expect(names).toEqual(["lookupInvoice", "createRefund"]);
367
+ * ```
368
+ */
134
369
  declare function toolCalls(session: NormalizedSession): ToolCallRecord[];
135
- /** Filters normalized session messages by role. */
370
+ /**
371
+ * Filters normalized session messages by role.
372
+ *
373
+ * @param session - Normalized session produced by a harness run.
374
+ * @param role - Message role to keep.
375
+ *
376
+ * @example
377
+ * ```ts
378
+ * const assistantText = messagesByRole(result.session, "assistant")
379
+ * .map((message) => message.content)
380
+ * .join("\n");
381
+ * ```
382
+ */
136
383
  declare function messagesByRole(session: NormalizedSession, role: NormalizedMessage["role"]): NormalizedMessage[];
137
- /** Returns every normalized system message from a session. */
384
+ /**
385
+ * Returns every normalized system message from a session.
386
+ *
387
+ * @param session - Normalized session produced by a harness run.
388
+ *
389
+ * @example
390
+ * ```ts
391
+ * const systemPrompts = systemMessages(result.session);
392
+ * ```
393
+ */
138
394
  declare function systemMessages(session: NormalizedSession): NormalizedMessage[];
139
- /** Returns every normalized user message from a session. */
395
+ /**
396
+ * Returns every normalized user message from a session.
397
+ *
398
+ * @param session - Normalized session produced by a harness run.
399
+ *
400
+ * @example
401
+ * ```ts
402
+ * const firstPrompt = userMessages(result.session)[0]?.content;
403
+ * ```
404
+ */
140
405
  declare function userMessages(session: NormalizedSession): NormalizedMessage[];
141
- /** Returns every normalized assistant message from a session. */
406
+ /**
407
+ * Returns every normalized assistant message from a session.
408
+ *
409
+ * @param session - Normalized session produced by a harness run.
410
+ *
411
+ * @example
412
+ * ```ts
413
+ * const finalAnswer = assistantMessages(result.session).at(-1)?.content;
414
+ * ```
415
+ */
142
416
  declare function assistantMessages(session: NormalizedSession): NormalizedMessage[];
143
- /** Returns every normalized tool message from a session. */
417
+ /**
418
+ * Returns every normalized tool message from a session.
419
+ *
420
+ * @param session - Normalized session produced by a harness run.
421
+ *
422
+ * @example
423
+ * ```ts
424
+ * const toolOutputs = toolMessages(result.session).map((message) => message.content);
425
+ * ```
426
+ */
144
427
  declare function toolMessages(session: NormalizedSession): NormalizedMessage[];
145
- /** Attaches a partial or complete harness run to an arbitrary thrown error. */
428
+ /**
429
+ * Attaches a partial or complete harness run to an arbitrary thrown error.
430
+ *
431
+ * @param error - Thrown value to wrap.
432
+ * @param run - Partial or complete normalized harness run to preserve.
433
+ *
434
+ * @example
435
+ * ```ts
436
+ * try {
437
+ * return await runAgent(input);
438
+ * } catch (error) {
439
+ * throw attachHarnessRunToError(error, partialRun);
440
+ * }
441
+ * ```
442
+ */
146
443
  declare function attachHarnessRunToError(error: unknown, run: HarnessRun): HarnessRunError;
147
- /** Reads an attached harness run back off a previously wrapped error value. */
444
+ /**
445
+ * Reads an attached harness run back off a previously wrapped error value.
446
+ *
447
+ * @param error - Unknown thrown value that may contain a harness run.
448
+ *
449
+ * @example
450
+ * ```ts
451
+ * const partialRun = getHarnessRunFromError(error);
452
+ *
453
+ * if (partialRun) {
454
+ * console.log(toolCalls(partialRun.session));
455
+ * }
456
+ * ```
457
+ */
148
458
  declare function getHarnessRunFromError(error: unknown): HarnessRun | undefined;
149
459
  /** Returns true when a value matches the normalized `HarnessRun` contract. */
150
460
  declare function isHarnessRun(value: unknown): value is HarnessRun;