vitest-evals 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +27 -35
  2. package/dist/harness.d.mts +15 -20
  3. package/dist/harness.d.ts +15 -20
  4. package/dist/harness.js +0 -1
  5. package/dist/harness.js.map +1 -1
  6. package/dist/harness.mjs +0 -1
  7. package/dist/harness.mjs.map +1 -1
  8. package/dist/index.d.mts +45 -68
  9. package/dist/index.d.ts +45 -68
  10. package/dist/index.js +21 -40
  11. package/dist/index.js.map +1 -1
  12. package/dist/index.mjs +21 -40
  13. package/dist/index.mjs.map +1 -1
  14. package/dist/internal/toolCallScorer.js.map +1 -1
  15. package/dist/internal/toolCallScorer.mjs.map +1 -1
  16. package/dist/judges/factualityJudge.d.mts +14 -13
  17. package/dist/judges/factualityJudge.d.ts +14 -13
  18. package/dist/judges/factualityJudge.js +9 -9
  19. package/dist/judges/factualityJudge.js.map +1 -1
  20. package/dist/judges/factualityJudge.mjs +9 -9
  21. package/dist/judges/factualityJudge.mjs.map +1 -1
  22. package/dist/judges/index.js +17 -20
  23. package/dist/judges/index.js.map +1 -1
  24. package/dist/judges/index.mjs +17 -20
  25. package/dist/judges/index.mjs.map +1 -1
  26. package/dist/judges/judgeHarness.d.mts +6 -10
  27. package/dist/judges/judgeHarness.d.ts +6 -10
  28. package/dist/judges/judgeHarness.js +3 -8
  29. package/dist/judges/judgeHarness.js.map +1 -1
  30. package/dist/judges/judgeHarness.mjs +3 -8
  31. package/dist/judges/judgeHarness.mjs.map +1 -1
  32. package/dist/judges/structuredOutputJudge.d.mts +7 -9
  33. package/dist/judges/structuredOutputJudge.d.ts +7 -9
  34. package/dist/judges/structuredOutputJudge.js +3 -3
  35. package/dist/judges/structuredOutputJudge.js.map +1 -1
  36. package/dist/judges/structuredOutputJudge.mjs +3 -3
  37. package/dist/judges/structuredOutputJudge.mjs.map +1 -1
  38. package/dist/judges/toolCallJudge.d.mts +12 -9
  39. package/dist/judges/toolCallJudge.d.ts +12 -9
  40. package/dist/judges/toolCallJudge.js +3 -3
  41. package/dist/judges/toolCallJudge.js.map +1 -1
  42. package/dist/judges/toolCallJudge.mjs +3 -3
  43. package/dist/judges/toolCallJudge.mjs.map +1 -1
  44. package/dist/judges/types.d.mts +13 -24
  45. package/dist/judges/types.d.ts +13 -24
  46. package/dist/judges/types.js.map +1 -1
  47. package/dist/legacy/scorers/index.js.map +1 -1
  48. package/dist/legacy/scorers/index.mjs.map +1 -1
  49. package/dist/legacy/scorers/toolCallScorer.js.map +1 -1
  50. package/dist/legacy/scorers/toolCallScorer.mjs.map +1 -1
  51. package/dist/legacy.js.map +1 -1
  52. package/dist/legacy.mjs.map +1 -1
  53. package/dist/reporter.js.map +1 -1
  54. package/dist/reporter.mjs.map +1 -1
  55. package/package.json +3 -3
package/README.md CHANGED
@@ -30,8 +30,8 @@ workflow.
30
30
 
31
31
  - `describeEval(...)` binds exactly one harness to a suite
32
32
  - the suite callback receives a fixture-backed Vitest `it`
33
- - `run(input, { metadata? })` executes the harness explicitly and returns a
34
- normalized `HarnessRun`
33
+ - `run(input)` executes the harness explicitly and returns a normalized
34
+ `HarnessRun`
35
35
  - the returned `result.output` is the app-facing value you assert on directly
36
36
  - the returned `result.session` is the canonical JSON-serializable transcript for
37
37
  reporting, replay, tool assertions, and judges
@@ -41,19 +41,18 @@ workflow.
41
41
  that do not return traces themselves. Span attributes include typed
42
42
  OpenTelemetry GenAI semantic keys while still allowing provider-specific
43
43
  metadata
44
- - scenario-specific judge criteria can live in `input`; use `metadata` for
45
- per-run expectations or harness configuration that are not part of the
46
- scenario payload
44
+ - scenario-specific judge criteria should live in `input` or explicit matcher
45
+ options, depending on whether the app or only the judge needs them
47
46
  - suite-level `judges` are optional and run automatically after each `run(...)`
48
47
  - suite-level `judgeThreshold` controls fail-on-score for those automatic judges
49
48
  - every judge is a named object with `assess(ctx)`
50
49
  - every judge receives `JudgeContext` with typed `input`, typed `output`, the
51
- normalized run/session, tool calls, and metadata; `output` is only optional
50
+ normalized run/session, and tool calls; `output` is only optional
52
51
  when the harness output type includes `undefined`
53
52
  - judges own their prompt, rubric, and parsing; LLM-backed judges use
54
53
  `ctx.runJudge(...)` from a configured `judgeHarness`
55
54
  - explicit judge assertions use
56
- `await expect(result).toSatisfyJudge(judge, context)`
55
+ `await expect(result).toSatisfyJudge(judge, options)`
57
56
 
58
57
  ## Explicit Run Example
59
58
 
@@ -80,18 +79,16 @@ describeEval(
80
79
  agent: () => createRefundAgent(),
81
80
  }),
82
81
  judgeHarness,
83
- judges: [FactualityJudge()],
82
+ judges: [
83
+ FactualityJudge({
84
+ expected: "The refund request is approved.",
85
+ }),
86
+ ],
84
87
  judgeThreshold: 0.6,
85
88
  },
86
89
  (it) => {
87
90
  it("approves a refundable invoice", async ({ run }) => {
88
- const result = await run("Refund invoice inv_123", {
89
- metadata: {
90
- expected: "The refund request is approved.",
91
- expectedStatus: "approved",
92
- expectedTools: ["lookupInvoice", "createRefund"],
93
- },
94
- });
91
+ const result = await run("Refund invoice inv_123");
95
92
 
96
93
  expect(result.output).toMatchObject({ status: "approved" });
97
94
  expect(toolCalls(result.session).map((call) => call.name)).toEqual([
@@ -121,13 +118,11 @@ describeEval("refund agent", { harness }, (it) => {
121
118
  input: "Refund invoice inv_404",
122
119
  expectedStatus: "denied",
123
120
  },
124
- ])("$name", async ({ input, ...metadata }, { run }) => {
125
- const result = await run(input, {
126
- metadata,
127
- });
121
+ ])("$name", async ({ input, expectedStatus }, { run }) => {
122
+ const result = await run(input);
128
123
 
129
124
  expect(result.output).toMatchObject({
130
- status: metadata.expectedStatus,
125
+ status: expectedStatus,
131
126
  });
132
127
  });
133
128
  });
@@ -213,7 +208,7 @@ First-party harness packages are conveniences, not the only supported path. If
213
208
  you need to test a full application flow, use `createHarness(...)` to run your
214
209
  app through its normal entrypoint and return the app-facing output. Judges own
215
210
  their prompt/rubric text separately from the system under test.
216
- When generics are needed, use `createHarness<Input, Output, Metadata>(...)`.
211
+ When generics are needed, use `createHarness<Input, Output>(...)`.
217
212
 
218
213
  ```ts
219
214
  import {
@@ -221,7 +216,6 @@ import {
221
216
  createJudge,
222
217
  createJudgeHarness,
223
218
  describeEval,
224
- type JudgeContext,
225
219
  } from "vitest-evals";
226
220
 
227
221
  type AppEvent = {
@@ -238,14 +232,12 @@ type AppEvalInput = {
238
232
  };
239
233
  };
240
234
 
241
- type AppEvalMetadata = Record<string, never>;
242
-
243
235
  type AppOutput = {
244
236
  replies: Array<{ text: string }>;
245
237
  sideEffects: string[];
246
238
  };
247
239
 
248
- const appHarness = createHarness<AppEvalInput, AppOutput, AppEvalMetadata>({
240
+ const appHarness = createHarness<AppEvalInput, AppOutput>({
249
241
  name: "custom-app",
250
242
  run: async ({ input, signal }) => {
251
243
  const result = await replayAppEvents(input.events, {
@@ -271,9 +263,9 @@ const judgeHarness = createJudgeHarness({
271
263
  promptJudgeModel({ prompt, signal }),
272
264
  });
273
265
 
274
- const AppRubricJudge = createJudge(
266
+ const AppRubricJudge = createJudge<AppEvalInput, AppOutput>(
275
267
  "AppRubricJudge",
276
- async (ctx: JudgeContext<AppEvalInput, AppOutput, AppEvalMetadata>) => {
268
+ async (ctx) => {
277
269
  if (!ctx.runJudge) {
278
270
  throw new Error("AppRubricJudge requires a configured judgeHarness.");
279
271
  }
@@ -323,11 +315,11 @@ describeEval(
323
315
  Use `Harness.run(...)` for the application under test. Calling
324
316
  `ctx.harness.run(...)` from inside a judge runs the application a second time,
325
317
  so reserve that for judges that intentionally need a second execution. Put
326
- criteria on `input` when they are part of the scenario itself; use per-run
327
- `metadata` for harness configuration or expectations that are not part of the
328
- scenario payload. `createHarness(...)` builds a default user/assistant session
329
- from `input` and typed `output`; return a full `HarnessRun` only when you need
330
- exact session control.
318
+ criteria on `input` when they are part of the scenario itself; pass
319
+ case-specific judge criteria through matcher options, or configure suite-wide
320
+ criteria on the judge instance. `createHarness(...)` builds a default
321
+ user/assistant session from `input` and typed `output`; return a full
322
+ `HarnessRun` only when you need exact session control.
331
323
 
332
324
  Provider setup and rubric parsing stay in your judge. The core
333
325
  package only requires the judge to return a `JudgeResult` with a score and
@@ -449,7 +441,7 @@ so use that only when a second run is intentional.
449
441
 
450
442
  For an `EvalHarnessRun` returned by fixture `run(...)`,
451
443
  `toSatisfyJudge(...)` uses the run's typed `output` and reuses the registered
452
- input and metadata. It requires any custom judge params and rejects judges whose
444
+ input. It requires any custom judge params and rejects judges whose
453
445
  output type cannot assess the received value. Inside an eval test,
454
446
  matcher calls on registered output objects or session objects reuse that exact
455
447
  run context when the value can be registered by reference, so
@@ -457,10 +449,10 @@ run context when the value can be registered by reference, so
457
449
  outputs. Other raw values fall back to the current test's most recent
458
450
  `run(...)` context. For
459
451
  manually-created runs or values outside an eval context, pass any required
460
- `input`, `metadata`, or `harness` in matcher options. Structured or
452
+ `input` or `harness` in matcher options. Structured or
461
453
  programmatic result checks should usually assert on `result.output` directly.
462
454
  When a judge needs richer normalized context or the configured suite harness,
463
- type it with `JudgeContext`.
455
+ type it with `createJudge<Input, Output>(...)` or `JudgeContext<Input, Output>`.
464
456
 
465
457
  When you only need deterministic contract checks, built-ins such as
466
458
  `StructuredOutputJudge()` and `ToolCallJudge()` are still available.
package/dist/harness.d.mts CHANGED
@@ -30,7 +30,7 @@ type OutputField<TOutput extends JsonValue | undefined> = undefined extends TOut
30
30
  } : {
31
31
  output: TOutput;
32
32
  };
33
- /** Per-run metadata shape accepted by harnesses and eval tests. */
33
+ /** Generic JSON-like metadata record used by normalized artifacts and reports. */
34
34
  type HarnessMetadata = Record<string, unknown>;
35
35
  /**
36
36
  * Runtime context passed from the eval fixture into a harness run.
@@ -52,9 +52,7 @@ type HarnessMetadata = Record<string, unknown>;
52
52
  * };
53
53
  * ```
54
54
  */
55
- type HarnessContext<TMetadata extends HarnessMetadata = HarnessMetadata> = {
56
- /** Per-run metadata passed through `run(input, { metadata })`. */
57
- metadata: Readonly<TMetadata>;
55
+ type HarnessContext = {
58
56
  /** Abort signal from Vitest when available. */
59
57
  signal?: AbortSignal;
60
58
  /** Mutable JSON-safe artifact bag shared with the harness. */
@@ -75,11 +73,11 @@ type HarnessContext<TMetadata extends HarnessMetadata = HarnessMetadata> = {
75
73
  * };
76
74
  * ```
77
75
  */
78
- type Harness<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined, TMetadata extends HarnessMetadata = HarnessMetadata> = {
76
+ type Harness<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined> = {
79
77
  /** Stable harness name used in reports. */
80
78
  name: string;
81
79
  /** Executes the system under test and returns a normalized run. */
82
- run: (input: TInput, context: HarnessContext<TMetadata>) => Promise<HarnessRun<TOutput>>;
80
+ run: (input: TInput, context: HarnessContext) => Promise<HarnessRun<TOutput>>;
83
81
  };
84
82
  /** Value or promise accepted by lightweight harness callbacks. */
85
83
  type MaybePromise<T> = T | Promise<T>;
@@ -148,17 +146,15 @@ type SimpleHarnessResult<TOutput extends JsonValue | undefined = JsonValue | und
148
146
  /** Either a complete normalized run or a lightweight result to normalize. */
149
147
  type HarnessResultLike<TOutput extends JsonValue | undefined = JsonValue | undefined> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;
150
148
  /** Arguments passed to the `createHarness(...)` convenience callback. */
151
- type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {
149
+ type CreateHarnessRunArgs<TInput> = {
152
150
  /** Original input passed to `run(input)`. */
153
151
  input: TInput;
154
- /** Read-only metadata passed to `run(input, { metadata })`. */
155
- metadata: Readonly<TMetadata>;
156
152
  /** Abort signal from Vitest when available. */
157
153
  signal?: AbortSignal;
158
154
  /** Mutable run artifact bag. */
159
- artifacts: HarnessContext<TMetadata>["artifacts"];
155
+ artifacts: HarnessContext["artifacts"];
160
156
  /** Stores one JSON-safe artifact on the current run. */
161
- setArtifact: HarnessContext<TMetadata>["setArtifact"];
157
+ setArtifact: HarnessContext["setArtifact"];
162
158
  };
163
159
  /**
164
160
  * Options for creating a lightweight custom application harness.
@@ -173,11 +169,11 @@ type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {
173
169
  * };
174
170
  * ```
175
171
  */
176
- type CreateHarnessOptions<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined, TMetadata extends HarnessMetadata = HarnessMetadata> = {
172
+ type CreateHarnessOptions<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined> = {
177
173
  /** Stable harness name used in reports. */
178
174
  name: string;
179
175
  /** Executes application code and returns either a lightweight result or full `HarnessRun`. */
180
- run: (args: CreateHarnessRunArgs<TInput, TMetadata>) => MaybePromise<HarnessResultLike<TOutput>>;
176
+ run: (args: CreateHarnessRunArgs<TInput>) => MaybePromise<HarnessResultLike<TOutput>>;
181
177
  };
182
178
  /** Returns true when a value exposes a callable method with the given name. */
183
179
  declare function hasCallableMethod(value: unknown, methodName: string): boolean;
@@ -200,15 +196,14 @@ declare function normalizeContent(value: unknown): JsonValue;
200
196
  *
201
197
  * export const refundHarness = createHarness<
202
198
  * string,
203
- * { status: "approved" | "denied" },
204
- * { expected: { status: "approved" | "denied" } }
199
+ * { status: "approved" | "denied" }
205
200
  * >({
206
201
  * name: "refund-agent",
207
- * run: async ({ input, metadata, setArtifact }) => {
208
- * const result = await runRefundFlow(input, metadata);
202
+ * run: async ({ input, setArtifact }) => {
203
+ * const result = await runRefundFlow(input);
209
204
  * const output = { status: result.status };
210
205
  *
211
- * setArtifact("case", { expected: metadata.expected.status });
206
+ * setArtifact("case", { invoiceId: result.invoiceId });
212
207
  *
213
208
  * return {
214
209
  * output,
@@ -219,7 +214,7 @@ declare function normalizeContent(value: unknown): JsonValue;
219
214
  * });
220
215
  * ```
221
216
  */
222
- declare function createHarness<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined, TMetadata extends HarnessMetadata = HarnessMetadata>(options: CreateHarnessOptions<TInput, TOutput, TMetadata>): Harness<TInput, TOutput, TMetadata>;
217
+ declare function createHarness<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined>(options: CreateHarnessOptions<TInput, TOutput>): Harness<TInput, TOutput>;
223
218
  /**
224
219
  * Normalizes a lightweight harness result into the reporter-facing run shape.
225
220
  *
@@ -238,7 +233,7 @@ declare function createHarness<TInput = unknown, TOutput extends JsonValue | und
238
233
  * expect(toolCalls(run.session)).toHaveLength(1);
239
234
  * ```
240
235
  */
241
- declare function normalizeHarnessRun<TInput = unknown, TMetadata extends HarnessMetadata = HarnessMetadata, TOutput extends JsonValue | undefined = JsonValue | undefined>(input: TInput, result: HarnessResultLike<TOutput>, context?: HarnessContext<TMetadata>): HarnessRun<TOutput>;
236
+ declare function normalizeHarnessRun<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined>(input: TInput, result: HarnessResultLike<TOutput>, context?: HarnessContext): HarnessRun<TOutput>;
242
237
  /**
243
238
  * Builds a JSON-safe failed run for errors that happen before a harness can return.
244
239
  *
package/dist/harness.d.ts CHANGED
@@ -30,7 +30,7 @@ type OutputField<TOutput extends JsonValue | undefined> = undefined extends TOut
30
30
  } : {
31
31
  output: TOutput;
32
32
  };
33
- /** Per-run metadata shape accepted by harnesses and eval tests. */
33
+ /** Generic JSON-like metadata record used by normalized artifacts and reports. */
34
34
  type HarnessMetadata = Record<string, unknown>;
35
35
  /**
36
36
  * Runtime context passed from the eval fixture into a harness run.
@@ -52,9 +52,7 @@ type HarnessMetadata = Record<string, unknown>;
52
52
  * };
53
53
  * ```
54
54
  */
55
- type HarnessContext<TMetadata extends HarnessMetadata = HarnessMetadata> = {
56
- /** Per-run metadata passed through `run(input, { metadata })`. */
57
- metadata: Readonly<TMetadata>;
55
+ type HarnessContext = {
58
56
  /** Abort signal from Vitest when available. */
59
57
  signal?: AbortSignal;
60
58
  /** Mutable JSON-safe artifact bag shared with the harness. */
@@ -75,11 +73,11 @@ type HarnessContext<TMetadata extends HarnessMetadata = HarnessMetadata> = {
75
73
  * };
76
74
  * ```
77
75
  */
78
- type Harness<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined, TMetadata extends HarnessMetadata = HarnessMetadata> = {
76
+ type Harness<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined> = {
79
77
  /** Stable harness name used in reports. */
80
78
  name: string;
81
79
  /** Executes the system under test and returns a normalized run. */
82
- run: (input: TInput, context: HarnessContext<TMetadata>) => Promise<HarnessRun<TOutput>>;
80
+ run: (input: TInput, context: HarnessContext) => Promise<HarnessRun<TOutput>>;
83
81
  };
84
82
  /** Value or promise accepted by lightweight harness callbacks. */
85
83
  type MaybePromise<T> = T | Promise<T>;
@@ -148,17 +146,15 @@ type SimpleHarnessResult<TOutput extends JsonValue | undefined = JsonValue | und
148
146
  /** Either a complete normalized run or a lightweight result to normalize. */
149
147
  type HarnessResultLike<TOutput extends JsonValue | undefined = JsonValue | undefined> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;
150
148
  /** Arguments passed to the `createHarness(...)` convenience callback. */
151
- type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {
149
+ type CreateHarnessRunArgs<TInput> = {
152
150
  /** Original input passed to `run(input)`. */
153
151
  input: TInput;
154
- /** Read-only metadata passed to `run(input, { metadata })`. */
155
- metadata: Readonly<TMetadata>;
156
152
  /** Abort signal from Vitest when available. */
157
153
  signal?: AbortSignal;
158
154
  /** Mutable run artifact bag. */
159
- artifacts: HarnessContext<TMetadata>["artifacts"];
155
+ artifacts: HarnessContext["artifacts"];
160
156
  /** Stores one JSON-safe artifact on the current run. */
161
- setArtifact: HarnessContext<TMetadata>["setArtifact"];
157
+ setArtifact: HarnessContext["setArtifact"];
162
158
  };
163
159
  /**
164
160
  * Options for creating a lightweight custom application harness.
@@ -173,11 +169,11 @@ type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {
173
169
  * };
174
170
  * ```
175
171
  */
176
- type CreateHarnessOptions<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined, TMetadata extends HarnessMetadata = HarnessMetadata> = {
172
+ type CreateHarnessOptions<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined> = {
177
173
  /** Stable harness name used in reports. */
178
174
  name: string;
179
175
  /** Executes application code and returns either a lightweight result or full `HarnessRun`. */
180
- run: (args: CreateHarnessRunArgs<TInput, TMetadata>) => MaybePromise<HarnessResultLike<TOutput>>;
176
+ run: (args: CreateHarnessRunArgs<TInput>) => MaybePromise<HarnessResultLike<TOutput>>;
181
177
  };
182
178
  /** Returns true when a value exposes a callable method with the given name. */
183
179
  declare function hasCallableMethod(value: unknown, methodName: string): boolean;
@@ -200,15 +196,14 @@ declare function normalizeContent(value: unknown): JsonValue;
200
196
  *
201
197
  * export const refundHarness = createHarness<
202
198
  * string,
203
- * { status: "approved" | "denied" },
204
- * { expected: { status: "approved" | "denied" } }
199
+ * { status: "approved" | "denied" }
205
200
  * >({
206
201
  * name: "refund-agent",
207
- * run: async ({ input, metadata, setArtifact }) => {
208
- * const result = await runRefundFlow(input, metadata);
202
+ * run: async ({ input, setArtifact }) => {
203
+ * const result = await runRefundFlow(input);
209
204
  * const output = { status: result.status };
210
205
  *
211
- * setArtifact("case", { expected: metadata.expected.status });
206
+ * setArtifact("case", { invoiceId: result.invoiceId });
212
207
  *
213
208
  * return {
214
209
  * output,
@@ -219,7 +214,7 @@ declare function normalizeContent(value: unknown): JsonValue;
219
214
  * });
220
215
  * ```
221
216
  */
222
- declare function createHarness<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined, TMetadata extends HarnessMetadata = HarnessMetadata>(options: CreateHarnessOptions<TInput, TOutput, TMetadata>): Harness<TInput, TOutput, TMetadata>;
217
+ declare function createHarness<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined>(options: CreateHarnessOptions<TInput, TOutput>): Harness<TInput, TOutput>;
223
218
  /**
224
219
  * Normalizes a lightweight harness result into the reporter-facing run shape.
225
220
  *
@@ -238,7 +233,7 @@ declare function createHarness<TInput = unknown, TOutput extends JsonValue | und
238
233
  * expect(toolCalls(run.session)).toHaveLength(1);
239
234
  * ```
240
235
  */
241
- declare function normalizeHarnessRun<TInput = unknown, TMetadata extends HarnessMetadata = HarnessMetadata, TOutput extends JsonValue | undefined = JsonValue | undefined>(input: TInput, result: HarnessResultLike<TOutput>, context?: HarnessContext<TMetadata>): HarnessRun<TOutput>;
236
+ declare function normalizeHarnessRun<TInput = unknown, TOutput extends JsonValue | undefined = JsonValue | undefined>(input: TInput, result: HarnessResultLike<TOutput>, context?: HarnessContext): HarnessRun<TOutput>;
242
237
  /**
243
238
  * Builds a JSON-safe failed run for errors that happen before a harness can return.
244
239
  *
package/dist/harness.js CHANGED
@@ -129,7 +129,6 @@ function createHarness(options) {
129
129
  try {
130
130
  const result = await options.run({
131
131
  input,
132
- metadata: context.metadata,
133
132
  signal: context.signal,
134
133
  artifacts: context.artifacts,
135
134
  setArtifact: context.setArtifact
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/harness.ts"],"sourcesContent":["import {\n assistantMessages,\n failedSpans,\n latestAssistantMessageContent,\n messagesByRole,\n spans,\n spansByKind,\n systemMessages,\n toolCalls,\n toolMessages,\n userMessages,\n} from \"@vitest-evals/core\";\nimport type {\n GenAiOperationName,\n HarnessRun,\n HarnessRunError,\n JsonPrimitive,\n JsonValue,\n NormalizedMessage,\n NormalizedSession,\n NormalizedSpan,\n NormalizedSpanAttributes,\n NormalizedSpanEvent,\n NormalizedTrace,\n TimingSummary,\n ToolCallRecord,\n UsageSummary,\n} from \"@vitest-evals/core\";\n\nexport {\n assistantMessages,\n failedSpans,\n latestAssistantMessageContent,\n messagesByRole,\n spans,\n spansByKind,\n systemMessages,\n toolCalls,\n toolMessages,\n userMessages,\n} from \"@vitest-evals/core\";\nexport type {\n GenAiOperationName,\n GenAiOutputType,\n GenAiProviderName,\n GenAiSemanticAttributeKey,\n GenAiSemanticAttributes,\n GenAiTokenType,\n GenAiToolType,\n HarnessRun,\n HarnessRunError,\n JsonPrimitive,\n JsonValue,\n NormalizedMessage,\n NormalizedSession,\n NormalizedSpan,\n NormalizedSpanAttributeKey,\n NormalizedSpanAttributes,\n NormalizedSpanEvent,\n NormalizedTrace,\n OpenTelemetrySemanticAttributeKey,\n OpenTelemetrySemanticAttributes,\n TimingSummary,\n ToolCallRecord,\n UsageSummary,\n} from \"@vitest-evals/core\";\n\n/** Options for converting normalized tool calls into trace spans. */\nexport type CreateToolCallSpansOptions = {\n /** Trace id to attach to each generated tool span. */\n traceId?: string;\n /** Parent span id to attach to each generated tool span. */\n parentId?: string;\n /** Prefix used to create internal span ids instead of reusing tool-call ids. */\n spanIdPrefix?: string;\n};\n\n/** Options for attaching a fallback run trace to a harness result. */\nexport type EnsureRunTraceOptions = {\n /** Human-readable run or harness name. */\n name: string;\n /** Wall-clock start time for the harness run. 
*/\n startedAt: Date;\n /** Wall-clock finish time for the harness run. */\n finishedAt: Date;\n /** Optional trace id. A generated id is used when omitted. */\n id?: string;\n /** GenAI operation name to place on the root run span. */\n operationName?: GenAiOperationName;\n /** Optional JSON-safe source marker for the trace metadata. */\n source?: string;\n};\n\ntype OutputField<TOutput extends JsonValue | undefined> =\n undefined extends TOutput ? { output?: TOutput } : { output: TOutput };\n\n/** Per-run metadata shape accepted by harnesses and eval tests. */\nexport type HarnessMetadata = Record<string, unknown>;\n\n/**\n * Runtime context passed from the eval fixture into a harness run.\n *\n * @example\n * ```ts\n * const harness: Harness<string> = {\n * name: \"refund-agent\",\n * async run(input, context) {\n * context.setArtifact(\"inputLength\", input.length);\n *\n * return {\n * output: undefined,\n * session: { messages: [{ role: \"user\", content: input }] },\n * usage: {},\n * errors: [],\n * };\n * },\n * };\n * ```\n */\nexport type HarnessContext<\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n /** Per-run metadata passed through `run(input, { metadata })`. */\n metadata: Readonly<TMetadata>;\n /** Abort signal from Vitest when available. */\n signal?: AbortSignal;\n /** Mutable JSON-safe artifact bag shared with the harness. */\n artifacts: Record<string, JsonValue>;\n /** Stores one JSON-safe artifact on the current run. 
*/\n setArtifact: (name: string, value: JsonValue) => void;\n};\n\n/**\n * Adapter that executes the system under test and returns a normalized run.\n *\n * @example\n * ```ts\n * const harness: Harness<string, { status: \"approved\" | \"denied\" }> = {\n * name: \"refund-agent\",\n * async run(input, context) {\n * return normalizeHarnessRun(input, await runRefundFlow(input), context);\n * },\n * };\n * ```\n */\nexport type Harness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n /** Stable harness name used in reports. */\n name: string;\n /** Executes the system under test and returns a normalized run. */\n run: (\n input: TInput,\n context: HarnessContext<TMetadata>,\n ) => Promise<HarnessRun<TOutput>>;\n};\n\n/** Value or promise accepted by lightweight harness callbacks. */\nexport type MaybePromise<T> = T | Promise<T>;\n\n/** Lightweight tool-call record accepted by `createHarness(...)` results. */\nexport type SimpleToolCallRecord = Omit<\n ToolCallRecord,\n \"arguments\" | \"result\" | \"error\" | \"metadata\"\n> & {\n /** Raw tool arguments accepted by `createHarness(...)` before normalization. */\n arguments?: unknown;\n /** Raw tool result accepted by `createHarness(...)` before normalization. */\n result?: unknown;\n /** Raw tool error accepted by `createHarness(...)` before normalization. */\n error?: unknown;\n /** Raw tool metadata accepted by `createHarness(...)` before normalization. */\n metadata?: Record<string, unknown>;\n};\n\n/** Lightweight span event accepted by `createHarness(...)` results. */\nexport type SimpleSpanEvent = Omit<NormalizedSpanEvent, \"attributes\"> & {\n /** Raw event attributes accepted by `createHarness(...)` before normalization. */\n attributes?: Record<string, unknown>;\n};\n\n/** Lightweight span record accepted by `createHarness(...)` results. 
*/\nexport type SimpleSpanRecord = Omit<\n NormalizedSpan,\n \"attributes\" | \"error\" | \"events\"\n> & {\n /** Raw span attributes accepted by `createHarness(...)` before normalization. */\n attributes?: Record<string, unknown>;\n /** Raw span error accepted by `createHarness(...)` before normalization. */\n error?: unknown;\n /** Raw span events accepted by `createHarness(...)` before normalization. */\n events?: SimpleSpanEvent[];\n};\n\n/** Lightweight trace record accepted by `createHarness(...)` results. */\nexport type SimpleTraceRecord = Omit<NormalizedTrace, \"metadata\" | \"spans\"> & {\n /** Raw trace metadata accepted by `createHarness(...)` before normalization. */\n metadata?: Record<string, unknown>;\n /** Lightweight spans to normalize into the trace. */\n spans: SimpleSpanRecord[];\n};\n\n/**\n * Lightweight result shape normalized by `createHarness(...)`.\n *\n * @example\n * ```ts\n * const result: SimpleHarnessResult<{ status: \"approved\" }> = {\n * output: { status: \"approved\" },\n * toolCalls: [{ name: \"lookupInvoice\", arguments: { invoiceId: \"inv_123\" } }],\n * usage: { totalTokens: 260 },\n * };\n * ```\n */\nexport type SimpleHarnessResult<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n /** Pre-normalized transcript messages. When omitted, a default user/assistant transcript is created. */\n messages?: NormalizedMessage[];\n /** Lightweight tool-call records to normalize into the session. */\n toolCalls?: SimpleToolCallRecord[];\n /** Usage summary to attach to the run. */\n usage?: UsageSummary;\n /** Timing summary to attach to the run. */\n timings?: TimingSummary;\n /** Raw artifact values to normalize and merge into the run. */\n artifacts?: Record<string, unknown>;\n /** Lightweight traces and spans to normalize into the run. */\n traces?: SimpleTraceRecord[];\n /** Raw session metadata to normalize into the session. 
*/\n metadata?: Record<string, unknown>;\n /** Raw errors to normalize into the run. */\n errors?: unknown[];\n};\n\n/** Either a complete normalized run or a lightweight result to normalize. */\nexport type HarnessResultLike<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;\n\n/** Arguments passed to the `createHarness(...)` convenience callback. */\nexport type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {\n /** Original input passed to `run(input)`. */\n input: TInput;\n /** Read-only metadata passed to `run(input, { metadata })`. */\n metadata: Readonly<TMetadata>;\n /** Abort signal from Vitest when available. */\n signal?: AbortSignal;\n /** Mutable run artifact bag. */\n artifacts: HarnessContext<TMetadata>[\"artifacts\"];\n /** Stores one JSON-safe artifact on the current run. */\n setArtifact: HarnessContext<TMetadata>[\"setArtifact\"];\n};\n\n/**\n * Options for creating a lightweight custom application harness.\n *\n * @example\n * ```ts\n * const options: CreateHarnessOptions<string, { status: \"approved\" }> = {\n * name: \"refund-agent\",\n * run: async ({ input }) => ({\n * output: await classifyRefund(input),\n * }),\n * };\n * ```\n */\nexport type CreateHarnessOptions<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n /** Stable harness name used in reports. */\n name: string;\n /** Executes application code and returns either a lightweight result or full `HarnessRun`. 
*/\n run: (\n args: CreateHarnessRunArgs<TInput, TMetadata>,\n ) => MaybePromise<HarnessResultLike<TOutput>>;\n};\n\nfunction isJsonPrimitive(value: unknown): value is JsonPrimitive {\n return (\n value === null ||\n typeof value === \"string\" ||\n typeof value === \"boolean\" ||\n (typeof value === \"number\" && Number.isFinite(value))\n );\n}\n\nfunction isJsonRecord(value: unknown): value is Record<string, unknown> {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\nfunction normalizeJsonArray(value: unknown[], seen: WeakSet<object>) {\n if (seen.has(value)) {\n return undefined;\n }\n\n seen.add(value);\n const normalized = value.map((item) => {\n const normalized = toJsonValueInternal(item, seen);\n return normalized === undefined ? null : normalized;\n });\n seen.delete(value);\n\n return normalized;\n}\n\nfunction normalizeJsonObject(\n value: Record<string, unknown>,\n seen: WeakSet<object>,\n): Record<string, JsonValue> {\n const normalized: Record<string, JsonValue> = {};\n\n if (seen.has(value)) {\n return normalized;\n }\n\n seen.add(value);\n try {\n for (const [key, entryValue] of Object.entries(value)) {\n const entry = toJsonValueInternal(entryValue, seen);\n if (entry !== undefined) {\n normalized[key] = entry;\n }\n }\n } finally {\n seen.delete(value);\n }\n\n return normalized;\n}\n\n/** Returns true when a value exposes a callable method with the given name. */\nexport function hasCallableMethod(value: unknown, methodName: string) {\n return (\n value !== null &&\n (typeof value === \"object\" || typeof value === \"function\") &&\n methodName in value &&\n typeof (value as Record<string, unknown>)[methodName] === \"function\"\n );\n}\n\n/** Normalizes an unknown value into the JSON-safe shape used by harness runs. 
*/\nexport function toJsonValue(value: unknown): JsonValue | undefined {\n return toJsonValueInternal(value, new WeakSet());\n}\n\nfunction toJsonValueInternal(\n value: unknown,\n seen: WeakSet<object>,\n): JsonValue | undefined {\n if (isJsonPrimitive(value)) {\n return value;\n }\n\n if (\n value !== null &&\n typeof value === \"object\" &&\n seen.has(value as object)\n ) {\n return undefined;\n }\n\n if (Array.isArray(value)) {\n return normalizeJsonArray(value, seen);\n }\n\n if (isJsonRecord(value)) {\n return normalizeJsonObject(value, seen);\n }\n\n return undefined;\n}\n\n/** Drops non-JSON properties from a record while preserving valid values. */\nexport function normalizeRecord(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n return normalizeJsonObject(value, new WeakSet());\n}\n\n/** Normalizes metadata and omits the field entirely when nothing survives. */\nexport function normalizeMetadata(\n value: Record<string, unknown>,\n): Record<string, JsonValue> | undefined {\n const normalized = normalizeRecord(value);\n return Object.keys(normalized).length > 0 ? normalized : undefined;\n}\n\n/** Converts arbitrary content into the JSON-safe message content shape. */\nexport function normalizeContent(value: unknown): JsonValue {\n const normalized = toJsonValue(value);\n return normalized !== undefined ? 
normalized : String(value);\n}\n\n/**\n * Creates a harness from the common \"run app code and return output\" shape.\n *\n * @param options - Harness name plus the callback that executes app code.\n *\n * @example\n * ```ts\n * import { createHarness } from \"vitest-evals\";\n *\n * export const refundHarness = createHarness<\n * string,\n * { status: \"approved\" | \"denied\" },\n * { expected: { status: \"approved\" | \"denied\" } }\n * >({\n * name: \"refund-agent\",\n * run: async ({ input, metadata, setArtifact }) => {\n * const result = await runRefundFlow(input, metadata);\n * const output = { status: result.status };\n *\n * setArtifact(\"case\", { expected: metadata.expected.status });\n *\n * return {\n * output,\n * toolCalls: result.toolCalls,\n * usage: { provider: \"openai\", model: \"gpt-4o-mini\" },\n * };\n * },\n * });\n * ```\n */\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata>;\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata> {\n const harness: Harness<TInput, TOutput, TMetadata> = {\n name: options.name,\n run: async (input, context) => {\n const startedAt = new Date();\n\n try {\n const result = await options.run({\n input,\n metadata: context.metadata,\n signal: context.signal,\n artifacts: context.artifacts,\n setArtifact: context.setArtifact,\n });\n const run = normalizeHarnessRun(input, result, context);\n ensureRunTrace(run, {\n name: options.name,\n startedAt,\n finishedAt: new Date(),\n });\n\n return run;\n } catch (error) {\n const partialRun = getHarnessRunFromError(error);\n if 
(partialRun) {\n if (\n Object.keys(context.artifacts).length > 0 &&\n !partialRun.artifacts\n ) {\n partialRun.artifacts = context.artifacts;\n }\n ensureRunTrace(partialRun, {\n name: options.name,\n startedAt,\n finishedAt: new Date(),\n });\n throw attachHarnessRunToError(error, partialRun);\n }\n\n const failedRun = createFailedHarnessRun(input, error, {\n artifacts: context.artifacts,\n });\n ensureRunTrace(failedRun, {\n name: options.name,\n startedAt,\n finishedAt: new Date(),\n });\n\n throw attachHarnessRunToError(error, failedRun);\n }\n },\n };\n\n return harness;\n}\n\n/**\n * Normalizes a lightweight harness result into the reporter-facing run shape.\n *\n * @param input - Original input passed to the harness.\n * @param result - Lightweight result or pre-normalized harness run.\n * @param context - Optional per-run context used to merge artifacts.\n *\n * @example\n * ```ts\n * const run = normalizeHarnessRun(\"Refund invoice inv_123\", {\n * output: { status: \"approved\" },\n * toolCalls: [{ name: \"lookupInvoice\", arguments: { invoiceId: \"inv_123\" } }],\n * usage: { provider: \"openai\", model: \"gpt-4o-mini\" },\n * });\n *\n * expect(toolCalls(run.session)).toHaveLength(1);\n * ```\n */\nexport function normalizeHarnessRun<\n TInput = unknown,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n>(\n input: TInput,\n result: HarnessResultLike<TOutput>,\n context?: HarnessContext<TMetadata>,\n): HarnessRun<TOutput> {\n if (isHarnessRun(result)) {\n if (\n context &&\n Object.keys(context.artifacts).length > 0 &&\n !result.artifacts\n ) {\n return {\n ...result,\n artifacts: context.artifacts,\n };\n }\n\n return result;\n }\n\n const output = result.output;\n const toolCalls = normalizeSimpleToolCalls(result.toolCalls);\n const usage = result.usage ?? 
{};\n const messages =\n result.messages ??\n createDefaultSessionMessages({\n input,\n output,\n toolCalls,\n });\n const metadata = result.metadata\n ? normalizeMetadata(result.metadata)\n : undefined;\n const artifacts = normalizeMergedArtifacts(\n context?.artifacts,\n result.artifacts,\n );\n const traces = normalizeSimpleTraces(result.traces);\n\n return {\n session: {\n messages,\n ...(usage.provider ? { provider: usage.provider } : {}),\n ...(usage.model ? { model: usage.model } : {}),\n ...(metadata ? { metadata } : {}),\n },\n ...(output !== undefined ? { output } : {}),\n usage,\n ...(result.timings ? { timings: result.timings } : {}),\n ...(artifacts ? { artifacts } : {}),\n ...(traces ? { traces } : {}),\n errors: normalizeSimpleErrors(result.errors),\n } as HarnessRun<TOutput>;\n}\n\n/**\n * Builds a JSON-safe failed run for errors that happen before a harness can return.\n *\n * @param input - Original input passed to the harness.\n * @param error - Error thrown by setup or execution.\n * @param options - Optional artifacts to preserve on the failed run.\n */\nexport function createFailedHarnessRun(\n input: unknown,\n error: unknown,\n options: { artifacts?: Record<string, JsonValue> } = {},\n): HarnessRun {\n const artifacts = options.artifacts;\n\n return {\n session: {\n messages: [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ],\n },\n usage: {},\n ...(artifacts && Object.keys(artifacts).length > 0 ? { artifacts } : {}),\n errors: [serializeError(error)],\n };\n}\n\nfunction createDefaultSessionMessages<TInput>({\n input,\n output,\n toolCalls: normalizedToolCalls,\n}: {\n input: TInput;\n output: JsonValue | undefined;\n toolCalls: ToolCallRecord[];\n}): NormalizedMessage[] {\n const messages: NormalizedMessage[] = [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ];\n\n if (output !== undefined || normalizedToolCalls.length > 0) {\n messages.push({\n role: \"assistant\",\n ...(output !== undefined ? 
{ content: normalizeContent(output) } : {}),\n ...(normalizedToolCalls.length > 0\n ? { toolCalls: normalizedToolCalls }\n : {}),\n });\n }\n\n return messages;\n}\n\nfunction normalizeSimpleToolCalls(\n calls: SimpleToolCallRecord[] | undefined,\n): ToolCallRecord[] {\n return (calls ?? []).map((call) => {\n const {\n arguments: rawArguments,\n result: rawResult,\n error: rawError,\n metadata: rawMetadata,\n ...toolCall\n } = call;\n const args = normalizeToolCallArguments(rawArguments);\n const result = toJsonValue(rawResult);\n const error = normalizeToolCallError(rawError);\n const metadata = rawMetadata ? normalizeMetadata(rawMetadata) : undefined;\n\n return {\n ...toolCall,\n ...(args ? { arguments: args } : {}),\n ...(result !== undefined ? { result } : {}),\n ...(error ? { error } : {}),\n ...(metadata ? { metadata } : {}),\n };\n });\n}\n\nfunction normalizeToolCallArguments(\n value: unknown,\n): Record<string, JsonValue> | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const normalized = toJsonValue(value);\n return normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized)\n ? normalized\n : undefined;\n}\n\nfunction normalizeToolCallError(\n value: unknown,\n): ToolCallRecord[\"error\"] | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const serialized = serializeError(value);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\nfunction normalizeMergedArtifacts(\n contextArtifacts: Record<string, JsonValue> | undefined,\n resultArtifacts: Record<string, unknown> | undefined,\n) {\n const artifacts = {\n ...(contextArtifacts ?? {}),\n ...(resultArtifacts ? normalizeRecord(resultArtifacts) : {}),\n };\n\n return Object.keys(artifacts).length > 0 ? 
artifacts : undefined;\n}\n\nfunction normalizeSimpleErrors(\n errors: unknown[] | undefined,\n): Array<Record<string, JsonValue>> {\n return (errors ?? []).map((error) => {\n const normalized = toJsonValue(error);\n\n if (\n normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized) &&\n Object.keys(normalized).length > 0\n ) {\n return normalized;\n }\n\n return serializeError(error);\n });\n}\n\nfunction normalizeSimpleTraces(\n traces: SimpleTraceRecord[] | undefined,\n): NormalizedTrace[] | undefined {\n if (!Array.isArray(traces)) {\n return undefined;\n }\n\n const normalized = traces\n .map(normalizeSimpleTrace)\n .filter((trace): trace is NormalizedTrace => Boolean(trace));\n\n return normalized.length > 0 ? normalized : undefined;\n}\n\nfunction normalizeSimpleTrace(trace: unknown): NormalizedTrace | undefined {\n if (!isJsonRecord(trace)) {\n return undefined;\n }\n\n const {\n metadata: rawMetadata,\n spans: rawSpans,\n ...traceFields\n } = trace as Partial<SimpleTraceRecord>;\n const spans = (Array.isArray(rawSpans) ? rawSpans : [])\n .map((span) => normalizeSimpleSpan(span))\n .filter((span): span is NormalizedSpan => Boolean(span));\n const metadata = isJsonRecord(rawMetadata)\n ? normalizeMetadata(rawMetadata)\n : undefined;\n\n if (spans.length === 0 && !traceFields.id && !traceFields.name) {\n return undefined;\n }\n\n return {\n ...traceFields,\n ...(metadata ? { metadata } : {}),\n spans,\n };\n}\n\nfunction normalizeSimpleSpan(span: unknown): NormalizedSpan | undefined {\n if (!isJsonRecord(span) || typeof span.name !== \"string\" || !span.name) {\n return undefined;\n }\n\n const {\n attributes: rawAttributes,\n error: rawError,\n events: rawEvents,\n ...spanFields\n } = span as Partial<SimpleSpanRecord> & { name: string };\n const attributes = rawAttributes\n ? isJsonRecord(rawAttributes)\n ? 
normalizeMetadata(rawAttributes)\n : undefined\n : undefined;\n const error = normalizeSpanError(rawError);\n const events = normalizeSimpleSpanEvents(rawEvents);\n\n return {\n ...spanFields,\n ...(attributes\n ? { attributes: attributes as NormalizedSpanAttributes }\n : {}),\n ...(error ? { error } : {}),\n ...(events ? { events } : {}),\n };\n}\n\nfunction normalizeSimpleSpanEvents(\n events: unknown,\n): NormalizedSpanEvent[] | undefined {\n if (!Array.isArray(events)) {\n return undefined;\n }\n\n const normalized = events\n .map(normalizeSimpleSpanEvent)\n .filter((event): event is NormalizedSpanEvent => Boolean(event));\n\n return normalized.length > 0 ? normalized : undefined;\n}\n\nfunction normalizeSimpleSpanEvent(\n event: unknown,\n): NormalizedSpanEvent | undefined {\n if (!isJsonRecord(event) || typeof event.name !== \"string\" || !event.name) {\n return undefined;\n }\n\n const { attributes: rawAttributes, ...eventFields } =\n event as Partial<SimpleSpanEvent> & { name: string };\n const attributes = rawAttributes\n ? isJsonRecord(rawAttributes)\n ? normalizeMetadata(rawAttributes)\n : undefined\n : undefined;\n\n return {\n ...eventFields,\n ...(attributes\n ? { attributes: attributes as NormalizedSpanAttributes }\n : {}),\n };\n}\n\n/** Normalizes arbitrary span errors while preserving object-shaped messages. */\nexport function normalizeSpanError(\n error: unknown,\n): NormalizedSpan[\"error\"] | undefined {\n if (error === undefined) {\n return undefined;\n }\n\n if (error instanceof Error) {\n const details = normalizeMetadata(\n error as unknown as Record<string, unknown>,\n );\n\n return {\n ...(details ?? {}),\n type: error.name,\n message: error.message,\n };\n }\n\n if (\n error &&\n typeof error === \"object\" &&\n !Array.isArray(error) &&\n typeof (error as { message?: unknown }).message === \"string\"\n ) {\n const normalized = normalizeMetadata(error as Record<string, unknown>);\n const { message, type, ...details } = normalized ?? 
{};\n\n return {\n ...details,\n message: message as string,\n ...(typeof type === \"string\" ? { type } : {}),\n };\n }\n\n const serialized = serializeError(error);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\n/** Normalizes raw span attributes into the JSON-safe span attribute shape. */\nexport function normalizeSpanAttributes(\n attributes: Record<string, unknown>,\n): NormalizedSpanAttributes | undefined {\n return normalizeMetadata(attributes) as NormalizedSpanAttributes | undefined;\n}\n\n/** Builds common OpenTelemetry GenAI usage attributes from a usage summary. */\nexport function createGenAiUsageAttributes(\n usage: UsageSummary | undefined,\n options: { provider?: string } = {},\n) {\n return {\n \"gen_ai.provider.name\": usage?.provider ?? options.provider,\n \"gen_ai.request.model\": usage?.model,\n \"gen_ai.response.model\": usage?.model,\n \"gen_ai.usage.input_tokens\": usage?.inputTokens,\n \"gen_ai.usage.output_tokens\": usage?.outputTokens,\n \"gen_ai.usage.reasoning.output_tokens\": usage?.reasoningTokens,\n } satisfies Record<string, unknown>;\n}\n\n/**\n * Converts normalized tool-call records into trace spans.\n *\n * Tool-call ids are preserved as GenAI attributes. Pass `spanIdPrefix` when the\n * spans belong to a known trace so span ids stay internally unique.\n */\nexport function createToolCallSpans(\n calls: ToolCallRecord[],\n options: CreateToolCallSpansOptions = {},\n): NormalizedSpan[] {\n return calls.map((call, index) => {\n const spanError = call.error ? normalizeSpanError(call.error) : undefined;\n const spanId = options.spanIdPrefix\n ? `${options.spanIdPrefix}:${index + 1}`\n : call.id;\n\n return {\n ...(spanId ? { id: spanId } : {}),\n ...(options.traceId ? { traceId: options.traceId } : {}),\n ...(options.parentId ? 
{ parentId: options.parentId } : {}),\n name: call.name,\n kind: \"tool\",\n ...(call.startedAt ? { startedAt: call.startedAt } : {}),\n ...(call.finishedAt ? { finishedAt: call.finishedAt } : {}),\n ...(call.durationMs !== undefined ? { durationMs: call.durationMs } : {}),\n status: spanError ? \"error\" : \"ok\",\n ...(spanError ? { error: spanError } : {}),\n attributes: normalizeSpanAttributes({\n \"gen_ai.operation.name\": \"execute_tool\",\n \"gen_ai.tool.name\": call.name,\n \"gen_ai.tool.type\": \"function\",\n ...(call.id ? { \"gen_ai.tool.call.id\": call.id } : {}),\n ...(call.arguments !== undefined\n ? { \"gen_ai.tool.call.arguments\": call.arguments }\n : {}),\n ...(call.result !== undefined\n ? { \"gen_ai.tool.call.result\": call.result }\n : {}),\n }),\n } satisfies NormalizedSpan;\n });\n}\n\n/**\n * Attaches a fallback run trace when a harness result does not already contain spans.\n *\n * This keeps custom harnesses inspectable while first-party harness packages\n * remain free to attach richer native traces.\n */\nexport function ensureRunTrace(\n run: HarnessRun,\n options: EnsureRunTraceOptions,\n): NormalizedTrace | undefined {\n if (spans(run).length > 0) {\n return undefined;\n }\n\n const traceId = options.id ?? createGeneratedTraceId();\n const rootSpanId = `${traceId}:run`;\n const durationMs = options.finishedAt.getTime() - options.startedAt.getTime();\n const rootError =\n run.errors.length > 0 ? normalizeSpanError(run.errors[0]) : undefined;\n const runSpan: NormalizedSpan = {\n id: rootSpanId,\n traceId,\n name: options.name,\n kind: \"run\",\n startedAt: options.startedAt.toISOString(),\n finishedAt: options.finishedAt.toISOString(),\n durationMs,\n status: rootError ? \"error\" : \"ok\",\n ...(rootError ? { error: rootError } : {}),\n attributes: normalizeSpanAttributes({\n \"gen_ai.operation.name\": options.operationName ?? 
\"invoke_workflow\",\n \"gen_ai.workflow.name\": options.name,\n ...createGenAiUsageAttributes(run.usage),\n }),\n };\n const toolSpans = createToolCallSpans(toolCalls(run.session), {\n traceId,\n parentId: rootSpanId,\n spanIdPrefix: `${traceId}:tool`,\n });\n const trace: NormalizedTrace = {\n id: traceId,\n name: options.name,\n startedAt: options.startedAt.toISOString(),\n finishedAt: options.finishedAt.toISOString(),\n durationMs,\n ...(options.source ? { metadata: { source: options.source } } : {}),\n spans: [runSpan, ...toolSpans],\n };\n\n run.traces = [trace];\n return trace;\n}\n\nlet nextGeneratedTraceId = 0;\n\nfunction createGeneratedTraceId() {\n nextGeneratedTraceId += 1;\n return `trace_${nextGeneratedTraceId}`;\n}\n\n/**\n * Attaches a partial or complete harness run to an arbitrary thrown error.\n *\n * @param error - Thrown value to wrap.\n * @param run - Partial or complete normalized harness run to preserve.\n *\n * @example\n * ```ts\n * try {\n * return await runAgent(input);\n * } catch (error) {\n * throw attachHarnessRunToError(error, partialRun);\n * }\n * ```\n */\nexport function attachHarnessRunToError(\n error: unknown,\n run: HarnessRun,\n): HarnessRunError {\n const baseError =\n error instanceof Error\n ? error\n : new Error(String(error ?? 
\"Unknown error\"));\n return Object.assign(baseError, {\n vitestEvalsRun: run,\n });\n}\n\n/**\n * Reads an attached harness run back off a previously wrapped error value.\n *\n * @param error - Unknown thrown value that may contain a harness run.\n *\n * @example\n * ```ts\n * const partialRun = getHarnessRunFromError(error);\n *\n * if (partialRun) {\n * console.log(toolCalls(partialRun.session));\n * }\n * ```\n */\nexport function getHarnessRunFromError(error: unknown): HarnessRun | undefined {\n if (\n error &&\n typeof error === \"object\" &&\n \"vitestEvalsRun\" in error &&\n isHarnessRun((error as { vitestEvalsRun?: unknown }).vitestEvalsRun)\n ) {\n return (error as { vitestEvalsRun: HarnessRun }).vitestEvalsRun;\n }\n\n return undefined;\n}\n\n/** Returns true when a value matches the normalized `HarnessRun` contract. */\nexport function isHarnessRun(value: unknown): value is HarnessRun {\n if (!value || typeof value !== \"object\") {\n return false;\n }\n\n const candidate = value as {\n session?: unknown;\n usage?: unknown;\n errors?: unknown;\n };\n\n return (\n isNormalizedSession(candidate.session) &&\n Boolean(candidate.usage) &&\n typeof candidate.usage === \"object\" &&\n !Array.isArray(candidate.usage) &&\n Array.isArray(candidate.errors)\n );\n}\n\n/** Returns true when a value matches the normalized session contract. */\nexport function isNormalizedSession(\n value: unknown,\n): value is NormalizedSession {\n return (\n Boolean(value) &&\n typeof value === \"object\" &&\n value !== null &&\n \"messages\" in value &&\n Array.isArray((value as { messages?: unknown }).messages)\n );\n}\n\n/** Reuses pre-normalized harness errors when a runtime already returns them. 
*/\nexport function resolveHarnessRunErrors(\n result: unknown,\n): Array<Record<string, JsonValue>> {\n if (\n result &&\n typeof result === \"object\" &&\n Array.isArray((result as Record<string, unknown>).errors)\n ) {\n return (result as { errors: Array<Record<string, JsonValue>> }).errors;\n }\n\n return [];\n}\n\n/** Serializes an arbitrary thrown value into the normalized error shape. */\nexport function serializeError(error: unknown): Record<string, JsonValue> {\n if (error instanceof Error) {\n return {\n type: error.name,\n message: error.message,\n };\n }\n\n return {\n type: \"Error\",\n message: String(error),\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAWO;AAkBP,IAAAA,eAWO;AAkPP,SAAS,gBAAgB,OAAwC;AAC/D,SACE,UAAU,QACV,OAAO,UAAU,YACjB,OAAO,UAAU,aAChB,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK;AAEvD;AAEA,SAAS,aAAa,OAAkD;AACtE,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAEA,SAAS,mBAAmB,OAAkB,MAAuB;AACnE,MAAI,KAAK,IAAI,KAAK,GAAG;AACnB,WAAO;AAAA,EACT;AAEA,OAAK,IAAI,KAAK;AACd,QAAM,aAAa,MAAM,IAAI,CAAC,SAAS;AACrC,UAAMC,cAAa,oBAAoB,MAAM,IAAI;AACjD,WAAOA,gBAAe,SAAY,OAAOA;AAAA,EAC3C,CAAC;AACD,OAAK,OAAO,KAAK;AAEjB,SAAO;AACT;AAEA,SAAS,oBACP,OACA,MAC2B;AAC3B,QAAM,aAAwC,CAAC;AAE/C,MAAI,KAAK,IAAI,KAAK,GAAG;AACnB,WAAO;AAAA,EACT;AAEA,OAAK,IAAI,KAAK;AACd,MAAI;AACF,eAAW,CAAC,KAAK,UAAU,KAAK,OAAO,QAAQ,KAAK,GAAG;AACrD,YAAM,QAAQ,oBAAoB,YAAY,IAAI;AAClD,UAAI,UAAU,QAAW;AACvB,mBAAW,GAAG,IAAI;AAAA,MACpB;AAAA,IACF;AAAA,EACF,UAAE;AACA,SAAK,OAAO,KAAK;AAAA,EACnB;AAEA,SAAO;AACT;AAGO,SAAS,kBAAkB,OAAgB,YAAoB;AACpE,SACE,UAAU,SACT,OAAO,UAAU,YAAY,OAAO,UAAU,eAC/C,cAAc,SACd,OAAQ,MAAkC,UAAU,MAAM;AAE9D;AAGO,SAAS,YAAY,OAAuC;AACjE,SAAO,oBAAoB,OAAO,oBAAI,QAAQ,CAAC;AACjD;AAEA,SAAS,oBACP,OACA,MACuB;AACvB,MAAI,gBAAgB,KAAK,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,MACE,UAAU,QACV,OAAO,UAAU,YACjB,KAAK,IAAI,KAAe,GACxB;AACA,WAAO;AAAA,EACT;AAEA,MAAI
,MAAM,QAAQ,KAAK,GAAG;AACxB,WAAO,mBAAmB,OAAO,IAAI;AAAA,EACvC;AAEA,MAAI,aAAa,KAAK,GAAG;AACvB,WAAO,oBAAoB,OAAO,IAAI;AAAA,EACxC;AAEA,SAAO;AACT;AAGO,SAAS,gBACd,OAC2B;AAC3B,SAAO,oBAAoB,OAAO,oBAAI,QAAQ,CAAC;AACjD;AAGO,SAAS,kBACd,OACuC;AACvC,QAAM,aAAa,gBAAgB,KAAK;AACxC,SAAO,OAAO,KAAK,UAAU,EAAE,SAAS,IAAI,aAAa;AAC3D;AAGO,SAAS,iBAAiB,OAA2B;AAC1D,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,eAAe,SAAY,aAAa,OAAO,KAAK;AAC7D;AAuCO,SAAS,cAKd,SACqC;AACrC,QAAM,UAA+C;AAAA,IACnD,MAAM,QAAQ;AAAA,IACd,KAAK,OAAO,OAAO,YAAY;AAC7B,YAAM,YAAY,oBAAI,KAAK;AAE3B,UAAI;AACF,cAAM,SAAS,MAAM,QAAQ,IAAI;AAAA,UAC/B;AAAA,UACA,UAAU,QAAQ;AAAA,UAClB,QAAQ,QAAQ;AAAA,UAChB,WAAW,QAAQ;AAAA,UACnB,aAAa,QAAQ;AAAA,QACvB,CAAC;AACD,cAAM,MAAM,oBAAoB,OAAO,QAAQ,OAAO;AACtD,uBAAe,KAAK;AAAA,UAClB,MAAM,QAAQ;AAAA,UACd;AAAA,UACA,YAAY,oBAAI,KAAK;AAAA,QACvB,CAAC;AAED,eAAO;AAAA,MACT,SAAS,OAAO;AACd,cAAM,aAAa,uBAAuB,KAAK;AAC/C,YAAI,YAAY;AACd,cACE,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,WAAW,WACZ;AACA,uBAAW,YAAY,QAAQ;AAAA,UACjC;AACA,yBAAe,YAAY;AAAA,YACzB,MAAM,QAAQ;AAAA,YACd;AAAA,YACA,YAAY,oBAAI,KAAK;AAAA,UACvB,CAAC;AACD,gBAAM,wBAAwB,OAAO,UAAU;AAAA,QACjD;AAEA,cAAM,YAAY,uBAAuB,OAAO,OAAO;AAAA,UACrD,WAAW,QAAQ;AAAA,QACrB,CAAC;AACD,uBAAe,WAAW;AAAA,UACxB,MAAM,QAAQ;AAAA,UACd;AAAA,UACA,YAAY,oBAAI,KAAK;AAAA,QACvB,CAAC;AAED,cAAM,wBAAwB,OAAO,SAAS;AAAA,MAChD;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAoBO,SAAS,oBAKd,OACA,QACA,SACqB;AACrB,MAAI,aAAa,MAAM,GAAG;AACxB,QACE,WACA,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,OAAO,WACR;AACA,aAAO;AAAA,QACL,GAAG;AAAA,QACH,WAAW,QAAQ;AAAA,MACrB;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,OAAO;AACtB,QAAMC,aAAY,yBAAyB,OAAO,SAAS;AAC3D,QAAM,QAAQ,OAAO,SAAS,CAAC;AAC/B,QAAM,WACJ,OAAO,YACP,6BAA6B;AAAA,IAC3B;AAAA,IACA;AAAA,IACA,WAAAA;AAAA,EACF,CAAC;AACH,QAAM,WAAW,OAAO,WACpB,kBAAkB,OAAO,QAAQ,IACjC;AACJ,QAAM,YAAY;AAAA,IAChB,SAAS;AAAA,IACT,OAAO;AAAA,EACT;AACA,QAAM,SAAS,sBAAsB,OAAO,MAAM;AAElD,SAAO;AAAA,IACL,SAAS;AAAA,MACP;AAAA,MACA,GAAI,MAAM,WAAW,EAAE,UAAU,MAAM,SAAS,IAAI,CAAC;AAAA,MACrD,GAAI,MAAM,QAAQ,EAAE,OAAO,MAAM,MAAM,IAAI,CAAC;AAAA,MAC5C,GAAI
,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,IACA,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,IACzC;AAAA,IACA,GAAI,OAAO,UAAU,EAAE,SAAS,OAAO,QAAQ,IAAI,CAAC;AAAA,IACpD,GAAI,YAAY,EAAE,UAAU,IAAI,CAAC;AAAA,IACjC,GAAI,SAAS,EAAE,OAAO,IAAI,CAAC;AAAA,IAC3B,QAAQ,sBAAsB,OAAO,MAAM;AAAA,EAC7C;AACF;AASO,SAAS,uBACd,OACA,OACA,UAAqD,CAAC,GAC1C;AACZ,QAAM,YAAY,QAAQ;AAE1B,SAAO;AAAA,IACL,SAAS;AAAA,MACP,UAAU;AAAA,QACR;AAAA,UACE,MAAM;AAAA,UACN,SAAS,iBAAiB,KAAK;AAAA,QACjC;AAAA,MACF;AAAA,IACF;AAAA,IACA,OAAO,CAAC;AAAA,IACR,GAAI,aAAa,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,EAAE,UAAU,IAAI,CAAC;AAAA,IACtE,QAAQ,CAAC,eAAe,KAAK,CAAC;AAAA,EAChC;AACF;AAEA,SAAS,6BAAqC;AAAA,EAC5C;AAAA,EACA;AAAA,EACA,WAAW;AACb,GAIwB;AACtB,QAAM,WAAgC;AAAA,IACpC;AAAA,MACE,MAAM;AAAA,MACN,SAAS,iBAAiB,KAAK;AAAA,IACjC;AAAA,EACF;AAEA,MAAI,WAAW,UAAa,oBAAoB,SAAS,GAAG;AAC1D,aAAS,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,GAAI,WAAW,SAAY,EAAE,SAAS,iBAAiB,MAAM,EAAE,IAAI,CAAC;AAAA,MACpE,GAAI,oBAAoB,SAAS,IAC7B,EAAE,WAAW,oBAAoB,IACjC,CAAC;AAAA,IACP,CAAC;AAAA,EACH;AAEA,SAAO;AACT;AAEA,SAAS,yBACP,OACkB;AAClB,UAAQ,SAAS,CAAC,GAAG,IAAI,CAAC,SAAS;AACjC,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,QAAQ;AAAA,MACR,OAAO;AAAA,MACP,UAAU;AAAA,MACV,GAAG;AAAA,IACL,IAAI;AACJ,UAAM,OAAO,2BAA2B,YAAY;AACpD,UAAM,SAAS,YAAY,SAAS;AACpC,UAAM,QAAQ,uBAAuB,QAAQ;AAC7C,UAAM,WAAW,cAAc,kBAAkB,WAAW,IAAI;AAEhE,WAAO;AAAA,MACL,GAAG;AAAA,MACH,GAAI,OAAO,EAAE,WAAW,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,MACzC,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,MACzB,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,EACF,CAAC;AACH;AAEA,SAAS,2BACP,OACuC;AACvC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,cACL,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,IACvB,aACA;AACN;AAEA,SAAS,uBACP,OACqC;AACrC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAEA,SAAS,yBACP,kBACA,iBACA;AACA,QAAM,YAAY;AAAA,IAChB,GAAI,oBAAoB,C
AAC;AAAA,IACzB,GAAI,kBAAkB,gBAAgB,eAAe,IAAI,CAAC;AAAA,EAC5D;AAEA,SAAO,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,YAAY;AACzD;AAEA,SAAS,sBACP,QACkC;AAClC,UAAQ,UAAU,CAAC,GAAG,IAAI,CAAC,UAAU;AACnC,UAAM,aAAa,YAAY,KAAK;AAEpC,QACE,cACA,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,KACzB,OAAO,KAAK,UAAU,EAAE,SAAS,GACjC;AACA,aAAO;AAAA,IACT;AAEA,WAAO,eAAe,KAAK;AAAA,EAC7B,CAAC;AACH;AAEA,SAAS,sBACP,QAC+B;AAC/B,MAAI,CAAC,MAAM,QAAQ,MAAM,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,OAChB,IAAI,oBAAoB,EACxB,OAAO,CAAC,UAAoC,QAAQ,KAAK,CAAC;AAE7D,SAAO,WAAW,SAAS,IAAI,aAAa;AAC9C;AAEA,SAAS,qBAAqB,OAA6C;AACzE,MAAI,CAAC,aAAa,KAAK,GAAG;AACxB,WAAO;AAAA,EACT;AAEA,QAAM;AAAA,IACJ,UAAU;AAAA,IACV,OAAO;AAAA,IACP,GAAG;AAAA,EACL,IAAI;AACJ,QAAMC,UAAS,MAAM,QAAQ,QAAQ,IAAI,WAAW,CAAC,GAClD,IAAI,CAAC,SAAS,oBAAoB,IAAI,CAAC,EACvC,OAAO,CAAC,SAAiC,QAAQ,IAAI,CAAC;AACzD,QAAM,WAAW,aAAa,WAAW,IACrC,kBAAkB,WAAW,IAC7B;AAEJ,MAAIA,OAAM,WAAW,KAAK,CAAC,YAAY,MAAM,CAAC,YAAY,MAAM;AAC9D,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IAC/B,OAAAA;AAAA,EACF;AACF;AAEA,SAAS,oBAAoB,MAA2C;AACtE,MAAI,CAAC,aAAa,IAAI,KAAK,OAAO,KAAK,SAAS,YAAY,CAAC,KAAK,MAAM;AACtE,WAAO;AAAA,EACT;AAEA,QAAM;AAAA,IACJ,YAAY;AAAA,IACZ,OAAO;AAAA,IACP,QAAQ;AAAA,IACR,GAAG;AAAA,EACL,IAAI;AACJ,QAAM,aAAa,gBACf,aAAa,aAAa,IACxB,kBAAkB,aAAa,IAC/B,SACF;AACJ,QAAM,QAAQ,mBAAmB,QAAQ;AACzC,QAAM,SAAS,0BAA0B,SAAS;AAElD,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAI,aACA,EAAE,WAAmD,IACrD,CAAC;AAAA,IACL,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,IACzB,GAAI,SAAS,EAAE,OAAO,IAAI,CAAC;AAAA,EAC7B;AACF;AAEA,SAAS,0BACP,QACmC;AACnC,MAAI,CAAC,MAAM,QAAQ,MAAM,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,OAChB,IAAI,wBAAwB,EAC5B,OAAO,CAAC,UAAwC,QAAQ,KAAK,CAAC;AAEjE,SAAO,WAAW,SAAS,IAAI,aAAa;AAC9C;AAEA,SAAS,yBACP,OACiC;AACjC,MAAI,CAAC,aAAa,KAAK,KAAK,OAAO,MAAM,SAAS,YAAY,CAAC,MAAM,MAAM;AACzE,WAAO;AAAA,EACT;AAEA,QAAM,EAAE,YAAY,eAAe,GAAG,YAAY,IAChD;AACF,QAAM,aAAa,gBACf,aAAa,aAAa,IACxB,kBAAkB,aAAa,IAC/B,SACF;AAEJ,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAI,aACA,EAAE,WAAmD,IACrD,CAAC;AAAA,EACP;AACF;AAGO,SAAS,mBACd,OACqC
;AACrC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,MAAI,iBAAiB,OAAO;AAC1B,UAAMC,WAAU;AAAA,MACd;AAAA,IACF;AAEA,WAAO;AAAA,MACL,GAAIA,YAAW,CAAC;AAAA,MAChB,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,MACE,SACA,OAAO,UAAU,YACjB,CAAC,MAAM,QAAQ,KAAK,KACpB,OAAQ,MAAgC,YAAY,UACpD;AACA,UAAM,aAAa,kBAAkB,KAAgC;AACrE,UAAM,EAAE,SAAAC,UAAS,MAAAC,OAAM,GAAGF,SAAQ,IAAI,cAAc,CAAC;AAErD,WAAO;AAAA,MACL,GAAGA;AAAA,MACH,SAASC;AAAA,MACT,GAAI,OAAOC,UAAS,WAAW,EAAE,MAAAA,MAAK,IAAI,CAAC;AAAA,IAC7C;AAAA,EACF;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAGO,SAAS,wBACd,YACsC;AACtC,SAAO,kBAAkB,UAAU;AACrC;AAGO,SAAS,2BACd,OACA,UAAiC,CAAC,GAClC;AACA,SAAO;AAAA,IACL,wBAAwB,OAAO,YAAY,QAAQ;AAAA,IACnD,wBAAwB,OAAO;AAAA,IAC/B,yBAAyB,OAAO;AAAA,IAChC,6BAA6B,OAAO;AAAA,IACpC,8BAA8B,OAAO;AAAA,IACrC,wCAAwC,OAAO;AAAA,EACjD;AACF;AAQO,SAAS,oBACd,OACA,UAAsC,CAAC,GACrB;AAClB,SAAO,MAAM,IAAI,CAAC,MAAM,UAAU;AAChC,UAAM,YAAY,KAAK,QAAQ,mBAAmB,KAAK,KAAK,IAAI;AAChE,UAAM,SAAS,QAAQ,eACnB,GAAG,QAAQ,YAAY,IAAI,QAAQ,CAAC,KACpC,KAAK;AAET,WAAO;AAAA,MACL,GAAI,SAAS,EAAE,IAAI,OAAO,IAAI,CAAC;AAAA,MAC/B,GAAI,QAAQ,UAAU,EAAE,SAAS,QAAQ,QAAQ,IAAI,CAAC;AAAA,MACtD,GAAI,QAAQ,WAAW,EAAE,UAAU,QAAQ,SAAS,IAAI,CAAC;AAAA,MACzD,MAAM,KAAK;AAAA,MACX,MAAM;AAAA,MACN,GAAI,KAAK,YAAY,EAAE,WAAW,KAAK,UAAU,IAAI,CAAC;AAAA,MACtD,GAAI,KAAK,aAAa,EAAE,YAAY,KAAK,WAAW,IAAI,CAAC;AAAA,MACzD,GAAI,KAAK,eAAe,SAAY,EAAE,YAAY,KAAK,WAAW,IAAI,CAAC;AAAA,MACvE,QAAQ,YAAY,UAAU;AAAA,MAC9B,GAAI,YAAY,EAAE,OAAO,UAAU,IAAI,CAAC;AAAA,MACxC,YAAY,wBAAwB;AAAA,QAClC,yBAAyB;AAAA,QACzB,oBAAoB,KAAK;AAAA,QACzB,oBAAoB;AAAA,QACpB,GAAI,KAAK,KAAK,EAAE,uBAAuB,KAAK,GAAG,IAAI,CAAC;AAAA,QACpD,GAAI,KAAK,cAAc,SACnB,EAAE,8BAA8B,KAAK,UAAU,IAC/C,CAAC;AAAA,QACL,GAAI,KAAK,WAAW,SAChB,EAAE,2BAA2B,KAAK,OAAO,IACzC,CAAC;AAAA,MACP,CAAC;AAAA,IACH;AAAA,EACF,CAAC;AACH;AAQO,SAAS,eACd,KACA,SAC6B;AAC7B,UAAI,mBAAM,GAAG,EAAE,SAAS,GAAG;AACzB,WAAO;AAAA,EACT;AAEA,QA
AM,UAAU,QAAQ,MAAM,uBAAuB;AACrD,QAAM,aAAa,GAAG,OAAO;AAC7B,QAAM,aAAa,QAAQ,WAAW,QAAQ,IAAI,QAAQ,UAAU,QAAQ;AAC5E,QAAM,YACJ,IAAI,OAAO,SAAS,IAAI,mBAAmB,IAAI,OAAO,CAAC,CAAC,IAAI;AAC9D,QAAM,UAA0B;AAAA,IAC9B,IAAI;AAAA,IACJ;AAAA,IACA,MAAM,QAAQ;AAAA,IACd,MAAM;AAAA,IACN,WAAW,QAAQ,UAAU,YAAY;AAAA,IACzC,YAAY,QAAQ,WAAW,YAAY;AAAA,IAC3C;AAAA,IACA,QAAQ,YAAY,UAAU;AAAA,IAC9B,GAAI,YAAY,EAAE,OAAO,UAAU,IAAI,CAAC;AAAA,IACxC,YAAY,wBAAwB;AAAA,MAClC,yBAAyB,QAAQ,iBAAiB;AAAA,MAClD,wBAAwB,QAAQ;AAAA,MAChC,GAAG,2BAA2B,IAAI,KAAK;AAAA,IACzC,CAAC;AAAA,EACH;AACA,QAAM,YAAY,wBAAoB,uBAAU,IAAI,OAAO,GAAG;AAAA,IAC5D;AAAA,IACA,UAAU;AAAA,IACV,cAAc,GAAG,OAAO;AAAA,EAC1B,CAAC;AACD,QAAM,QAAyB;AAAA,IAC7B,IAAI;AAAA,IACJ,MAAM,QAAQ;AAAA,IACd,WAAW,QAAQ,UAAU,YAAY;AAAA,IACzC,YAAY,QAAQ,WAAW,YAAY;AAAA,IAC3C;AAAA,IACA,GAAI,QAAQ,SAAS,EAAE,UAAU,EAAE,QAAQ,QAAQ,OAAO,EAAE,IAAI,CAAC;AAAA,IACjE,OAAO,CAAC,SAAS,GAAG,SAAS;AAAA,EAC/B;AAEA,MAAI,SAAS,CAAC,KAAK;AACnB,SAAO;AACT;AAEA,IAAI,uBAAuB;AAE3B,SAAS,yBAAyB;AAChC,0BAAwB;AACxB,SAAO,SAAS,oBAAoB;AACtC;AAiBO,SAAS,wBACd,OACA,KACiB;AACjB,QAAM,YACJ,iBAAiB,QACb,QACA,IAAI,MAAM,OAAO,SAAS,eAAe,CAAC;AAChD,SAAO,OAAO,OAAO,WAAW;AAAA,IAC9B,gBAAgB;AAAA,EAClB,CAAC;AACH;AAgBO,SAAS,uBAAuB,OAAwC;AAC7E,MACE,SACA,OAAO,UAAU,YACjB,oBAAoB,SACpB,aAAc,MAAuC,cAAc,GACnE;AACA,WAAQ,MAAyC;AAAA,EACnD;AAEA,SAAO;AACT;AAGO,SAAS,aAAa,OAAqC;AAChE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,WAAO;AAAA,EACT;AAEA,QAAM,YAAY;AAMlB,SACE,oBAAoB,UAAU,OAAO,KACrC,QAAQ,UAAU,KAAK,KACvB,OAAO,UAAU,UAAU,YAC3B,CAAC,MAAM,QAAQ,UAAU,KAAK,KAC9B,MAAM,QAAQ,UAAU,MAAM;AAElC;AAGO,SAAS,oBACd,OAC4B;AAC5B,SACE,QAAQ,KAAK,KACb,OAAO,UAAU,YACjB,UAAU,QACV,cAAc,SACd,MAAM,QAAS,MAAiC,QAAQ;AAE5D;AAGO,SAAS,wBACd,QACkC;AAClC,MACE,UACA,OAAO,WAAW,YAClB,MAAM,QAAS,OAAmC,MAAM,GACxD;AACA,WAAQ,OAAwD;AAAA,EAClE;AAEA,SAAO,CAAC;AACV;AAGO,SAAS,eAAe,OAA2C;AACxE,MAAI,iBAAiB,OAAO;AAC1B,WAAO;AAAA,MACL,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,SAAS,OAAO,KAAK;AAAA,EACvB;AACF;","names":["import_core","normalized","toolCalls","spans","details","
message","type"]}
1
+ {"version":3,"sources":["../src/harness.ts"],"sourcesContent":["import {\n assistantMessages,\n failedSpans,\n latestAssistantMessageContent,\n messagesByRole,\n spans,\n spansByKind,\n systemMessages,\n toolCalls,\n toolMessages,\n userMessages,\n} from \"@vitest-evals/core\";\nimport type {\n GenAiOperationName,\n HarnessRun,\n HarnessRunError,\n JsonPrimitive,\n JsonValue,\n NormalizedMessage,\n NormalizedSession,\n NormalizedSpan,\n NormalizedSpanAttributes,\n NormalizedSpanEvent,\n NormalizedTrace,\n TimingSummary,\n ToolCallRecord,\n UsageSummary,\n} from \"@vitest-evals/core\";\n\nexport {\n assistantMessages,\n failedSpans,\n latestAssistantMessageContent,\n messagesByRole,\n spans,\n spansByKind,\n systemMessages,\n toolCalls,\n toolMessages,\n userMessages,\n} from \"@vitest-evals/core\";\nexport type {\n GenAiOperationName,\n GenAiOutputType,\n GenAiProviderName,\n GenAiSemanticAttributeKey,\n GenAiSemanticAttributes,\n GenAiTokenType,\n GenAiToolType,\n HarnessRun,\n HarnessRunError,\n JsonPrimitive,\n JsonValue,\n NormalizedMessage,\n NormalizedSession,\n NormalizedSpan,\n NormalizedSpanAttributeKey,\n NormalizedSpanAttributes,\n NormalizedSpanEvent,\n NormalizedTrace,\n OpenTelemetrySemanticAttributeKey,\n OpenTelemetrySemanticAttributes,\n TimingSummary,\n ToolCallRecord,\n UsageSummary,\n} from \"@vitest-evals/core\";\n\n/** Options for converting normalized tool calls into trace spans. */\nexport type CreateToolCallSpansOptions = {\n /** Trace id to attach to each generated tool span. */\n traceId?: string;\n /** Parent span id to attach to each generated tool span. */\n parentId?: string;\n /** Prefix used to create internal span ids instead of reusing tool-call ids. */\n spanIdPrefix?: string;\n};\n\n/** Options for attaching a fallback run trace to a harness result. */\nexport type EnsureRunTraceOptions = {\n /** Human-readable run or harness name. */\n name: string;\n /** Wall-clock start time for the harness run. 
*/\n startedAt: Date;\n /** Wall-clock finish time for the harness run. */\n finishedAt: Date;\n /** Optional trace id. A generated id is used when omitted. */\n id?: string;\n /** GenAI operation name to place on the root run span. */\n operationName?: GenAiOperationName;\n /** Optional JSON-safe source marker for the trace metadata. */\n source?: string;\n};\n\ntype OutputField<TOutput extends JsonValue | undefined> =\n undefined extends TOutput ? { output?: TOutput } : { output: TOutput };\n\n/** Generic JSON-like metadata record used by normalized artifacts and reports. */\nexport type HarnessMetadata = Record<string, unknown>;\n\n/**\n * Runtime context passed from the eval fixture into a harness run.\n *\n * @example\n * ```ts\n * const harness: Harness<string> = {\n * name: \"refund-agent\",\n * async run(input, context) {\n * context.setArtifact(\"inputLength\", input.length);\n *\n * return {\n * output: undefined,\n * session: { messages: [{ role: \"user\", content: input }] },\n * usage: {},\n * errors: [],\n * };\n * },\n * };\n * ```\n */\nexport type HarnessContext = {\n /** Abort signal from Vitest when available. */\n signal?: AbortSignal;\n /** Mutable JSON-safe artifact bag shared with the harness. */\n artifacts: Record<string, JsonValue>;\n /** Stores one JSON-safe artifact on the current run. */\n setArtifact: (name: string, value: JsonValue) => void;\n};\n\n/**\n * Adapter that executes the system under test and returns a normalized run.\n *\n * @example\n * ```ts\n * const harness: Harness<string, { status: \"approved\" | \"denied\" }> = {\n * name: \"refund-agent\",\n * async run(input, context) {\n * return normalizeHarnessRun(input, await runRefundFlow(input), context);\n * },\n * };\n * ```\n */\nexport type Harness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = {\n /** Stable harness name used in reports. */\n name: string;\n /** Executes the system under test and returns a normalized run. 
*/\n run: (input: TInput, context: HarnessContext) => Promise<HarnessRun<TOutput>>;\n};\n\n/** Value or promise accepted by lightweight harness callbacks. */\nexport type MaybePromise<T> = T | Promise<T>;\n\n/** Lightweight tool-call record accepted by `createHarness(...)` results. */\nexport type SimpleToolCallRecord = Omit<\n ToolCallRecord,\n \"arguments\" | \"result\" | \"error\" | \"metadata\"\n> & {\n /** Raw tool arguments accepted by `createHarness(...)` before normalization. */\n arguments?: unknown;\n /** Raw tool result accepted by `createHarness(...)` before normalization. */\n result?: unknown;\n /** Raw tool error accepted by `createHarness(...)` before normalization. */\n error?: unknown;\n /** Raw tool metadata accepted by `createHarness(...)` before normalization. */\n metadata?: Record<string, unknown>;\n};\n\n/** Lightweight span event accepted by `createHarness(...)` results. */\nexport type SimpleSpanEvent = Omit<NormalizedSpanEvent, \"attributes\"> & {\n /** Raw event attributes accepted by `createHarness(...)` before normalization. */\n attributes?: Record<string, unknown>;\n};\n\n/** Lightweight span record accepted by `createHarness(...)` results. */\nexport type SimpleSpanRecord = Omit<\n NormalizedSpan,\n \"attributes\" | \"error\" | \"events\"\n> & {\n /** Raw span attributes accepted by `createHarness(...)` before normalization. */\n attributes?: Record<string, unknown>;\n /** Raw span error accepted by `createHarness(...)` before normalization. */\n error?: unknown;\n /** Raw span events accepted by `createHarness(...)` before normalization. */\n events?: SimpleSpanEvent[];\n};\n\n/** Lightweight trace record accepted by `createHarness(...)` results. */\nexport type SimpleTraceRecord = Omit<NormalizedTrace, \"metadata\" | \"spans\"> & {\n /** Raw trace metadata accepted by `createHarness(...)` before normalization. */\n metadata?: Record<string, unknown>;\n /** Lightweight spans to normalize into the trace. 
*/\n spans: SimpleSpanRecord[];\n};\n\n/**\n * Lightweight result shape normalized by `createHarness(...)`.\n *\n * @example\n * ```ts\n * const result: SimpleHarnessResult<{ status: \"approved\" }> = {\n * output: { status: \"approved\" },\n * toolCalls: [{ name: \"lookupInvoice\", arguments: { invoiceId: \"inv_123\" } }],\n * usage: { totalTokens: 260 },\n * };\n * ```\n */\nexport type SimpleHarnessResult<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n /** Pre-normalized transcript messages. When omitted, a default user/assistant transcript is created. */\n messages?: NormalizedMessage[];\n /** Lightweight tool-call records to normalize into the session. */\n toolCalls?: SimpleToolCallRecord[];\n /** Usage summary to attach to the run. */\n usage?: UsageSummary;\n /** Timing summary to attach to the run. */\n timings?: TimingSummary;\n /** Raw artifact values to normalize and merge into the run. */\n artifacts?: Record<string, unknown>;\n /** Lightweight traces and spans to normalize into the run. */\n traces?: SimpleTraceRecord[];\n /** Raw session metadata to normalize into the session. */\n metadata?: Record<string, unknown>;\n /** Raw errors to normalize into the run. */\n errors?: unknown[];\n};\n\n/** Either a complete normalized run or a lightweight result to normalize. */\nexport type HarnessResultLike<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;\n\n/** Arguments passed to the `createHarness(...)` convenience callback. */\nexport type CreateHarnessRunArgs<TInput> = {\n /** Original input passed to `run(input)`. */\n input: TInput;\n /** Abort signal from Vitest when available. */\n signal?: AbortSignal;\n /** Mutable run artifact bag. */\n artifacts: HarnessContext[\"artifacts\"];\n /** Stores one JSON-safe artifact on the current run. 
*/\n setArtifact: HarnessContext[\"setArtifact\"];\n};\n\n/**\n * Options for creating a lightweight custom application harness.\n *\n * @example\n * ```ts\n * const options: CreateHarnessOptions<string, { status: \"approved\" }> = {\n * name: \"refund-agent\",\n * run: async ({ input }) => ({\n * output: await classifyRefund(input),\n * }),\n * };\n * ```\n */\nexport type CreateHarnessOptions<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = {\n /** Stable harness name used in reports. */\n name: string;\n /** Executes application code and returns either a lightweight result or full `HarnessRun`. */\n run: (\n args: CreateHarnessRunArgs<TInput>,\n ) => MaybePromise<HarnessResultLike<TOutput>>;\n};\n\nfunction isJsonPrimitive(value: unknown): value is JsonPrimitive {\n return (\n value === null ||\n typeof value === \"string\" ||\n typeof value === \"boolean\" ||\n (typeof value === \"number\" && Number.isFinite(value))\n );\n}\n\nfunction isJsonRecord(value: unknown): value is Record<string, unknown> {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\nfunction normalizeJsonArray(value: unknown[], seen: WeakSet<object>) {\n if (seen.has(value)) {\n return undefined;\n }\n\n seen.add(value);\n const normalized = value.map((item) => {\n const normalized = toJsonValueInternal(item, seen);\n return normalized === undefined ? 
null : normalized;\n });\n seen.delete(value);\n\n return normalized;\n}\n\nfunction normalizeJsonObject(\n value: Record<string, unknown>,\n seen: WeakSet<object>,\n): Record<string, JsonValue> {\n const normalized: Record<string, JsonValue> = {};\n\n if (seen.has(value)) {\n return normalized;\n }\n\n seen.add(value);\n try {\n for (const [key, entryValue] of Object.entries(value)) {\n const entry = toJsonValueInternal(entryValue, seen);\n if (entry !== undefined) {\n normalized[key] = entry;\n }\n }\n } finally {\n seen.delete(value);\n }\n\n return normalized;\n}\n\n/** Returns true when a value exposes a callable method with the given name. */\nexport function hasCallableMethod(value: unknown, methodName: string) {\n return (\n value !== null &&\n (typeof value === \"object\" || typeof value === \"function\") &&\n methodName in value &&\n typeof (value as Record<string, unknown>)[methodName] === \"function\"\n );\n}\n\n/** Normalizes an unknown value into the JSON-safe shape used by harness runs. */\nexport function toJsonValue(value: unknown): JsonValue | undefined {\n return toJsonValueInternal(value, new WeakSet());\n}\n\nfunction toJsonValueInternal(\n value: unknown,\n seen: WeakSet<object>,\n): JsonValue | undefined {\n if (isJsonPrimitive(value)) {\n return value;\n }\n\n if (\n value !== null &&\n typeof value === \"object\" &&\n seen.has(value as object)\n ) {\n return undefined;\n }\n\n if (Array.isArray(value)) {\n return normalizeJsonArray(value, seen);\n }\n\n if (isJsonRecord(value)) {\n return normalizeJsonObject(value, seen);\n }\n\n return undefined;\n}\n\n/** Drops non-JSON properties from a record while preserving valid values. */\nexport function normalizeRecord(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n return normalizeJsonObject(value, new WeakSet());\n}\n\n/** Normalizes metadata and omits the field entirely when nothing survives. 
*/\nexport function normalizeMetadata(\n value: Record<string, unknown>,\n): Record<string, JsonValue> | undefined {\n const normalized = normalizeRecord(value);\n return Object.keys(normalized).length > 0 ? normalized : undefined;\n}\n\n/** Converts arbitrary content into the JSON-safe message content shape. */\nexport function normalizeContent(value: unknown): JsonValue {\n const normalized = toJsonValue(value);\n return normalized !== undefined ? normalized : String(value);\n}\n\n/**\n * Creates a harness from the common \"run app code and return output\" shape.\n *\n * @param options - Harness name plus the callback that executes app code.\n *\n * @example\n * ```ts\n * import { createHarness } from \"vitest-evals\";\n *\n * export const refundHarness = createHarness<\n * string,\n * { status: \"approved\" | \"denied\" }\n * >({\n * name: \"refund-agent\",\n * run: async ({ input, setArtifact }) => {\n * const result = await runRefundFlow(input);\n * const output = { status: result.status };\n *\n * setArtifact(\"case\", { invoiceId: result.invoiceId });\n *\n * return {\n * output,\n * toolCalls: result.toolCalls,\n * usage: { provider: \"openai\", model: \"gpt-4o-mini\" },\n * };\n * },\n * });\n * ```\n */\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n>(options: CreateHarnessOptions<TInput, TOutput>): Harness<TInput, TOutput>;\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n>(options: CreateHarnessOptions<TInput, TOutput>): Harness<TInput, TOutput> {\n const harness: Harness<TInput, TOutput> = {\n name: options.name,\n run: async (input, context) => {\n const startedAt = new Date();\n\n try {\n const result = await options.run({\n input,\n signal: context.signal,\n artifacts: context.artifacts,\n setArtifact: context.setArtifact,\n });\n const run = normalizeHarnessRun(input, result, context);\n ensureRunTrace(run, {\n 
name: options.name,\n startedAt,\n finishedAt: new Date(),\n });\n\n return run;\n } catch (error) {\n const partialRun = getHarnessRunFromError(error);\n if (partialRun) {\n if (\n Object.keys(context.artifacts).length > 0 &&\n !partialRun.artifacts\n ) {\n partialRun.artifacts = context.artifacts;\n }\n ensureRunTrace(partialRun, {\n name: options.name,\n startedAt,\n finishedAt: new Date(),\n });\n throw attachHarnessRunToError(error, partialRun);\n }\n\n const failedRun = createFailedHarnessRun(input, error, {\n artifacts: context.artifacts,\n });\n ensureRunTrace(failedRun, {\n name: options.name,\n startedAt,\n finishedAt: new Date(),\n });\n\n throw attachHarnessRunToError(error, failedRun);\n }\n },\n };\n\n return harness;\n}\n\n/**\n * Normalizes a lightweight harness result into the reporter-facing run shape.\n *\n * @param input - Original input passed to the harness.\n * @param result - Lightweight result or pre-normalized harness run.\n * @param context - Optional per-run context used to merge artifacts.\n *\n * @example\n * ```ts\n * const run = normalizeHarnessRun(\"Refund invoice inv_123\", {\n * output: { status: \"approved\" },\n * toolCalls: [{ name: \"lookupInvoice\", arguments: { invoiceId: \"inv_123\" } }],\n * usage: { provider: \"openai\", model: \"gpt-4o-mini\" },\n * });\n *\n * expect(toolCalls(run.session)).toHaveLength(1);\n * ```\n */\nexport function normalizeHarnessRun<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n>(\n input: TInput,\n result: HarnessResultLike<TOutput>,\n context?: HarnessContext,\n): HarnessRun<TOutput> {\n if (isHarnessRun(result)) {\n if (\n context &&\n Object.keys(context.artifacts).length > 0 &&\n !result.artifacts\n ) {\n return {\n ...result,\n artifacts: context.artifacts,\n };\n }\n\n return result;\n }\n\n const output = result.output;\n const toolCalls = normalizeSimpleToolCalls(result.toolCalls);\n const usage = result.usage ?? 
{};\n const messages =\n result.messages ??\n createDefaultSessionMessages({\n input,\n output,\n toolCalls,\n });\n const metadata = result.metadata\n ? normalizeMetadata(result.metadata)\n : undefined;\n const artifacts = normalizeMergedArtifacts(\n context?.artifacts,\n result.artifacts,\n );\n const traces = normalizeSimpleTraces(result.traces);\n\n return {\n session: {\n messages,\n ...(usage.provider ? { provider: usage.provider } : {}),\n ...(usage.model ? { model: usage.model } : {}),\n ...(metadata ? { metadata } : {}),\n },\n ...(output !== undefined ? { output } : {}),\n usage,\n ...(result.timings ? { timings: result.timings } : {}),\n ...(artifacts ? { artifacts } : {}),\n ...(traces ? { traces } : {}),\n errors: normalizeSimpleErrors(result.errors),\n } as HarnessRun<TOutput>;\n}\n\n/**\n * Builds a JSON-safe failed run for errors that happen before a harness can return.\n *\n * @param input - Original input passed to the harness.\n * @param error - Error thrown by setup or execution.\n * @param options - Optional artifacts to preserve on the failed run.\n */\nexport function createFailedHarnessRun(\n input: unknown,\n error: unknown,\n options: { artifacts?: Record<string, JsonValue> } = {},\n): HarnessRun {\n const artifacts = options.artifacts;\n\n return {\n session: {\n messages: [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ],\n },\n usage: {},\n ...(artifacts && Object.keys(artifacts).length > 0 ? { artifacts } : {}),\n errors: [serializeError(error)],\n };\n}\n\nfunction createDefaultSessionMessages<TInput>({\n input,\n output,\n toolCalls: normalizedToolCalls,\n}: {\n input: TInput;\n output: JsonValue | undefined;\n toolCalls: ToolCallRecord[];\n}): NormalizedMessage[] {\n const messages: NormalizedMessage[] = [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ];\n\n if (output !== undefined || normalizedToolCalls.length > 0) {\n messages.push({\n role: \"assistant\",\n ...(output !== undefined ? 
{ content: normalizeContent(output) } : {}),\n ...(normalizedToolCalls.length > 0\n ? { toolCalls: normalizedToolCalls }\n : {}),\n });\n }\n\n return messages;\n}\n\nfunction normalizeSimpleToolCalls(\n calls: SimpleToolCallRecord[] | undefined,\n): ToolCallRecord[] {\n return (calls ?? []).map((call) => {\n const {\n arguments: rawArguments,\n result: rawResult,\n error: rawError,\n metadata: rawMetadata,\n ...toolCall\n } = call;\n const args = normalizeToolCallArguments(rawArguments);\n const result = toJsonValue(rawResult);\n const error = normalizeToolCallError(rawError);\n const metadata = rawMetadata ? normalizeMetadata(rawMetadata) : undefined;\n\n return {\n ...toolCall,\n ...(args ? { arguments: args } : {}),\n ...(result !== undefined ? { result } : {}),\n ...(error ? { error } : {}),\n ...(metadata ? { metadata } : {}),\n };\n });\n}\n\nfunction normalizeToolCallArguments(\n value: unknown,\n): Record<string, JsonValue> | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const normalized = toJsonValue(value);\n return normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized)\n ? normalized\n : undefined;\n}\n\nfunction normalizeToolCallError(\n value: unknown,\n): ToolCallRecord[\"error\"] | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const serialized = serializeError(value);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\nfunction normalizeMergedArtifacts(\n contextArtifacts: Record<string, JsonValue> | undefined,\n resultArtifacts: Record<string, unknown> | undefined,\n) {\n const artifacts = {\n ...(contextArtifacts ?? {}),\n ...(resultArtifacts ? normalizeRecord(resultArtifacts) : {}),\n };\n\n return Object.keys(artifacts).length > 0 ? 
artifacts : undefined;\n}\n\nfunction normalizeSimpleErrors(\n errors: unknown[] | undefined,\n): Array<Record<string, JsonValue>> {\n return (errors ?? []).map((error) => {\n const normalized = toJsonValue(error);\n\n if (\n normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized) &&\n Object.keys(normalized).length > 0\n ) {\n return normalized;\n }\n\n return serializeError(error);\n });\n}\n\nfunction normalizeSimpleTraces(\n traces: SimpleTraceRecord[] | undefined,\n): NormalizedTrace[] | undefined {\n if (!Array.isArray(traces)) {\n return undefined;\n }\n\n const normalized = traces\n .map(normalizeSimpleTrace)\n .filter((trace): trace is NormalizedTrace => Boolean(trace));\n\n return normalized.length > 0 ? normalized : undefined;\n}\n\nfunction normalizeSimpleTrace(trace: unknown): NormalizedTrace | undefined {\n if (!isJsonRecord(trace)) {\n return undefined;\n }\n\n const {\n metadata: rawMetadata,\n spans: rawSpans,\n ...traceFields\n } = trace as Partial<SimpleTraceRecord>;\n const spans = (Array.isArray(rawSpans) ? rawSpans : [])\n .map((span) => normalizeSimpleSpan(span))\n .filter((span): span is NormalizedSpan => Boolean(span));\n const metadata = isJsonRecord(rawMetadata)\n ? normalizeMetadata(rawMetadata)\n : undefined;\n\n if (spans.length === 0 && !traceFields.id && !traceFields.name) {\n return undefined;\n }\n\n return {\n ...traceFields,\n ...(metadata ? { metadata } : {}),\n spans,\n };\n}\n\nfunction normalizeSimpleSpan(span: unknown): NormalizedSpan | undefined {\n if (!isJsonRecord(span) || typeof span.name !== \"string\" || !span.name) {\n return undefined;\n }\n\n const {\n attributes: rawAttributes,\n error: rawError,\n events: rawEvents,\n ...spanFields\n } = span as Partial<SimpleSpanRecord> & { name: string };\n const attributes = rawAttributes\n ? isJsonRecord(rawAttributes)\n ? 
normalizeMetadata(rawAttributes)\n : undefined\n : undefined;\n const error = normalizeSpanError(rawError);\n const events = normalizeSimpleSpanEvents(rawEvents);\n\n return {\n ...spanFields,\n ...(attributes\n ? { attributes: attributes as NormalizedSpanAttributes }\n : {}),\n ...(error ? { error } : {}),\n ...(events ? { events } : {}),\n };\n}\n\nfunction normalizeSimpleSpanEvents(\n events: unknown,\n): NormalizedSpanEvent[] | undefined {\n if (!Array.isArray(events)) {\n return undefined;\n }\n\n const normalized = events\n .map(normalizeSimpleSpanEvent)\n .filter((event): event is NormalizedSpanEvent => Boolean(event));\n\n return normalized.length > 0 ? normalized : undefined;\n}\n\nfunction normalizeSimpleSpanEvent(\n event: unknown,\n): NormalizedSpanEvent | undefined {\n if (!isJsonRecord(event) || typeof event.name !== \"string\" || !event.name) {\n return undefined;\n }\n\n const { attributes: rawAttributes, ...eventFields } =\n event as Partial<SimpleSpanEvent> & { name: string };\n const attributes = rawAttributes\n ? isJsonRecord(rawAttributes)\n ? normalizeMetadata(rawAttributes)\n : undefined\n : undefined;\n\n return {\n ...eventFields,\n ...(attributes\n ? { attributes: attributes as NormalizedSpanAttributes }\n : {}),\n };\n}\n\n/** Normalizes arbitrary span errors while preserving object-shaped messages. */\nexport function normalizeSpanError(\n error: unknown,\n): NormalizedSpan[\"error\"] | undefined {\n if (error === undefined) {\n return undefined;\n }\n\n if (error instanceof Error) {\n const details = normalizeMetadata(\n error as unknown as Record<string, unknown>,\n );\n\n return {\n ...(details ?? {}),\n type: error.name,\n message: error.message,\n };\n }\n\n if (\n error &&\n typeof error === \"object\" &&\n !Array.isArray(error) &&\n typeof (error as { message?: unknown }).message === \"string\"\n ) {\n const normalized = normalizeMetadata(error as Record<string, unknown>);\n const { message, type, ...details } = normalized ?? 
{};\n\n return {\n ...details,\n message: message as string,\n ...(typeof type === \"string\" ? { type } : {}),\n };\n }\n\n const serialized = serializeError(error);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\n/** Normalizes raw span attributes into the JSON-safe span attribute shape. */\nexport function normalizeSpanAttributes(\n attributes: Record<string, unknown>,\n): NormalizedSpanAttributes | undefined {\n return normalizeMetadata(attributes) as NormalizedSpanAttributes | undefined;\n}\n\n/** Builds common OpenTelemetry GenAI usage attributes from a usage summary. */\nexport function createGenAiUsageAttributes(\n usage: UsageSummary | undefined,\n options: { provider?: string } = {},\n) {\n return {\n \"gen_ai.provider.name\": usage?.provider ?? options.provider,\n \"gen_ai.request.model\": usage?.model,\n \"gen_ai.response.model\": usage?.model,\n \"gen_ai.usage.input_tokens\": usage?.inputTokens,\n \"gen_ai.usage.output_tokens\": usage?.outputTokens,\n \"gen_ai.usage.reasoning.output_tokens\": usage?.reasoningTokens,\n } satisfies Record<string, unknown>;\n}\n\n/**\n * Converts normalized tool-call records into trace spans.\n *\n * Tool-call ids are preserved as GenAI attributes. Pass `spanIdPrefix` when the\n * spans belong to a known trace so span ids stay internally unique.\n */\nexport function createToolCallSpans(\n calls: ToolCallRecord[],\n options: CreateToolCallSpansOptions = {},\n): NormalizedSpan[] {\n return calls.map((call, index) => {\n const spanError = call.error ? normalizeSpanError(call.error) : undefined;\n const spanId = options.spanIdPrefix\n ? `${options.spanIdPrefix}:${index + 1}`\n : call.id;\n\n return {\n ...(spanId ? { id: spanId } : {}),\n ...(options.traceId ? { traceId: options.traceId } : {}),\n ...(options.parentId ? 
{ parentId: options.parentId } : {}),\n name: call.name,\n kind: \"tool\",\n ...(call.startedAt ? { startedAt: call.startedAt } : {}),\n ...(call.finishedAt ? { finishedAt: call.finishedAt } : {}),\n ...(call.durationMs !== undefined ? { durationMs: call.durationMs } : {}),\n status: spanError ? \"error\" : \"ok\",\n ...(spanError ? { error: spanError } : {}),\n attributes: normalizeSpanAttributes({\n \"gen_ai.operation.name\": \"execute_tool\",\n \"gen_ai.tool.name\": call.name,\n \"gen_ai.tool.type\": \"function\",\n ...(call.id ? { \"gen_ai.tool.call.id\": call.id } : {}),\n ...(call.arguments !== undefined\n ? { \"gen_ai.tool.call.arguments\": call.arguments }\n : {}),\n ...(call.result !== undefined\n ? { \"gen_ai.tool.call.result\": call.result }\n : {}),\n }),\n } satisfies NormalizedSpan;\n });\n}\n\n/**\n * Attaches a fallback run trace when a harness result does not already contain spans.\n *\n * This keeps custom harnesses inspectable while first-party harness packages\n * remain free to attach richer native traces.\n */\nexport function ensureRunTrace(\n run: HarnessRun,\n options: EnsureRunTraceOptions,\n): NormalizedTrace | undefined {\n if (spans(run).length > 0) {\n return undefined;\n }\n\n const traceId = options.id ?? createGeneratedTraceId();\n const rootSpanId = `${traceId}:run`;\n const durationMs = options.finishedAt.getTime() - options.startedAt.getTime();\n const rootError =\n run.errors.length > 0 ? normalizeSpanError(run.errors[0]) : undefined;\n const runSpan: NormalizedSpan = {\n id: rootSpanId,\n traceId,\n name: options.name,\n kind: \"run\",\n startedAt: options.startedAt.toISOString(),\n finishedAt: options.finishedAt.toISOString(),\n durationMs,\n status: rootError ? \"error\" : \"ok\",\n ...(rootError ? { error: rootError } : {}),\n attributes: normalizeSpanAttributes({\n \"gen_ai.operation.name\": options.operationName ?? 
\"invoke_workflow\",\n \"gen_ai.workflow.name\": options.name,\n ...createGenAiUsageAttributes(run.usage),\n }),\n };\n const toolSpans = createToolCallSpans(toolCalls(run.session), {\n traceId,\n parentId: rootSpanId,\n spanIdPrefix: `${traceId}:tool`,\n });\n const trace: NormalizedTrace = {\n id: traceId,\n name: options.name,\n startedAt: options.startedAt.toISOString(),\n finishedAt: options.finishedAt.toISOString(),\n durationMs,\n ...(options.source ? { metadata: { source: options.source } } : {}),\n spans: [runSpan, ...toolSpans],\n };\n\n run.traces = [trace];\n return trace;\n}\n\nlet nextGeneratedTraceId = 0;\n\nfunction createGeneratedTraceId() {\n nextGeneratedTraceId += 1;\n return `trace_${nextGeneratedTraceId}`;\n}\n\n/**\n * Attaches a partial or complete harness run to an arbitrary thrown error.\n *\n * @param error - Thrown value to wrap.\n * @param run - Partial or complete normalized harness run to preserve.\n *\n * @example\n * ```ts\n * try {\n * return await runAgent(input);\n * } catch (error) {\n * throw attachHarnessRunToError(error, partialRun);\n * }\n * ```\n */\nexport function attachHarnessRunToError(\n error: unknown,\n run: HarnessRun,\n): HarnessRunError {\n const baseError =\n error instanceof Error\n ? error\n : new Error(String(error ?? 
\"Unknown error\"));\n return Object.assign(baseError, {\n vitestEvalsRun: run,\n });\n}\n\n/**\n * Reads an attached harness run back off a previously wrapped error value.\n *\n * @param error - Unknown thrown value that may contain a harness run.\n *\n * @example\n * ```ts\n * const partialRun = getHarnessRunFromError(error);\n *\n * if (partialRun) {\n * console.log(toolCalls(partialRun.session));\n * }\n * ```\n */\nexport function getHarnessRunFromError(error: unknown): HarnessRun | undefined {\n if (\n error &&\n typeof error === \"object\" &&\n \"vitestEvalsRun\" in error &&\n isHarnessRun((error as { vitestEvalsRun?: unknown }).vitestEvalsRun)\n ) {\n return (error as { vitestEvalsRun: HarnessRun }).vitestEvalsRun;\n }\n\n return undefined;\n}\n\n/** Returns true when a value matches the normalized `HarnessRun` contract. */\nexport function isHarnessRun(value: unknown): value is HarnessRun {\n if (!value || typeof value !== \"object\") {\n return false;\n }\n\n const candidate = value as {\n session?: unknown;\n usage?: unknown;\n errors?: unknown;\n };\n\n return (\n isNormalizedSession(candidate.session) &&\n Boolean(candidate.usage) &&\n typeof candidate.usage === \"object\" &&\n !Array.isArray(candidate.usage) &&\n Array.isArray(candidate.errors)\n );\n}\n\n/** Returns true when a value matches the normalized session contract. */\nexport function isNormalizedSession(\n value: unknown,\n): value is NormalizedSession {\n return (\n Boolean(value) &&\n typeof value === \"object\" &&\n value !== null &&\n \"messages\" in value &&\n Array.isArray((value as { messages?: unknown }).messages)\n );\n}\n\n/** Reuses pre-normalized harness errors when a runtime already returns them. 
*/\nexport function resolveHarnessRunErrors(\n result: unknown,\n): Array<Record<string, JsonValue>> {\n if (\n result &&\n typeof result === \"object\" &&\n Array.isArray((result as Record<string, unknown>).errors)\n ) {\n return (result as { errors: Array<Record<string, JsonValue>> }).errors;\n }\n\n return [];\n}\n\n/** Serializes an arbitrary thrown value into the normalized error shape. */\nexport function serializeError(error: unknown): Record<string, JsonValue> {\n if (error instanceof Error) {\n return {\n type: error.name,\n message: error.message,\n };\n }\n\n return {\n type: \"Error\",\n message: String(error),\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAWO;AAkBP,IAAAA,eAWO;AAuOP,SAAS,gBAAgB,OAAwC;AAC/D,SACE,UAAU,QACV,OAAO,UAAU,YACjB,OAAO,UAAU,aAChB,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK;AAEvD;AAEA,SAAS,aAAa,OAAkD;AACtE,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAEA,SAAS,mBAAmB,OAAkB,MAAuB;AACnE,MAAI,KAAK,IAAI,KAAK,GAAG;AACnB,WAAO;AAAA,EACT;AAEA,OAAK,IAAI,KAAK;AACd,QAAM,aAAa,MAAM,IAAI,CAAC,SAAS;AACrC,UAAMC,cAAa,oBAAoB,MAAM,IAAI;AACjD,WAAOA,gBAAe,SAAY,OAAOA;AAAA,EAC3C,CAAC;AACD,OAAK,OAAO,KAAK;AAEjB,SAAO;AACT;AAEA,SAAS,oBACP,OACA,MAC2B;AAC3B,QAAM,aAAwC,CAAC;AAE/C,MAAI,KAAK,IAAI,KAAK,GAAG;AACnB,WAAO;AAAA,EACT;AAEA,OAAK,IAAI,KAAK;AACd,MAAI;AACF,eAAW,CAAC,KAAK,UAAU,KAAK,OAAO,QAAQ,KAAK,GAAG;AACrD,YAAM,QAAQ,oBAAoB,YAAY,IAAI;AAClD,UAAI,UAAU,QAAW;AACvB,mBAAW,GAAG,IAAI;AAAA,MACpB;AAAA,IACF;AAAA,EACF,UAAE;AACA,SAAK,OAAO,KAAK;AAAA,EACnB;AAEA,SAAO;AACT;AAGO,SAAS,kBAAkB,OAAgB,YAAoB;AACpE,SACE,UAAU,SACT,OAAO,UAAU,YAAY,OAAO,UAAU,eAC/C,cAAc,SACd,OAAQ,MAAkC,UAAU,MAAM;AAE9D;AAGO,SAAS,YAAY,OAAuC;AACjE,SAAO,oBAAoB,OAAO,oBAAI,QAAQ,CAAC;AACjD;AAEA,SAAS,oBACP,OACA,MACuB;AACvB,MAAI,gBAAgB,KAAK,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,MACE,UAAU,QACV,OAAO,UAAU,YACjB,KAAK,IAAI,KAAe,GACxB;AACA,WAAO;AAAA,EACT;AAEA,MAAI
,MAAM,QAAQ,KAAK,GAAG;AACxB,WAAO,mBAAmB,OAAO,IAAI;AAAA,EACvC;AAEA,MAAI,aAAa,KAAK,GAAG;AACvB,WAAO,oBAAoB,OAAO,IAAI;AAAA,EACxC;AAEA,SAAO;AACT;AAGO,SAAS,gBACd,OAC2B;AAC3B,SAAO,oBAAoB,OAAO,oBAAI,QAAQ,CAAC;AACjD;AAGO,SAAS,kBACd,OACuC;AACvC,QAAM,aAAa,gBAAgB,KAAK;AACxC,SAAO,OAAO,KAAK,UAAU,EAAE,SAAS,IAAI,aAAa;AAC3D;AAGO,SAAS,iBAAiB,OAA2B;AAC1D,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,eAAe,SAAY,aAAa,OAAO,KAAK;AAC7D;AAmCO,SAAS,cAGd,SAA0E;AAC1E,QAAM,UAAoC;AAAA,IACxC,MAAM,QAAQ;AAAA,IACd,KAAK,OAAO,OAAO,YAAY;AAC7B,YAAM,YAAY,oBAAI,KAAK;AAE3B,UAAI;AACF,cAAM,SAAS,MAAM,QAAQ,IAAI;AAAA,UAC/B;AAAA,UACA,QAAQ,QAAQ;AAAA,UAChB,WAAW,QAAQ;AAAA,UACnB,aAAa,QAAQ;AAAA,QACvB,CAAC;AACD,cAAM,MAAM,oBAAoB,OAAO,QAAQ,OAAO;AACtD,uBAAe,KAAK;AAAA,UAClB,MAAM,QAAQ;AAAA,UACd;AAAA,UACA,YAAY,oBAAI,KAAK;AAAA,QACvB,CAAC;AAED,eAAO;AAAA,MACT,SAAS,OAAO;AACd,cAAM,aAAa,uBAAuB,KAAK;AAC/C,YAAI,YAAY;AACd,cACE,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,WAAW,WACZ;AACA,uBAAW,YAAY,QAAQ;AAAA,UACjC;AACA,yBAAe,YAAY;AAAA,YACzB,MAAM,QAAQ;AAAA,YACd;AAAA,YACA,YAAY,oBAAI,KAAK;AAAA,UACvB,CAAC;AACD,gBAAM,wBAAwB,OAAO,UAAU;AAAA,QACjD;AAEA,cAAM,YAAY,uBAAuB,OAAO,OAAO;AAAA,UACrD,WAAW,QAAQ;AAAA,QACrB,CAAC;AACD,uBAAe,WAAW;AAAA,UACxB,MAAM,QAAQ;AAAA,UACd;AAAA,UACA,YAAY,oBAAI,KAAK;AAAA,QACvB,CAAC;AAED,cAAM,wBAAwB,OAAO,SAAS;AAAA,MAChD;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAoBO,SAAS,oBAId,OACA,QACA,SACqB;AACrB,MAAI,aAAa,MAAM,GAAG;AACxB,QACE,WACA,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,OAAO,WACR;AACA,aAAO;AAAA,QACL,GAAG;AAAA,QACH,WAAW,QAAQ;AAAA,MACrB;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,OAAO;AACtB,QAAMC,aAAY,yBAAyB,OAAO,SAAS;AAC3D,QAAM,QAAQ,OAAO,SAAS,CAAC;AAC/B,QAAM,WACJ,OAAO,YACP,6BAA6B;AAAA,IAC3B;AAAA,IACA;AAAA,IACA,WAAAA;AAAA,EACF,CAAC;AACH,QAAM,WAAW,OAAO,WACpB,kBAAkB,OAAO,QAAQ,IACjC;AACJ,QAAM,YAAY;AAAA,IAChB,SAAS;AAAA,IACT,OAAO;AAAA,EACT;AACA,QAAM,SAAS,sBAAsB,OAAO,MAAM;AAElD,SAAO;AAAA,IACL,SAAS;AAAA,MACP;AAAA,MACA,GAAI,MAAM,WAAW,EAAE,UAAU,MAAM,SAAS,IAAI,CAAC;AAAA,MACrD,GAAI,MAAM,QAAQ,EAAE,OAAO,MAAM,MAAM,IAAI,CAAC;AAAA,MAC5C,GAAI,WAAW,EAAE,SAAS,IAAI,
CAAC;AAAA,IACjC;AAAA,IACA,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,IACzC;AAAA,IACA,GAAI,OAAO,UAAU,EAAE,SAAS,OAAO,QAAQ,IAAI,CAAC;AAAA,IACpD,GAAI,YAAY,EAAE,UAAU,IAAI,CAAC;AAAA,IACjC,GAAI,SAAS,EAAE,OAAO,IAAI,CAAC;AAAA,IAC3B,QAAQ,sBAAsB,OAAO,MAAM;AAAA,EAC7C;AACF;AASO,SAAS,uBACd,OACA,OACA,UAAqD,CAAC,GAC1C;AACZ,QAAM,YAAY,QAAQ;AAE1B,SAAO;AAAA,IACL,SAAS;AAAA,MACP,UAAU;AAAA,QACR;AAAA,UACE,MAAM;AAAA,UACN,SAAS,iBAAiB,KAAK;AAAA,QACjC;AAAA,MACF;AAAA,IACF;AAAA,IACA,OAAO,CAAC;AAAA,IACR,GAAI,aAAa,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,EAAE,UAAU,IAAI,CAAC;AAAA,IACtE,QAAQ,CAAC,eAAe,KAAK,CAAC;AAAA,EAChC;AACF;AAEA,SAAS,6BAAqC;AAAA,EAC5C;AAAA,EACA;AAAA,EACA,WAAW;AACb,GAIwB;AACtB,QAAM,WAAgC;AAAA,IACpC;AAAA,MACE,MAAM;AAAA,MACN,SAAS,iBAAiB,KAAK;AAAA,IACjC;AAAA,EACF;AAEA,MAAI,WAAW,UAAa,oBAAoB,SAAS,GAAG;AAC1D,aAAS,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,GAAI,WAAW,SAAY,EAAE,SAAS,iBAAiB,MAAM,EAAE,IAAI,CAAC;AAAA,MACpE,GAAI,oBAAoB,SAAS,IAC7B,EAAE,WAAW,oBAAoB,IACjC,CAAC;AAAA,IACP,CAAC;AAAA,EACH;AAEA,SAAO;AACT;AAEA,SAAS,yBACP,OACkB;AAClB,UAAQ,SAAS,CAAC,GAAG,IAAI,CAAC,SAAS;AACjC,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,QAAQ;AAAA,MACR,OAAO;AAAA,MACP,UAAU;AAAA,MACV,GAAG;AAAA,IACL,IAAI;AACJ,UAAM,OAAO,2BAA2B,YAAY;AACpD,UAAM,SAAS,YAAY,SAAS;AACpC,UAAM,QAAQ,uBAAuB,QAAQ;AAC7C,UAAM,WAAW,cAAc,kBAAkB,WAAW,IAAI;AAEhE,WAAO;AAAA,MACL,GAAG;AAAA,MACH,GAAI,OAAO,EAAE,WAAW,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,MACzC,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,MACzB,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,EACF,CAAC;AACH;AAEA,SAAS,2BACP,OACuC;AACvC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,cACL,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,IACvB,aACA;AACN;AAEA,SAAS,uBACP,OACqC;AACrC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAEA,SAAS,yBACP,kBACA,iBACA;AACA,QAAM,YAAY;AAAA,IAChB,GAAI,oBAAoB,CAAC;AAAA,IACzB,GAAI,k
BAAkB,gBAAgB,eAAe,IAAI,CAAC;AAAA,EAC5D;AAEA,SAAO,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,YAAY;AACzD;AAEA,SAAS,sBACP,QACkC;AAClC,UAAQ,UAAU,CAAC,GAAG,IAAI,CAAC,UAAU;AACnC,UAAM,aAAa,YAAY,KAAK;AAEpC,QACE,cACA,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,KACzB,OAAO,KAAK,UAAU,EAAE,SAAS,GACjC;AACA,aAAO;AAAA,IACT;AAEA,WAAO,eAAe,KAAK;AAAA,EAC7B,CAAC;AACH;AAEA,SAAS,sBACP,QAC+B;AAC/B,MAAI,CAAC,MAAM,QAAQ,MAAM,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,OAChB,IAAI,oBAAoB,EACxB,OAAO,CAAC,UAAoC,QAAQ,KAAK,CAAC;AAE7D,SAAO,WAAW,SAAS,IAAI,aAAa;AAC9C;AAEA,SAAS,qBAAqB,OAA6C;AACzE,MAAI,CAAC,aAAa,KAAK,GAAG;AACxB,WAAO;AAAA,EACT;AAEA,QAAM;AAAA,IACJ,UAAU;AAAA,IACV,OAAO;AAAA,IACP,GAAG;AAAA,EACL,IAAI;AACJ,QAAMC,UAAS,MAAM,QAAQ,QAAQ,IAAI,WAAW,CAAC,GAClD,IAAI,CAAC,SAAS,oBAAoB,IAAI,CAAC,EACvC,OAAO,CAAC,SAAiC,QAAQ,IAAI,CAAC;AACzD,QAAM,WAAW,aAAa,WAAW,IACrC,kBAAkB,WAAW,IAC7B;AAEJ,MAAIA,OAAM,WAAW,KAAK,CAAC,YAAY,MAAM,CAAC,YAAY,MAAM;AAC9D,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IAC/B,OAAAA;AAAA,EACF;AACF;AAEA,SAAS,oBAAoB,MAA2C;AACtE,MAAI,CAAC,aAAa,IAAI,KAAK,OAAO,KAAK,SAAS,YAAY,CAAC,KAAK,MAAM;AACtE,WAAO;AAAA,EACT;AAEA,QAAM;AAAA,IACJ,YAAY;AAAA,IACZ,OAAO;AAAA,IACP,QAAQ;AAAA,IACR,GAAG;AAAA,EACL,IAAI;AACJ,QAAM,aAAa,gBACf,aAAa,aAAa,IACxB,kBAAkB,aAAa,IAC/B,SACF;AACJ,QAAM,QAAQ,mBAAmB,QAAQ;AACzC,QAAM,SAAS,0BAA0B,SAAS;AAElD,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAI,aACA,EAAE,WAAmD,IACrD,CAAC;AAAA,IACL,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,IACzB,GAAI,SAAS,EAAE,OAAO,IAAI,CAAC;AAAA,EAC7B;AACF;AAEA,SAAS,0BACP,QACmC;AACnC,MAAI,CAAC,MAAM,QAAQ,MAAM,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,OAChB,IAAI,wBAAwB,EAC5B,OAAO,CAAC,UAAwC,QAAQ,KAAK,CAAC;AAEjE,SAAO,WAAW,SAAS,IAAI,aAAa;AAC9C;AAEA,SAAS,yBACP,OACiC;AACjC,MAAI,CAAC,aAAa,KAAK,KAAK,OAAO,MAAM,SAAS,YAAY,CAAC,MAAM,MAAM;AACzE,WAAO;AAAA,EACT;AAEA,QAAM,EAAE,YAAY,eAAe,GAAG,YAAY,IAChD;AACF,QAAM,aAAa,gBACf,aAAa,aAAa,IACxB,kBAAkB,aAAa,IAC/B,SACF;AAEJ,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAI,aACA,EAAE,WAAmD,IACrD,CAAC;AAAA,EACP;AACF;AAGO,SAAS,mBACd,OACqC;AACrC,MAAI,UAAU,QAAW
;AACvB,WAAO;AAAA,EACT;AAEA,MAAI,iBAAiB,OAAO;AAC1B,UAAMC,WAAU;AAAA,MACd;AAAA,IACF;AAEA,WAAO;AAAA,MACL,GAAIA,YAAW,CAAC;AAAA,MAChB,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,MACE,SACA,OAAO,UAAU,YACjB,CAAC,MAAM,QAAQ,KAAK,KACpB,OAAQ,MAAgC,YAAY,UACpD;AACA,UAAM,aAAa,kBAAkB,KAAgC;AACrE,UAAM,EAAE,SAAAC,UAAS,MAAAC,OAAM,GAAGF,SAAQ,IAAI,cAAc,CAAC;AAErD,WAAO;AAAA,MACL,GAAGA;AAAA,MACH,SAASC;AAAA,MACT,GAAI,OAAOC,UAAS,WAAW,EAAE,MAAAA,MAAK,IAAI,CAAC;AAAA,IAC7C;AAAA,EACF;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAGO,SAAS,wBACd,YACsC;AACtC,SAAO,kBAAkB,UAAU;AACrC;AAGO,SAAS,2BACd,OACA,UAAiC,CAAC,GAClC;AACA,SAAO;AAAA,IACL,wBAAwB,OAAO,YAAY,QAAQ;AAAA,IACnD,wBAAwB,OAAO;AAAA,IAC/B,yBAAyB,OAAO;AAAA,IAChC,6BAA6B,OAAO;AAAA,IACpC,8BAA8B,OAAO;AAAA,IACrC,wCAAwC,OAAO;AAAA,EACjD;AACF;AAQO,SAAS,oBACd,OACA,UAAsC,CAAC,GACrB;AAClB,SAAO,MAAM,IAAI,CAAC,MAAM,UAAU;AAChC,UAAM,YAAY,KAAK,QAAQ,mBAAmB,KAAK,KAAK,IAAI;AAChE,UAAM,SAAS,QAAQ,eACnB,GAAG,QAAQ,YAAY,IAAI,QAAQ,CAAC,KACpC,KAAK;AAET,WAAO;AAAA,MACL,GAAI,SAAS,EAAE,IAAI,OAAO,IAAI,CAAC;AAAA,MAC/B,GAAI,QAAQ,UAAU,EAAE,SAAS,QAAQ,QAAQ,IAAI,CAAC;AAAA,MACtD,GAAI,QAAQ,WAAW,EAAE,UAAU,QAAQ,SAAS,IAAI,CAAC;AAAA,MACzD,MAAM,KAAK;AAAA,MACX,MAAM;AAAA,MACN,GAAI,KAAK,YAAY,EAAE,WAAW,KAAK,UAAU,IAAI,CAAC;AAAA,MACtD,GAAI,KAAK,aAAa,EAAE,YAAY,KAAK,WAAW,IAAI,CAAC;AAAA,MACzD,GAAI,KAAK,eAAe,SAAY,EAAE,YAAY,KAAK,WAAW,IAAI,CAAC;AAAA,MACvE,QAAQ,YAAY,UAAU;AAAA,MAC9B,GAAI,YAAY,EAAE,OAAO,UAAU,IAAI,CAAC;AAAA,MACxC,YAAY,wBAAwB;AAAA,QAClC,yBAAyB;AAAA,QACzB,oBAAoB,KAAK;AAAA,QACzB,oBAAoB;AAAA,QACpB,GAAI,KAAK,KAAK,EAAE,uBAAuB,KAAK,GAAG,IAAI,CAAC;AAAA,QACpD,GAAI,KAAK,cAAc,SACnB,EAAE,8BAA8B,KAAK,UAAU,IAC/C,CAAC;AAAA,QACL,GAAI,KAAK,WAAW,SAChB,EAAE,2BAA2B,KAAK,OAAO,IACzC,CAAC;AAAA,MACP,CAAC;AAAA,IACH;AAAA,EACF,CAAC;AACH;AAQO,SAAS,eACd,KACA,SAC6B;AAC7B,UAAI,mBAAM,GAAG,EAAE,SAAS,GAAG;AACzB,WAAO;AAAA,EACT;AAEA,QAAM,UAAU,QAAQ,MAAM,uBA
AuB;AACrD,QAAM,aAAa,GAAG,OAAO;AAC7B,QAAM,aAAa,QAAQ,WAAW,QAAQ,IAAI,QAAQ,UAAU,QAAQ;AAC5E,QAAM,YACJ,IAAI,OAAO,SAAS,IAAI,mBAAmB,IAAI,OAAO,CAAC,CAAC,IAAI;AAC9D,QAAM,UAA0B;AAAA,IAC9B,IAAI;AAAA,IACJ;AAAA,IACA,MAAM,QAAQ;AAAA,IACd,MAAM;AAAA,IACN,WAAW,QAAQ,UAAU,YAAY;AAAA,IACzC,YAAY,QAAQ,WAAW,YAAY;AAAA,IAC3C;AAAA,IACA,QAAQ,YAAY,UAAU;AAAA,IAC9B,GAAI,YAAY,EAAE,OAAO,UAAU,IAAI,CAAC;AAAA,IACxC,YAAY,wBAAwB;AAAA,MAClC,yBAAyB,QAAQ,iBAAiB;AAAA,MAClD,wBAAwB,QAAQ;AAAA,MAChC,GAAG,2BAA2B,IAAI,KAAK;AAAA,IACzC,CAAC;AAAA,EACH;AACA,QAAM,YAAY,wBAAoB,uBAAU,IAAI,OAAO,GAAG;AAAA,IAC5D;AAAA,IACA,UAAU;AAAA,IACV,cAAc,GAAG,OAAO;AAAA,EAC1B,CAAC;AACD,QAAM,QAAyB;AAAA,IAC7B,IAAI;AAAA,IACJ,MAAM,QAAQ;AAAA,IACd,WAAW,QAAQ,UAAU,YAAY;AAAA,IACzC,YAAY,QAAQ,WAAW,YAAY;AAAA,IAC3C;AAAA,IACA,GAAI,QAAQ,SAAS,EAAE,UAAU,EAAE,QAAQ,QAAQ,OAAO,EAAE,IAAI,CAAC;AAAA,IACjE,OAAO,CAAC,SAAS,GAAG,SAAS;AAAA,EAC/B;AAEA,MAAI,SAAS,CAAC,KAAK;AACnB,SAAO;AACT;AAEA,IAAI,uBAAuB;AAE3B,SAAS,yBAAyB;AAChC,0BAAwB;AACxB,SAAO,SAAS,oBAAoB;AACtC;AAiBO,SAAS,wBACd,OACA,KACiB;AACjB,QAAM,YACJ,iBAAiB,QACb,QACA,IAAI,MAAM,OAAO,SAAS,eAAe,CAAC;AAChD,SAAO,OAAO,OAAO,WAAW;AAAA,IAC9B,gBAAgB;AAAA,EAClB,CAAC;AACH;AAgBO,SAAS,uBAAuB,OAAwC;AAC7E,MACE,SACA,OAAO,UAAU,YACjB,oBAAoB,SACpB,aAAc,MAAuC,cAAc,GACnE;AACA,WAAQ,MAAyC;AAAA,EACnD;AAEA,SAAO;AACT;AAGO,SAAS,aAAa,OAAqC;AAChE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,WAAO;AAAA,EACT;AAEA,QAAM,YAAY;AAMlB,SACE,oBAAoB,UAAU,OAAO,KACrC,QAAQ,UAAU,KAAK,KACvB,OAAO,UAAU,UAAU,YAC3B,CAAC,MAAM,QAAQ,UAAU,KAAK,KAC9B,MAAM,QAAQ,UAAU,MAAM;AAElC;AAGO,SAAS,oBACd,OAC4B;AAC5B,SACE,QAAQ,KAAK,KACb,OAAO,UAAU,YACjB,UAAU,QACV,cAAc,SACd,MAAM,QAAS,MAAiC,QAAQ;AAE5D;AAGO,SAAS,wBACd,QACkC;AAClC,MACE,UACA,OAAO,WAAW,YAClB,MAAM,QAAS,OAAmC,MAAM,GACxD;AACA,WAAQ,OAAwD;AAAA,EAClE;AAEA,SAAO,CAAC;AACV;AAGO,SAAS,eAAe,OAA2C;AACxE,MAAI,iBAAiB,OAAO;AAC1B,WAAO;AAAA,MACL,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,SAAS,OAAO,KAAK;AAAA,EACvB;AACF;","names":["import_core","normalized","toolCalls","spans","details","message","type"]}
package/dist/harness.mjs CHANGED
@@ -91,7 +91,6 @@ function createHarness(options) {
91
91
  try {
92
92
  const result = await options.run({
93
93
  input,
94
- metadata: context.metadata,
95
94
  signal: context.signal,
96
95
  artifacts: context.artifacts,
97
96
  setArtifact: context.setArtifact