vitest-evals 0.9.0-beta.4 → 0.9.0-beta.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -18,11 +18,9 @@ npm install -D @vitest-evals/harness-ai-sdk
18
18
  npm install -D @vitest-evals/harness-openai-agents
19
19
  ```
20
20
 
21
- For GitHub Actions summaries and annotations, install the JSON post-processor:
22
-
23
- ```sh
24
- npm install -D @vitest-evals/github-reporter
25
- ```
21
+ For GitHub Actions summaries and annotations, emit Vitest JSON and use the
22
+ native `getsentry/vitest-evals` action. No extra npm package is needed in the
23
+ workflow.
26
24
 
27
25
  ## Core Model
28
26
 
@@ -159,13 +157,18 @@ vitest run evals \
159
157
  --reporter=vitest-evals/reporter \
160
158
  --reporter=json \
161
159
  --outputFile.json=vitest-results.json
160
+ ```
162
161
 
163
- vitest-evals-github-report
162
+ ```yaml
163
+ - uses: getsentry/vitest-evals@v0
164
+ if: always()
165
+ with:
166
+ results: vitest-results.json
164
167
  ```
165
168
 
166
- The GitHub reporter writes a job summary when `GITHUB_STEP_SUMMARY` is present,
167
- emits short failure annotations in Actions, and can publish a separate Check Run
168
- with `--check-run` when `checks: write` permission is configured.
169
+ The GitHub reporter action writes a job summary, emits short failure
170
+ annotations, can publish a separate Check Run, and can reduce sharded eval JSON
171
+ artifacts into one combined report.
169
172
 
170
173
  ## Existing Agents
171
174
 
@@ -35,7 +35,6 @@ type UsageSummary = {
35
35
  outputTokens?: number;
36
36
  reasoningTokens?: number;
37
37
  totalTokens?: number;
38
- estimatedCost?: number;
39
38
  toolCalls?: number;
40
39
  retries?: number;
41
40
  metadata?: Record<string, JsonValue>;
package/dist/harness.d.ts CHANGED
@@ -35,7 +35,6 @@ type UsageSummary = {
35
35
  outputTokens?: number;
36
36
  reasoningTokens?: number;
37
37
  totalTokens?: number;
38
- estimatedCost?: number;
39
38
  toolCalls?: number;
40
39
  retries?: number;
41
40
  metadata?: Record<string, JsonValue>;
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/harness.ts"],"sourcesContent":["/** Primitive scalar values allowed in normalized JSON-safe eval data. */\nexport type JsonPrimitive = string | number | boolean | null;\n\n/** JSON-safe value shape used by normalized sessions, artifacts, and errors. */\nexport type JsonValue =\n | JsonPrimitive\n | JsonValue[]\n | { [key: string]: JsonValue };\n\n/** Normalized record for one tool call observed during a harness run. */\nexport type ToolCallRecord = {\n id?: string;\n name: string;\n arguments?: Record<string, JsonValue>;\n result?: JsonValue;\n error?: {\n message: string;\n type?: string;\n [key: string]: JsonValue | undefined;\n };\n startedAt?: string;\n finishedAt?: string;\n durationMs?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** Normalized message recorded in a harness session transcript. */\nexport type NormalizedMessage = {\n role: \"system\" | \"user\" | \"assistant\" | \"tool\";\n content?: JsonValue;\n toolCalls?: ToolCallRecord[];\n metadata?: Record<string, JsonValue>;\n};\n\n/** Provider usage summary attached to a normalized harness run. */\nexport type UsageSummary = {\n provider?: string;\n model?: string;\n inputTokens?: number;\n outputTokens?: number;\n reasoningTokens?: number;\n totalTokens?: number;\n estimatedCost?: number;\n toolCalls?: number;\n retries?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** Timing summary attached to a normalized harness run. */\nexport type TimingSummary = {\n totalMs?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** JSON-serializable transcript produced by the system under test. */\nexport type NormalizedSession = {\n messages: NormalizedMessage[];\n provider?: string;\n model?: string;\n metadata?: Record<string, JsonValue>;\n};\n\ntype OutputField<TOutput extends JsonValue | undefined> =\n undefined extends TOutput ? { output?: TOutput } : { output: TOutput };\n\n/** Normalized result returned by every harness execution. */\nexport type HarnessRun<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n session: NormalizedSession;\n usage: UsageSummary;\n timings?: TimingSummary;\n artifacts?: Record<string, JsonValue>;\n errors: Array<Record<string, JsonValue>>;\n};\n\n/** Error value with an attached partial or complete normalized harness run. */\nexport type HarnessRunError = Error & {\n vitestEvalsRun: HarnessRun;\n};\n\n/** Per-run metadata shape accepted by harnesses and eval tests. */\nexport type HarnessMetadata = Record<string, unknown>;\n\n/** Runtime context passed from the eval fixture into a harness run. */\nexport type HarnessContext<\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n metadata: Readonly<TMetadata>;\n signal?: AbortSignal;\n artifacts: Record<string, JsonValue>;\n setArtifact: (name: string, value: JsonValue) => void;\n};\n\n/** Adapter that executes the system under test and returns a normalized run. */\nexport type Harness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n name: string;\n run: (\n input: TInput,\n context: HarnessContext<TMetadata>,\n ) => Promise<HarnessRun<TOutput>>;\n};\n\n/** Value or promise accepted by lightweight harness callbacks. */\nexport type MaybePromise<T> = T | Promise<T>;\n\n/** Lightweight tool-call record accepted by `createHarness(...)` results. */\nexport type SimpleToolCallRecord = Omit<\n ToolCallRecord,\n \"arguments\" | \"result\" | \"error\" | \"metadata\"\n> & {\n arguments?: unknown;\n result?: unknown;\n error?: unknown;\n metadata?: Record<string, unknown>;\n};\n\n/** Lightweight result shape normalized by `createHarness(...)`. */\nexport type SimpleHarnessResult<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n messages?: NormalizedMessage[];\n toolCalls?: SimpleToolCallRecord[];\n usage?: UsageSummary;\n timings?: TimingSummary;\n artifacts?: Record<string, unknown>;\n metadata?: Record<string, unknown>;\n errors?: unknown[];\n};\n\n/** Either a complete normalized run or a lightweight result to normalize. */\nexport type HarnessResultLike<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;\n\n/** Arguments passed to the `createHarness(...)` convenience callback. */\nexport type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {\n input: TInput;\n metadata: Readonly<TMetadata>;\n signal?: AbortSignal;\n artifacts: HarnessContext<TMetadata>[\"artifacts\"];\n setArtifact: HarnessContext<TMetadata>[\"setArtifact\"];\n};\n\n/** Options for creating a lightweight custom application harness. */\nexport type CreateHarnessOptions<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n name: string;\n run: (\n args: CreateHarnessRunArgs<TInput, TMetadata>,\n ) => MaybePromise<HarnessResultLike<TOutput>>;\n};\n\nfunction isJsonPrimitive(value: unknown): value is JsonPrimitive {\n return (\n value === null ||\n typeof value === \"string\" ||\n typeof value === \"number\" ||\n typeof value === \"boolean\"\n );\n}\n\nfunction isJsonRecord(value: unknown): value is Record<string, unknown> {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\nfunction normalizeJsonArray(value: unknown[]): JsonValue[] {\n return value.map((item) => {\n const normalized = toJsonValue(item);\n return normalized === undefined ? null : normalized;\n });\n}\n\nfunction normalizeJsonObject(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n const normalized: Record<string, JsonValue> = {};\n\n for (const [key, entryValue] of Object.entries(value)) {\n const entry = toJsonValue(entryValue);\n if (entry !== undefined) {\n normalized[key] = entry;\n }\n }\n\n return normalized;\n}\n\n/** Returns true when a value exposes a callable method with the given name. */\nexport function hasCallableMethod(value: unknown, methodName: string) {\n return (\n value !== null &&\n (typeof value === \"object\" || typeof value === \"function\") &&\n methodName in value &&\n typeof (value as Record<string, unknown>)[methodName] === \"function\"\n );\n}\n\n/** Normalizes an unknown value into the JSON-safe shape used by harness runs. */\nexport function toJsonValue(value: unknown): JsonValue | undefined {\n if (isJsonPrimitive(value)) {\n return value;\n }\n\n if (Array.isArray(value)) {\n return normalizeJsonArray(value);\n }\n\n if (isJsonRecord(value)) {\n return normalizeJsonObject(value);\n }\n\n return undefined;\n}\n\n/** Drops non-JSON properties from a record while preserving valid values. */\nexport function normalizeRecord(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n return normalizeJsonObject(value);\n}\n\n/** Normalizes metadata and omits the field entirely when nothing survives. */\nexport function normalizeMetadata(\n value: Record<string, unknown>,\n): Record<string, JsonValue> | undefined {\n const normalized = normalizeRecord(value);\n return Object.keys(normalized).length > 0 ? normalized : undefined;\n}\n\n/** Converts arbitrary content into the JSON-safe message content shape. */\nexport function normalizeContent(value: unknown): JsonValue {\n const normalized = toJsonValue(value);\n return normalized !== undefined ? normalized : String(value);\n}\n\n/** Creates a harness from the common \"run app code and return output\" shape. */\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata>;\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata> {\n const harness: Harness<TInput, TOutput, TMetadata> = {\n name: options.name,\n run: async (input, context) => {\n const result = await options.run({\n input,\n metadata: context.metadata,\n signal: context.signal,\n artifacts: context.artifacts,\n setArtifact: context.setArtifact,\n });\n\n return normalizeHarnessRun(input, result, context);\n },\n };\n\n return harness;\n}\n\n/** Normalizes a lightweight harness result into the reporter-facing run shape. */\nexport function normalizeHarnessRun<\n TInput = unknown,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n>(\n input: TInput,\n result: HarnessResultLike<TOutput>,\n context?: HarnessContext<TMetadata>,\n): HarnessRun<TOutput> {\n if (isHarnessRun(result)) {\n if (\n context &&\n Object.keys(context.artifacts).length > 0 &&\n !result.artifacts\n ) {\n return {\n ...result,\n artifacts: context.artifacts,\n };\n }\n\n return result;\n }\n\n const output = result.output;\n const toolCalls = normalizeSimpleToolCalls(result.toolCalls);\n const usage = result.usage ?? {};\n const messages =\n result.messages ??\n createDefaultSessionMessages({\n input,\n output,\n toolCalls,\n });\n const metadata = result.metadata\n ? normalizeMetadata(result.metadata)\n : undefined;\n const artifacts = normalizeMergedArtifacts(\n context?.artifacts,\n result.artifacts,\n );\n\n return {\n session: {\n messages,\n ...(usage.provider ? { provider: usage.provider } : {}),\n ...(usage.model ? { model: usage.model } : {}),\n ...(metadata ? { metadata } : {}),\n },\n ...(output !== undefined ? { output } : {}),\n usage,\n ...(result.timings ? { timings: result.timings } : {}),\n ...(artifacts ? { artifacts } : {}),\n errors: normalizeSimpleErrors(result.errors),\n } as HarnessRun<TOutput>;\n}\n\nfunction createDefaultSessionMessages<TInput>({\n input,\n output,\n toolCalls: normalizedToolCalls,\n}: {\n input: TInput;\n output: JsonValue | undefined;\n toolCalls: ToolCallRecord[];\n}): NormalizedMessage[] {\n const messages: NormalizedMessage[] = [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ];\n\n if (output !== undefined || normalizedToolCalls.length > 0) {\n messages.push({\n role: \"assistant\",\n ...(output !== undefined ? { content: normalizeContent(output) } : {}),\n ...(normalizedToolCalls.length > 0\n ? { toolCalls: normalizedToolCalls }\n : {}),\n });\n }\n\n return messages;\n}\n\nfunction normalizeSimpleToolCalls(\n calls: SimpleToolCallRecord[] | undefined,\n): ToolCallRecord[] {\n return (calls ?? []).map((call) => {\n const {\n arguments: rawArguments,\n result: rawResult,\n error: rawError,\n metadata: rawMetadata,\n ...toolCall\n } = call;\n const args = normalizeToolCallArguments(rawArguments);\n const result = toJsonValue(rawResult);\n const error = normalizeToolCallError(rawError);\n const metadata = rawMetadata ? normalizeMetadata(rawMetadata) : undefined;\n\n return {\n ...toolCall,\n ...(args ? { arguments: args } : {}),\n ...(result !== undefined ? { result } : {}),\n ...(error ? { error } : {}),\n ...(metadata ? { metadata } : {}),\n };\n });\n}\n\nfunction normalizeToolCallArguments(\n value: unknown,\n): Record<string, JsonValue> | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const normalized = toJsonValue(value);\n return normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized)\n ? normalized\n : undefined;\n}\n\nfunction normalizeToolCallError(\n value: unknown,\n): ToolCallRecord[\"error\"] | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const serialized = serializeError(value);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\nfunction normalizeMergedArtifacts(\n contextArtifacts: Record<string, JsonValue> | undefined,\n resultArtifacts: Record<string, unknown> | undefined,\n) {\n const artifacts = {\n ...(contextArtifacts ?? {}),\n ...(resultArtifacts ? normalizeRecord(resultArtifacts) : {}),\n };\n\n return Object.keys(artifacts).length > 0 ? artifacts : undefined;\n}\n\nfunction normalizeSimpleErrors(\n errors: unknown[] | undefined,\n): Array<Record<string, JsonValue>> {\n return (errors ?? []).map((error) => {\n const normalized = toJsonValue(error);\n\n if (\n normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized) &&\n Object.keys(normalized).length > 0\n ) {\n return normalized;\n }\n\n return serializeError(error);\n });\n}\n\n/** Flattens every recorded tool call from a normalized session. */\nexport function toolCalls(session: NormalizedSession): ToolCallRecord[] {\n return session.messages.flatMap((message) => message.toolCalls ?? []);\n}\n\n/** Filters normalized session messages by role. */\nexport function messagesByRole(\n session: NormalizedSession,\n role: NormalizedMessage[\"role\"],\n): NormalizedMessage[] {\n return session.messages.filter((message) => message.role === role);\n}\n\n/** Returns every normalized system message from a session. */\nexport function systemMessages(session: NormalizedSession) {\n return messagesByRole(session, \"system\");\n}\n\n/** Returns every normalized user message from a session. */\nexport function userMessages(session: NormalizedSession) {\n return messagesByRole(session, \"user\");\n}\n\n/** Returns every normalized assistant message from a session. */\nexport function assistantMessages(session: NormalizedSession) {\n return messagesByRole(session, \"assistant\");\n}\n\n/** Returns every normalized tool message from a session. */\nexport function toolMessages(session: NormalizedSession) {\n return messagesByRole(session, \"tool\");\n}\n\n/** Attaches a partial or complete harness run to an arbitrary thrown error. */\nexport function attachHarnessRunToError(\n error: unknown,\n run: HarnessRun,\n): HarnessRunError {\n const baseError =\n error instanceof Error\n ? error\n : new Error(String(error ?? \"Unknown error\"));\n return Object.assign(baseError, {\n vitestEvalsRun: run,\n });\n}\n\n/** Reads an attached harness run back off a previously wrapped error value. */\nexport function getHarnessRunFromError(error: unknown): HarnessRun | undefined {\n if (\n error &&\n typeof error === \"object\" &&\n \"vitestEvalsRun\" in error &&\n isHarnessRun((error as { vitestEvalsRun?: unknown }).vitestEvalsRun)\n ) {\n return (error as { vitestEvalsRun: HarnessRun }).vitestEvalsRun;\n }\n\n return undefined;\n}\n\n/** Returns true when a value matches the normalized `HarnessRun` contract. */\nexport function isHarnessRun(value: unknown): value is HarnessRun {\n if (!value || typeof value !== \"object\") {\n return false;\n }\n\n const candidate = value as {\n session?: unknown;\n usage?: unknown;\n errors?: unknown;\n };\n\n return (\n isNormalizedSession(candidate.session) &&\n Boolean(candidate.usage) &&\n typeof candidate.usage === \"object\" &&\n !Array.isArray(candidate.usage) &&\n Array.isArray(candidate.errors)\n );\n}\n\n/** Returns true when a value matches the normalized session contract. */\nexport function isNormalizedSession(\n value: unknown,\n): value is NormalizedSession {\n return (\n Boolean(value) &&\n typeof value === \"object\" &&\n value !== null &&\n \"messages\" in value &&\n Array.isArray((value as { messages?: unknown }).messages)\n );\n}\n\n/** Reuses pre-normalized harness errors when a runtime already returns them. */\nexport function resolveHarnessRunErrors(\n result: unknown,\n): Array<Record<string, JsonValue>> {\n if (\n result &&\n typeof result === \"object\" &&\n Array.isArray((result as Record<string, unknown>).errors)\n ) {\n return (result as { errors: Array<Record<string, JsonValue>> }).errors;\n }\n\n return [];\n}\n\n/** Serializes an arbitrary thrown value into the normalized error shape. */\nexport function serializeError(error: unknown): Record<string, JsonValue> {\n if (error instanceof Error) {\n return {\n type: error.name,\n message: error.message,\n };\n }\n\n return {\n type: \"Error\",\n message: String(error),\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAgKA,SAAS,gBAAgB,OAAwC;AAC/D,SACE,UAAU,QACV,OAAO,UAAU,YACjB,OAAO,UAAU,YACjB,OAAO,UAAU;AAErB;AAEA,SAAS,aAAa,OAAkD;AACtE,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAEA,SAAS,mBAAmB,OAA+B;AACzD,SAAO,MAAM,IAAI,CAAC,SAAS;AACzB,UAAM,aAAa,YAAY,IAAI;AACnC,WAAO,eAAe,SAAY,OAAO;AAAA,EAC3C,CAAC;AACH;AAEA,SAAS,oBACP,OAC2B;AAC3B,QAAM,aAAwC,CAAC;AAE/C,aAAW,CAAC,KAAK,UAAU,KAAK,OAAO,QAAQ,KAAK,GAAG;AACrD,UAAM,QAAQ,YAAY,UAAU;AACpC,QAAI,UAAU,QAAW;AACvB,iBAAW,GAAG,IAAI;AAAA,IACpB;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,kBAAkB,OAAgB,YAAoB;AACpE,SACE,UAAU,SACT,OAAO,UAAU,YAAY,OAAO,UAAU,eAC/C,cAAc,SACd,OAAQ,MAAkC,UAAU,MAAM;AAE9D;AAGO,SAAS,YAAY,OAAuC;AACjE,MAAI,gBAAgB,KAAK,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,WAAO,mBAAmB,KAAK;AAAA,EACjC;AAEA,MAAI,aAAa,KAAK,GAAG;AACvB,WAAO,oBAAoB,KAAK;AAAA,EAClC;AAEA,SAAO;AACT;AAGO,SAAS,gBACd,OAC2B;AAC3B,SAAO,oBAAoB,KAAK;AAClC;AAGO,SAAS,kBACd,OACuC;AACvC,QAAM,aAAa,gBAAgB,KAAK;AACxC,SAAO,OAAO,KAAK,UAAU,EAAE,SAAS,IAAI,aAAa;AAC3D;AAGO,SAAS,iBAAiB,OAA2B;AAC1D,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,eAAe,SAAY,aAAa,OAAO,KAAK;AAC7D;AAUO,SAAS,cAKd,SACqC;AACrC,QAAM,UAA+C;AAAA,IACnD,MAAM,QAAQ;AAAA,IACd,KAAK,OAAO,OAAO,YAAY;AAC7B,YAAM,SAAS,MAAM,QAAQ,IAAI;AAAA,QAC/B;AAAA,QACA,UAAU,QAAQ;AAAA,QAClB,QAAQ,QAAQ;AAAA,QAChB,WAAW,QAAQ;AAAA,QACnB,aAAa,QAAQ;AAAA,MACvB,CAAC;AAED,aAAO,oBAAoB,OAAO,QAAQ,OAAO;AAAA,IACnD;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,oBAKd,OACA,QACA,SACqB;AACrB,MAAI,aAAa,MAAM,GAAG;AACxB,QACE,WACA,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,OAAO,WACR;AACA,aAAO;AAAA,QACL,GAAG;AAAA,QACH,WAAW,QAAQ;AAAA,MACrB;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,OAAO;AACtB,QAAMA,aAAY,yBAAyB,OAAO,SAAS;AAC3D,QAAM,QAAQ,OAAO,SAAS,CAAC;AAC/B,QAAM,WACJ,OAAO,YACP,6BAA6B;AAAA,IAC3B;AAAA,IACA;AAAA,IACA,WAAAA;AAAA,EACF,CAAC;AACH,QAAM,WAAW,OAAO,WACpB,kBAAkB,OAAO,QAAQ,IACjC;AACJ,QAAM,YAAY;AAAA,IAChB,SAAS;AAAA,IACT,OAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,SAAS;AAAA,MACP;AAAA,MACA,GAAI,MAAM,WAAW,EAAE,UAAU,MAAM,SAAS,IAAI,CAAC;AAAA,MACrD,GAAI,MAAM,QAAQ,EAAE,OAAO,MAAM,MAAM,IAAI,CAAC;AAAA,MAC5C,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,IACA,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,IACzC;AAAA,IACA,GAAI,OAAO,UAAU,EAAE,SAAS,OAAO,QAAQ,IAAI,CAAC;AAAA,IACpD,GAAI,YAAY,EAAE,UAAU,IAAI,CAAC;AAAA,IACjC,QAAQ,sBAAsB,OAAO,MAAM;AAAA,EAC7C;AACF;AAEA,SAAS,6BAAqC;AAAA,EAC5C;AAAA,EACA;AAAA,EACA,WAAW;AACb,GAIwB;AACtB,QAAM,WAAgC;AAAA,IACpC;AAAA,MACE,MAAM;AAAA,MACN,SAAS,iBAAiB,KAAK;AAAA,IACjC;AAAA,EACF;AAEA,MAAI,WAAW,UAAa,oBAAoB,SAAS,GAAG;AAC1D,aAAS,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,GAAI,WAAW,SAAY,EAAE,SAAS,iBAAiB,MAAM,EAAE,IAAI,CAAC;AAAA,MACpE,GAAI,oBAAoB,SAAS,IAC7B,EAAE,WAAW,oBAAoB,IACjC,CAAC;AAAA,IACP,CAAC;AAAA,EACH;AAEA,SAAO;AACT;AAEA,SAAS,yBACP,OACkB;AAClB,UAAQ,SAAS,CAAC,GAAG,IAAI,CAAC,SAAS;AACjC,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,QAAQ;AAAA,MACR,OAAO;AAAA,MACP,UAAU;AAAA,MACV,GAAG;AAAA,IACL,IAAI;AACJ,UAAM,OAAO,2BAA2B,YAAY;AACpD,UAAM,SAAS,YAAY,SAAS;AACpC,UAAM,QAAQ,uBAAuB,QAAQ;AAC7C,UAAM,WAAW,cAAc,kBAAkB,WAAW,IAAI;AAEhE,WAAO;AAAA,MACL,GAAG;AAAA,MACH,GAAI,OAAO,EAAE,WAAW,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,MACzC,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,MACzB,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,EACF,CAAC;AACH;AAEA,SAAS,2BACP,OACuC;AACvC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,cACL,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,IACvB,aACA;AACN;AAEA,SAAS,uBACP,OACqC;AACrC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAEA,SAAS,yBACP,kBACA,iBACA;AACA,QAAM,YAAY;AAAA,IAChB,GAAI,oBAAoB,CAAC;AAAA,IACzB,GAAI,kBAAkB,gBAAgB,eAAe,IAAI,CAAC;AAAA,EAC5D;AAEA,SAAO,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,YAAY;AACzD;AAEA,SAAS,sBACP,QACkC;AAClC,UAAQ,UAAU,CAAC,GAAG,IAAI,CAAC,UAAU;AACnC,UAAM,aAAa,YAAY,KAAK;AAEpC,QACE,cACA,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,KACzB,OAAO,KAAK,UAAU,EAAE,SAAS,GACjC;AACA,aAAO;AAAA,IACT;AAEA,WAAO,eAAe,KAAK;AAAA,EAC7B,CAAC;AACH;AAGO,SAAS,UAAU,SAA8C;AACtE,SAAO,QAAQ,SAAS,QAAQ,CAAC,YAAY,QAAQ,aAAa,CAAC,CAAC;AACtE;AAGO,SAAS,eACd,SACA,MACqB;AACrB,SAAO,QAAQ,SAAS,OAAO,CAAC,YAAY,QAAQ,SAAS,IAAI;AACnE;AAGO,SAAS,eAAe,SAA4B;AACzD,SAAO,eAAe,SAAS,QAAQ;AACzC;AAGO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAGO,SAAS,kBAAkB,SAA4B;AAC5D,SAAO,eAAe,SAAS,WAAW;AAC5C;AAGO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAGO,SAAS,wBACd,OACA,KACiB;AACjB,QAAM,YACJ,iBAAiB,QACb,QACA,IAAI,MAAM,OAAO,SAAS,eAAe,CAAC;AAChD,SAAO,OAAO,OAAO,WAAW;AAAA,IAC9B,gBAAgB;AAAA,EAClB,CAAC;AACH;AAGO,SAAS,uBAAuB,OAAwC;AAC7E,MACE,SACA,OAAO,UAAU,YACjB,oBAAoB,SACpB,aAAc,MAAuC,cAAc,GACnE;AACA,WAAQ,MAAyC;AAAA,EACnD;AAEA,SAAO;AACT;AAGO,SAAS,aAAa,OAAqC;AAChE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,WAAO;AAAA,EACT;AAEA,QAAM,YAAY;AAMlB,SACE,oBAAoB,UAAU,OAAO,KACrC,QAAQ,UAAU,KAAK,KACvB,OAAO,UAAU,UAAU,YAC3B,CAAC,MAAM,QAAQ,UAAU,KAAK,KAC9B,MAAM,QAAQ,UAAU,MAAM;AAElC;AAGO,SAAS,oBACd,OAC4B;AAC5B,SACE,QAAQ,KAAK,KACb,OAAO,UAAU,YACjB,UAAU,QACV,cAAc,SACd,MAAM,QAAS,MAAiC,QAAQ;AAE5D;AAGO,SAAS,wBACd,QACkC;AAClC,MACE,UACA,OAAO,WAAW,YAClB,MAAM,QAAS,OAAmC,MAAM,GACxD;AACA,WAAQ,OAAwD;AAAA,EAClE;AAEA,SAAO,CAAC;AACV;AAGO,SAAS,eAAe,OAA2C;AACxE,MAAI,iBAAiB,OAAO;AAC1B,WAAO;AAAA,MACL,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,SAAS,OAAO,KAAK;AAAA,EACvB;AACF;","names":["toolCalls"]}
1
+ {"version":3,"sources":["../src/harness.ts"],"sourcesContent":["/** Primitive scalar values allowed in normalized JSON-safe eval data. */\nexport type JsonPrimitive = string | number | boolean | null;\n\n/** JSON-safe value shape used by normalized sessions, artifacts, and errors. */\nexport type JsonValue =\n | JsonPrimitive\n | JsonValue[]\n | { [key: string]: JsonValue };\n\n/** Normalized record for one tool call observed during a harness run. */\nexport type ToolCallRecord = {\n id?: string;\n name: string;\n arguments?: Record<string, JsonValue>;\n result?: JsonValue;\n error?: {\n message: string;\n type?: string;\n [key: string]: JsonValue | undefined;\n };\n startedAt?: string;\n finishedAt?: string;\n durationMs?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** Normalized message recorded in a harness session transcript. */\nexport type NormalizedMessage = {\n role: \"system\" | \"user\" | \"assistant\" | \"tool\";\n content?: JsonValue;\n toolCalls?: ToolCallRecord[];\n metadata?: Record<string, JsonValue>;\n};\n\n/** Provider usage summary attached to a normalized harness run. */\nexport type UsageSummary = {\n provider?: string;\n model?: string;\n inputTokens?: number;\n outputTokens?: number;\n reasoningTokens?: number;\n totalTokens?: number;\n toolCalls?: number;\n retries?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** Timing summary attached to a normalized harness run. */\nexport type TimingSummary = {\n totalMs?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** JSON-serializable transcript produced by the system under test. */\nexport type NormalizedSession = {\n messages: NormalizedMessage[];\n provider?: string;\n model?: string;\n metadata?: Record<string, JsonValue>;\n};\n\ntype OutputField<TOutput extends JsonValue | undefined> =\n undefined extends TOutput ? { output?: TOutput } : { output: TOutput };\n\n/** Normalized result returned by every harness execution. */\nexport type HarnessRun<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n session: NormalizedSession;\n usage: UsageSummary;\n timings?: TimingSummary;\n artifacts?: Record<string, JsonValue>;\n errors: Array<Record<string, JsonValue>>;\n};\n\n/** Error value with an attached partial or complete normalized harness run. */\nexport type HarnessRunError = Error & {\n vitestEvalsRun: HarnessRun;\n};\n\n/** Per-run metadata shape accepted by harnesses and eval tests. */\nexport type HarnessMetadata = Record<string, unknown>;\n\n/** Runtime context passed from the eval fixture into a harness run. */\nexport type HarnessContext<\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n metadata: Readonly<TMetadata>;\n signal?: AbortSignal;\n artifacts: Record<string, JsonValue>;\n setArtifact: (name: string, value: JsonValue) => void;\n};\n\n/** Adapter that executes the system under test and returns a normalized run. */\nexport type Harness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n name: string;\n run: (\n input: TInput,\n context: HarnessContext<TMetadata>,\n ) => Promise<HarnessRun<TOutput>>;\n};\n\n/** Value or promise accepted by lightweight harness callbacks. */\nexport type MaybePromise<T> = T | Promise<T>;\n\n/** Lightweight tool-call record accepted by `createHarness(...)` results. */\nexport type SimpleToolCallRecord = Omit<\n ToolCallRecord,\n \"arguments\" | \"result\" | \"error\" | \"metadata\"\n> & {\n arguments?: unknown;\n result?: unknown;\n error?: unknown;\n metadata?: Record<string, unknown>;\n};\n\n/** Lightweight result shape normalized by `createHarness(...)`. */\nexport type SimpleHarnessResult<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n messages?: NormalizedMessage[];\n toolCalls?: SimpleToolCallRecord[];\n usage?: UsageSummary;\n timings?: TimingSummary;\n artifacts?: Record<string, unknown>;\n metadata?: Record<string, unknown>;\n errors?: unknown[];\n};\n\n/** Either a complete normalized run or a lightweight result to normalize. */\nexport type HarnessResultLike<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;\n\n/** Arguments passed to the `createHarness(...)` convenience callback. */\nexport type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {\n input: TInput;\n metadata: Readonly<TMetadata>;\n signal?: AbortSignal;\n artifacts: HarnessContext<TMetadata>[\"artifacts\"];\n setArtifact: HarnessContext<TMetadata>[\"setArtifact\"];\n};\n\n/** Options for creating a lightweight custom application harness. */\nexport type CreateHarnessOptions<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n name: string;\n run: (\n args: CreateHarnessRunArgs<TInput, TMetadata>,\n ) => MaybePromise<HarnessResultLike<TOutput>>;\n};\n\nfunction isJsonPrimitive(value: unknown): value is JsonPrimitive {\n return (\n value === null ||\n typeof value === \"string\" ||\n typeof value === \"number\" ||\n typeof value === \"boolean\"\n );\n}\n\nfunction isJsonRecord(value: unknown): value is Record<string, unknown> {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\nfunction normalizeJsonArray(value: unknown[]): JsonValue[] {\n return value.map((item) => {\n const normalized = toJsonValue(item);\n return normalized === undefined ? null : normalized;\n });\n}\n\nfunction normalizeJsonObject(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n const normalized: Record<string, JsonValue> = {};\n\n for (const [key, entryValue] of Object.entries(value)) {\n const entry = toJsonValue(entryValue);\n if (entry !== undefined) {\n normalized[key] = entry;\n }\n }\n\n return normalized;\n}\n\n/** Returns true when a value exposes a callable method with the given name. */\nexport function hasCallableMethod(value: unknown, methodName: string) {\n return (\n value !== null &&\n (typeof value === \"object\" || typeof value === \"function\") &&\n methodName in value &&\n typeof (value as Record<string, unknown>)[methodName] === \"function\"\n );\n}\n\n/** Normalizes an unknown value into the JSON-safe shape used by harness runs. */\nexport function toJsonValue(value: unknown): JsonValue | undefined {\n if (isJsonPrimitive(value)) {\n return value;\n }\n\n if (Array.isArray(value)) {\n return normalizeJsonArray(value);\n }\n\n if (isJsonRecord(value)) {\n return normalizeJsonObject(value);\n }\n\n return undefined;\n}\n\n/** Drops non-JSON properties from a record while preserving valid values. */\nexport function normalizeRecord(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n return normalizeJsonObject(value);\n}\n\n/** Normalizes metadata and omits the field entirely when nothing survives. */\nexport function normalizeMetadata(\n value: Record<string, unknown>,\n): Record<string, JsonValue> | undefined {\n const normalized = normalizeRecord(value);\n return Object.keys(normalized).length > 0 ? normalized : undefined;\n}\n\n/** Converts arbitrary content into the JSON-safe message content shape. */\nexport function normalizeContent(value: unknown): JsonValue {\n const normalized = toJsonValue(value);\n return normalized !== undefined ? normalized : String(value);\n}\n\n/** Creates a harness from the common \"run app code and return output\" shape. */\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata>;\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata> {\n const harness: Harness<TInput, TOutput, TMetadata> = {\n name: options.name,\n run: async (input, context) => {\n const result = await options.run({\n input,\n metadata: context.metadata,\n signal: context.signal,\n artifacts: context.artifacts,\n setArtifact: context.setArtifact,\n });\n\n return normalizeHarnessRun(input, result, context);\n },\n };\n\n return harness;\n}\n\n/** Normalizes a lightweight harness result into the reporter-facing run shape. */\nexport function normalizeHarnessRun<\n TInput = unknown,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n>(\n input: TInput,\n result: HarnessResultLike<TOutput>,\n context?: HarnessContext<TMetadata>,\n): HarnessRun<TOutput> {\n if (isHarnessRun(result)) {\n if (\n context &&\n Object.keys(context.artifacts).length > 0 &&\n !result.artifacts\n ) {\n return {\n ...result,\n artifacts: context.artifacts,\n };\n }\n\n return result;\n }\n\n const output = result.output;\n const toolCalls = normalizeSimpleToolCalls(result.toolCalls);\n const usage = result.usage ?? {};\n const messages =\n result.messages ??\n createDefaultSessionMessages({\n input,\n output,\n toolCalls,\n });\n const metadata = result.metadata\n ? normalizeMetadata(result.metadata)\n : undefined;\n const artifacts = normalizeMergedArtifacts(\n context?.artifacts,\n result.artifacts,\n );\n\n return {\n session: {\n messages,\n ...(usage.provider ? { provider: usage.provider } : {}),\n ...(usage.model ? { model: usage.model } : {}),\n ...(metadata ? { metadata } : {}),\n },\n ...(output !== undefined ? { output } : {}),\n usage,\n ...(result.timings ? { timings: result.timings } : {}),\n ...(artifacts ? { artifacts } : {}),\n errors: normalizeSimpleErrors(result.errors),\n } as HarnessRun<TOutput>;\n}\n\nfunction createDefaultSessionMessages<TInput>({\n input,\n output,\n toolCalls: normalizedToolCalls,\n}: {\n input: TInput;\n output: JsonValue | undefined;\n toolCalls: ToolCallRecord[];\n}): NormalizedMessage[] {\n const messages: NormalizedMessage[] = [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ];\n\n if (output !== undefined || normalizedToolCalls.length > 0) {\n messages.push({\n role: \"assistant\",\n ...(output !== undefined ? { content: normalizeContent(output) } : {}),\n ...(normalizedToolCalls.length > 0\n ? { toolCalls: normalizedToolCalls }\n : {}),\n });\n }\n\n return messages;\n}\n\nfunction normalizeSimpleToolCalls(\n calls: SimpleToolCallRecord[] | undefined,\n): ToolCallRecord[] {\n return (calls ?? []).map((call) => {\n const {\n arguments: rawArguments,\n result: rawResult,\n error: rawError,\n metadata: rawMetadata,\n ...toolCall\n } = call;\n const args = normalizeToolCallArguments(rawArguments);\n const result = toJsonValue(rawResult);\n const error = normalizeToolCallError(rawError);\n const metadata = rawMetadata ? normalizeMetadata(rawMetadata) : undefined;\n\n return {\n ...toolCall,\n ...(args ? { arguments: args } : {}),\n ...(result !== undefined ? { result } : {}),\n ...(error ? { error } : {}),\n ...(metadata ? { metadata } : {}),\n };\n });\n}\n\nfunction normalizeToolCallArguments(\n value: unknown,\n): Record<string, JsonValue> | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const normalized = toJsonValue(value);\n return normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized)\n ? normalized\n : undefined;\n}\n\nfunction normalizeToolCallError(\n value: unknown,\n): ToolCallRecord[\"error\"] | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const serialized = serializeError(value);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\nfunction normalizeMergedArtifacts(\n contextArtifacts: Record<string, JsonValue> | undefined,\n resultArtifacts: Record<string, unknown> | undefined,\n) {\n const artifacts = {\n ...(contextArtifacts ?? {}),\n ...(resultArtifacts ? normalizeRecord(resultArtifacts) : {}),\n };\n\n return Object.keys(artifacts).length > 0 ? artifacts : undefined;\n}\n\nfunction normalizeSimpleErrors(\n errors: unknown[] | undefined,\n): Array<Record<string, JsonValue>> {\n return (errors ?? []).map((error) => {\n const normalized = toJsonValue(error);\n\n if (\n normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized) &&\n Object.keys(normalized).length > 0\n ) {\n return normalized;\n }\n\n return serializeError(error);\n });\n}\n\n/** Flattens every recorded tool call from a normalized session. */\nexport function toolCalls(session: NormalizedSession): ToolCallRecord[] {\n return session.messages.flatMap((message) => message.toolCalls ?? []);\n}\n\n/** Filters normalized session messages by role. */\nexport function messagesByRole(\n session: NormalizedSession,\n role: NormalizedMessage[\"role\"],\n): NormalizedMessage[] {\n return session.messages.filter((message) => message.role === role);\n}\n\n/** Returns every normalized system message from a session. */\nexport function systemMessages(session: NormalizedSession) {\n return messagesByRole(session, \"system\");\n}\n\n/** Returns every normalized user message from a session. */\nexport function userMessages(session: NormalizedSession) {\n return messagesByRole(session, \"user\");\n}\n\n/** Returns every normalized assistant message from a session. */\nexport function assistantMessages(session: NormalizedSession) {\n return messagesByRole(session, \"assistant\");\n}\n\n/** Returns every normalized tool message from a session. */\nexport function toolMessages(session: NormalizedSession) {\n return messagesByRole(session, \"tool\");\n}\n\n/** Attaches a partial or complete harness run to an arbitrary thrown error. */\nexport function attachHarnessRunToError(\n error: unknown,\n run: HarnessRun,\n): HarnessRunError {\n const baseError =\n error instanceof Error\n ? error\n : new Error(String(error ?? \"Unknown error\"));\n return Object.assign(baseError, {\n vitestEvalsRun: run,\n });\n}\n\n/** Reads an attached harness run back off a previously wrapped error value. */\nexport function getHarnessRunFromError(error: unknown): HarnessRun | undefined {\n if (\n error &&\n typeof error === \"object\" &&\n \"vitestEvalsRun\" in error &&\n isHarnessRun((error as { vitestEvalsRun?: unknown }).vitestEvalsRun)\n ) {\n return (error as { vitestEvalsRun: HarnessRun }).vitestEvalsRun;\n }\n\n return undefined;\n}\n\n/** Returns true when a value matches the normalized `HarnessRun` contract. */\nexport function isHarnessRun(value: unknown): value is HarnessRun {\n if (!value || typeof value !== \"object\") {\n return false;\n }\n\n const candidate = value as {\n session?: unknown;\n usage?: unknown;\n errors?: unknown;\n };\n\n return (\n isNormalizedSession(candidate.session) &&\n Boolean(candidate.usage) &&\n typeof candidate.usage === \"object\" &&\n !Array.isArray(candidate.usage) &&\n Array.isArray(candidate.errors)\n );\n}\n\n/** Returns true when a value matches the normalized session contract. */\nexport function isNormalizedSession(\n value: unknown,\n): value is NormalizedSession {\n return (\n Boolean(value) &&\n typeof value === \"object\" &&\n value !== null &&\n \"messages\" in value &&\n Array.isArray((value as { messages?: unknown }).messages)\n );\n}\n\n/** Reuses pre-normalized harness errors when a runtime already returns them. */\nexport function resolveHarnessRunErrors(\n result: unknown,\n): Array<Record<string, JsonValue>> {\n if (\n result &&\n typeof result === \"object\" &&\n Array.isArray((result as Record<string, unknown>).errors)\n ) {\n return (result as { errors: Array<Record<string, JsonValue>> }).errors;\n }\n\n return [];\n}\n\n/** Serializes an arbitrary thrown value into the normalized error shape. */\nexport function serializeError(error: unknown): Record<string, JsonValue> {\n if (error instanceof Error) {\n return {\n type: error.name,\n message: error.message,\n };\n }\n\n return {\n type: \"Error\",\n message: String(error),\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA+JA,SAAS,gBAAgB,OAAwC;AAC/D,SACE,UAAU,QACV,OAAO,UAAU,YACjB,OAAO,UAAU,YACjB,OAAO,UAAU;AAErB;AAEA,SAAS,aAAa,OAAkD;AACtE,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAEA,SAAS,mBAAmB,OAA+B;AACzD,SAAO,MAAM,IAAI,CAAC,SAAS;AACzB,UAAM,aAAa,YAAY,IAAI;AACnC,WAAO,eAAe,SAAY,OAAO;AAAA,EAC3C,CAAC;AACH;AAEA,SAAS,oBACP,OAC2B;AAC3B,QAAM,aAAwC,CAAC;AAE/C,aAAW,CAAC,KAAK,UAAU,KAAK,OAAO,QAAQ,KAAK,GAAG;AACrD,UAAM,QAAQ,YAAY,UAAU;AACpC,QAAI,UAAU,QAAW;AACvB,iBAAW,GAAG,IAAI;AAAA,IACpB;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,kBAAkB,OAAgB,YAAoB;AACpE,SACE,UAAU,SACT,OAAO,UAAU,YAAY,OAAO,UAAU,eAC/C,cAAc,SACd,OAAQ,MAAkC,UAAU,MAAM;AAE9D;AAGO,SAAS,YAAY,OAAuC;AACjE,MAAI,gBAAgB,KAAK,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,WAAO,mBAAmB,KAAK;AAAA,EACjC;AAEA,MAAI,aAAa,KAAK,GAAG;AACvB,WAAO,oBAAoB,KAAK;AAAA,EAClC;AAEA,SAAO;AACT;AAGO,SAAS,gBACd,OAC2B;AAC3B,SAAO,oBAAoB,KAAK;AAClC;AAGO,SAAS,kBACd,OACuC;AACvC,QAAM,aAAa,gBAAgB,KAAK;AACxC,SAAO,OAAO,KAAK,UAAU,EAAE,SAAS,IAAI,aAAa;AAC3D;AAGO,SAAS,iBAAiB,OAA2B;AAC1D,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,eAAe,SAAY,aAAa,OAAO,KAAK;AAC7D;AAUO,SAAS,cAKd,SACqC;AACrC,QAAM,UAA+C;AAAA,IACnD,MAAM,QAAQ;AAAA,IACd,KAAK,OAAO,OAAO,YAAY;AAC7B,YAAM,SAAS,MAAM,QAAQ,IAAI;AAAA,QAC/B;AAAA,QACA,UAAU,QAAQ;AAAA,QAClB,QAAQ,QAAQ;AAAA,QAChB,WAAW,QAAQ;AAAA,QACnB,aAAa,QAAQ;AAAA,MACvB,CAAC;AAED,aAAO,oBAAoB,OAAO,QAAQ,OAAO;AAAA,IACnD;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,oBAKd,OACA,QACA,SACqB;AACrB,MAAI,aAAa,MAAM,GAAG;AACxB,QACE,WACA,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,OAAO,WACR;AACA,aAAO;AAAA,QACL,GAAG;AAAA,QACH,WAAW,QAAQ;AAAA,MACrB;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,OAAO;AACtB,QAAMA,aAAY,yBAAyB,OAAO,SAAS;AAC3D,QAAM,QAAQ,OAAO,SAAS,CAAC;AAC/B,QAAM,WACJ,OAAO,YACP,6BAA6B;AAAA,IAC3B;AAAA,IACA;AAAA,IACA,WAAAA;AAAA,EACF,CAAC;AACH,QAAM,WAAW,OAAO,WACpB,kBAAkB,OAAO,QAAQ,IACjC;AACJ,QAAM,YAAY;AAAA,IAChB,SAAS;AAAA,IACT,OAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,SAAS;AAAA,MACP;AAAA,MACA,GAAI,MAAM,WAAW,EAAE,UAAU,MAAM,SAAS,IAAI,CAAC;AAAA,MACrD,GAAI,MAAM,QAAQ,EAAE,OAAO,MAAM,MAAM,IAAI,CAAC;AAAA,MAC5C,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,IACA,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,IACzC;AAAA,IACA,GAAI,OAAO,UAAU,EAAE,SAAS,OAAO,QAAQ,IAAI,CAAC;AAAA,IACpD,GAAI,YAAY,EAAE,UAAU,IAAI,CAAC;AAAA,IACjC,QAAQ,sBAAsB,OAAO,MAAM;AAAA,EAC7C;AACF;AAEA,SAAS,6BAAqC;AAAA,EAC5C;AAAA,EACA;AAAA,EACA,WAAW;AACb,GAIwB;AACtB,QAAM,WAAgC;AAAA,IACpC;AAAA,MACE,MAAM;AAAA,MACN,SAAS,iBAAiB,KAAK;AAAA,IACjC;AAAA,EACF;AAEA,MAAI,WAAW,UAAa,oBAAoB,SAAS,GAAG;AAC1D,aAAS,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,GAAI,WAAW,SAAY,EAAE,SAAS,iBAAiB,MAAM,EAAE,IAAI,CAAC;AAAA,MACpE,GAAI,oBAAoB,SAAS,IAC7B,EAAE,WAAW,oBAAoB,IACjC,CAAC;AAAA,IACP,CAAC;AAAA,EACH;AAEA,SAAO;AACT;AAEA,SAAS,yBACP,OACkB;AAClB,UAAQ,SAAS,CAAC,GAAG,IAAI,CAAC,SAAS;AACjC,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,QAAQ;AAAA,MACR,OAAO;AAAA,MACP,UAAU;AAAA,MACV,GAAG;AAAA,IACL,IAAI;AACJ,UAAM,OAAO,2BAA2B,YAAY;AACpD,UAAM,SAAS,YAAY,SAAS;AACpC,UAAM,QAAQ,uBAAuB,QAAQ;AAC7C,UAAM,WAAW,cAAc,kBAAkB,WAAW,IAAI;AAEhE,WAAO;AAAA,MACL,GAAG;AAAA,MACH,GAAI,OAAO,EAAE,WAAW,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,MACzC,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,MACzB,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,EACF,CAAC;AACH;AAEA,SAAS,2BACP,OACuC;AACvC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,cACL,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,IACvB,aACA;AACN;AAEA,SAAS,uBACP,OACqC;AACrC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAEA,SAAS,yBACP,kBACA,iBACA;AACA,QAAM,YAAY;AAAA,IAChB,GAAI,oBAAoB,CAAC;AAAA,IACzB,GAAI,kBAAkB,gBAAgB,eAAe,IAAI,CAAC;AAAA,EAC5D;AAEA,SAAO,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,YAAY;AACzD;AAEA,SAAS,sBACP,QACkC;AAClC,UAAQ,UAAU,CAAC,GAAG,IAAI,CAAC,UAAU;AACnC,UAAM,aAAa,YAAY,KAAK;AAEpC,QACE,cACA,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,KACzB,OAAO,KAAK,UAAU,EAAE,SAAS,GACjC;AACA,aAAO;AAAA,IACT;AAEA,WAAO,eAAe,KAAK;AAAA,EAC7B,CAAC;AACH;AAGO,SAAS,UAAU,SAA8C;AACtE,SAAO,QAAQ,SAAS,QAAQ,CAAC,YAAY,QAAQ,aAAa,CAAC,CAAC;AACtE;AAGO,SAAS,eACd,SACA,MACqB;AACrB,SAAO,QAAQ,SAAS,OAAO,CAAC,YAAY,QAAQ,SAAS,IAAI;AACnE;AAGO,SAAS,eAAe,SAA4B;AACzD,SAAO,eAAe,SAAS,QAAQ;AACzC;AAGO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAGO,SAAS,kBAAkB,SAA4B;AAC5D,SAAO,eAAe,SAAS,WAAW;AAC5C;AAGO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAGO,SAAS,wBACd,OACA,KACiB;AACjB,QAAM,YACJ,iBAAiB,QACb,QACA,IAAI,MAAM,OAAO,SAAS,eAAe,CAAC;AAChD,SAAO,OAAO,OAAO,WAAW;AAAA,IAC9B,gBAAgB;AAAA,EAClB,CAAC;AACH;AAGO,SAAS,uBAAuB,OAAwC;AAC7E,MACE,SACA,OAAO,UAAU,YACjB,oBAAoB,SACpB,aAAc,MAAuC,cAAc,GACnE;AACA,WAAQ,MAAyC;AAAA,EACnD;AAEA,SAAO;AACT;AAGO,SAAS,aAAa,OAAqC;AAChE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,WAAO;AAAA,EACT;AAEA,QAAM,YAAY;AAMlB,SACE,oBAAoB,UAAU,OAAO,KACrC,QAAQ,UAAU,KAAK,KACvB,OAAO,UAAU,UAAU,YAC3B,CAAC,MAAM,QAAQ,UAAU,KAAK,KAC9B,MAAM,QAAQ,UAAU,MAAM;AAElC;AAGO,SAAS,oBACd,OAC4B;AAC5B,SACE,QAAQ,KAAK,KACb,OAAO,UAAU,YACjB,UAAU,QACV,cAAc,SACd,MAAM,QAAS,MAAiC,QAAQ;AAE5D;AAGO,SAAS,wBACd,QACkC;AAClC,MACE,UACA,OAAO,WAAW,YAClB,MAAM,QAAS,OAAmC,MAAM,GACxD;AACA,WAAQ,OAAwD;AAAA,EAClE;AAEA,SAAO,CAAC;AACV;AAGO,SAAS,eAAe,OAA2C;AACxE,MAAI,iBAAiB,OAAO;AAC1B,WAAO;AAAA,MACL,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,SAAS,OAAO,KAAK;AAAA,EACvB;AACF;","names":["toolCalls"]}
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/harness.ts"],"sourcesContent":["/** Primitive scalar values allowed in normalized JSON-safe eval data. */\nexport type JsonPrimitive = string | number | boolean | null;\n\n/** JSON-safe value shape used by normalized sessions, artifacts, and errors. */\nexport type JsonValue =\n | JsonPrimitive\n | JsonValue[]\n | { [key: string]: JsonValue };\n\n/** Normalized record for one tool call observed during a harness run. */\nexport type ToolCallRecord = {\n id?: string;\n name: string;\n arguments?: Record<string, JsonValue>;\n result?: JsonValue;\n error?: {\n message: string;\n type?: string;\n [key: string]: JsonValue | undefined;\n };\n startedAt?: string;\n finishedAt?: string;\n durationMs?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** Normalized message recorded in a harness session transcript. */\nexport type NormalizedMessage = {\n role: \"system\" | \"user\" | \"assistant\" | \"tool\";\n content?: JsonValue;\n toolCalls?: ToolCallRecord[];\n metadata?: Record<string, JsonValue>;\n};\n\n/** Provider usage summary attached to a normalized harness run. */\nexport type UsageSummary = {\n provider?: string;\n model?: string;\n inputTokens?: number;\n outputTokens?: number;\n reasoningTokens?: number;\n totalTokens?: number;\n estimatedCost?: number;\n toolCalls?: number;\n retries?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** Timing summary attached to a normalized harness run. */\nexport type TimingSummary = {\n totalMs?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** JSON-serializable transcript produced by the system under test. */\nexport type NormalizedSession = {\n messages: NormalizedMessage[];\n provider?: string;\n model?: string;\n metadata?: Record<string, JsonValue>;\n};\n\ntype OutputField<TOutput extends JsonValue | undefined> =\n undefined extends TOutput ? { output?: TOutput } : { output: TOutput };\n\n/** Normalized result returned by every harness execution. */\nexport type HarnessRun<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n session: NormalizedSession;\n usage: UsageSummary;\n timings?: TimingSummary;\n artifacts?: Record<string, JsonValue>;\n errors: Array<Record<string, JsonValue>>;\n};\n\n/** Error value with an attached partial or complete normalized harness run. */\nexport type HarnessRunError = Error & {\n vitestEvalsRun: HarnessRun;\n};\n\n/** Per-run metadata shape accepted by harnesses and eval tests. */\nexport type HarnessMetadata = Record<string, unknown>;\n\n/** Runtime context passed from the eval fixture into a harness run. */\nexport type HarnessContext<\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n metadata: Readonly<TMetadata>;\n signal?: AbortSignal;\n artifacts: Record<string, JsonValue>;\n setArtifact: (name: string, value: JsonValue) => void;\n};\n\n/** Adapter that executes the system under test and returns a normalized run. */\nexport type Harness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n name: string;\n run: (\n input: TInput,\n context: HarnessContext<TMetadata>,\n ) => Promise<HarnessRun<TOutput>>;\n};\n\n/** Value or promise accepted by lightweight harness callbacks. */\nexport type MaybePromise<T> = T | Promise<T>;\n\n/** Lightweight tool-call record accepted by `createHarness(...)` results. */\nexport type SimpleToolCallRecord = Omit<\n ToolCallRecord,\n \"arguments\" | \"result\" | \"error\" | \"metadata\"\n> & {\n arguments?: unknown;\n result?: unknown;\n error?: unknown;\n metadata?: Record<string, unknown>;\n};\n\n/** Lightweight result shape normalized by `createHarness(...)`. */\nexport type SimpleHarnessResult<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n messages?: NormalizedMessage[];\n toolCalls?: SimpleToolCallRecord[];\n usage?: UsageSummary;\n timings?: TimingSummary;\n artifacts?: Record<string, unknown>;\n metadata?: Record<string, unknown>;\n errors?: unknown[];\n};\n\n/** Either a complete normalized run or a lightweight result to normalize. */\nexport type HarnessResultLike<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;\n\n/** Arguments passed to the `createHarness(...)` convenience callback. */\nexport type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {\n input: TInput;\n metadata: Readonly<TMetadata>;\n signal?: AbortSignal;\n artifacts: HarnessContext<TMetadata>[\"artifacts\"];\n setArtifact: HarnessContext<TMetadata>[\"setArtifact\"];\n};\n\n/** Options for creating a lightweight custom application harness. */\nexport type CreateHarnessOptions<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n name: string;\n run: (\n args: CreateHarnessRunArgs<TInput, TMetadata>,\n ) => MaybePromise<HarnessResultLike<TOutput>>;\n};\n\nfunction isJsonPrimitive(value: unknown): value is JsonPrimitive {\n return (\n value === null ||\n typeof value === \"string\" ||\n typeof value === \"number\" ||\n typeof value === \"boolean\"\n );\n}\n\nfunction isJsonRecord(value: unknown): value is Record<string, unknown> {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\nfunction normalizeJsonArray(value: unknown[]): JsonValue[] {\n return value.map((item) => {\n const normalized = toJsonValue(item);\n return normalized === undefined ? null : normalized;\n });\n}\n\nfunction normalizeJsonObject(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n const normalized: Record<string, JsonValue> = {};\n\n for (const [key, entryValue] of Object.entries(value)) {\n const entry = toJsonValue(entryValue);\n if (entry !== undefined) {\n normalized[key] = entry;\n }\n }\n\n return normalized;\n}\n\n/** Returns true when a value exposes a callable method with the given name. */\nexport function hasCallableMethod(value: unknown, methodName: string) {\n return (\n value !== null &&\n (typeof value === \"object\" || typeof value === \"function\") &&\n methodName in value &&\n typeof (value as Record<string, unknown>)[methodName] === \"function\"\n );\n}\n\n/** Normalizes an unknown value into the JSON-safe shape used by harness runs. */\nexport function toJsonValue(value: unknown): JsonValue | undefined {\n if (isJsonPrimitive(value)) {\n return value;\n }\n\n if (Array.isArray(value)) {\n return normalizeJsonArray(value);\n }\n\n if (isJsonRecord(value)) {\n return normalizeJsonObject(value);\n }\n\n return undefined;\n}\n\n/** Drops non-JSON properties from a record while preserving valid values. */\nexport function normalizeRecord(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n return normalizeJsonObject(value);\n}\n\n/** Normalizes metadata and omits the field entirely when nothing survives. */\nexport function normalizeMetadata(\n value: Record<string, unknown>,\n): Record<string, JsonValue> | undefined {\n const normalized = normalizeRecord(value);\n return Object.keys(normalized).length > 0 ? normalized : undefined;\n}\n\n/** Converts arbitrary content into the JSON-safe message content shape. */\nexport function normalizeContent(value: unknown): JsonValue {\n const normalized = toJsonValue(value);\n return normalized !== undefined ? normalized : String(value);\n}\n\n/** Creates a harness from the common \"run app code and return output\" shape. */\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata>;\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata> {\n const harness: Harness<TInput, TOutput, TMetadata> = {\n name: options.name,\n run: async (input, context) => {\n const result = await options.run({\n input,\n metadata: context.metadata,\n signal: context.signal,\n artifacts: context.artifacts,\n setArtifact: context.setArtifact,\n });\n\n return normalizeHarnessRun(input, result, context);\n },\n };\n\n return harness;\n}\n\n/** Normalizes a lightweight harness result into the reporter-facing run shape. */\nexport function normalizeHarnessRun<\n TInput = unknown,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n>(\n input: TInput,\n result: HarnessResultLike<TOutput>,\n context?: HarnessContext<TMetadata>,\n): HarnessRun<TOutput> {\n if (isHarnessRun(result)) {\n if (\n context &&\n Object.keys(context.artifacts).length > 0 &&\n !result.artifacts\n ) {\n return {\n ...result,\n artifacts: context.artifacts,\n };\n }\n\n return result;\n }\n\n const output = result.output;\n const toolCalls = normalizeSimpleToolCalls(result.toolCalls);\n const usage = result.usage ?? {};\n const messages =\n result.messages ??\n createDefaultSessionMessages({\n input,\n output,\n toolCalls,\n });\n const metadata = result.metadata\n ? normalizeMetadata(result.metadata)\n : undefined;\n const artifacts = normalizeMergedArtifacts(\n context?.artifacts,\n result.artifacts,\n );\n\n return {\n session: {\n messages,\n ...(usage.provider ? { provider: usage.provider } : {}),\n ...(usage.model ? { model: usage.model } : {}),\n ...(metadata ? { metadata } : {}),\n },\n ...(output !== undefined ? { output } : {}),\n usage,\n ...(result.timings ? { timings: result.timings } : {}),\n ...(artifacts ? { artifacts } : {}),\n errors: normalizeSimpleErrors(result.errors),\n } as HarnessRun<TOutput>;\n}\n\nfunction createDefaultSessionMessages<TInput>({\n input,\n output,\n toolCalls: normalizedToolCalls,\n}: {\n input: TInput;\n output: JsonValue | undefined;\n toolCalls: ToolCallRecord[];\n}): NormalizedMessage[] {\n const messages: NormalizedMessage[] = [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ];\n\n if (output !== undefined || normalizedToolCalls.length > 0) {\n messages.push({\n role: \"assistant\",\n ...(output !== undefined ? { content: normalizeContent(output) } : {}),\n ...(normalizedToolCalls.length > 0\n ? { toolCalls: normalizedToolCalls }\n : {}),\n });\n }\n\n return messages;\n}\n\nfunction normalizeSimpleToolCalls(\n calls: SimpleToolCallRecord[] | undefined,\n): ToolCallRecord[] {\n return (calls ?? []).map((call) => {\n const {\n arguments: rawArguments,\n result: rawResult,\n error: rawError,\n metadata: rawMetadata,\n ...toolCall\n } = call;\n const args = normalizeToolCallArguments(rawArguments);\n const result = toJsonValue(rawResult);\n const error = normalizeToolCallError(rawError);\n const metadata = rawMetadata ? normalizeMetadata(rawMetadata) : undefined;\n\n return {\n ...toolCall,\n ...(args ? { arguments: args } : {}),\n ...(result !== undefined ? { result } : {}),\n ...(error ? { error } : {}),\n ...(metadata ? { metadata } : {}),\n };\n });\n}\n\nfunction normalizeToolCallArguments(\n value: unknown,\n): Record<string, JsonValue> | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const normalized = toJsonValue(value);\n return normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized)\n ? normalized\n : undefined;\n}\n\nfunction normalizeToolCallError(\n value: unknown,\n): ToolCallRecord[\"error\"] | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const serialized = serializeError(value);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\nfunction normalizeMergedArtifacts(\n contextArtifacts: Record<string, JsonValue> | undefined,\n resultArtifacts: Record<string, unknown> | undefined,\n) {\n const artifacts = {\n ...(contextArtifacts ?? {}),\n ...(resultArtifacts ? normalizeRecord(resultArtifacts) : {}),\n };\n\n return Object.keys(artifacts).length > 0 ? artifacts : undefined;\n}\n\nfunction normalizeSimpleErrors(\n errors: unknown[] | undefined,\n): Array<Record<string, JsonValue>> {\n return (errors ?? []).map((error) => {\n const normalized = toJsonValue(error);\n\n if (\n normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized) &&\n Object.keys(normalized).length > 0\n ) {\n return normalized;\n }\n\n return serializeError(error);\n });\n}\n\n/** Flattens every recorded tool call from a normalized session. */\nexport function toolCalls(session: NormalizedSession): ToolCallRecord[] {\n return session.messages.flatMap((message) => message.toolCalls ?? []);\n}\n\n/** Filters normalized session messages by role. */\nexport function messagesByRole(\n session: NormalizedSession,\n role: NormalizedMessage[\"role\"],\n): NormalizedMessage[] {\n return session.messages.filter((message) => message.role === role);\n}\n\n/** Returns every normalized system message from a session. */\nexport function systemMessages(session: NormalizedSession) {\n return messagesByRole(session, \"system\");\n}\n\n/** Returns every normalized user message from a session. */\nexport function userMessages(session: NormalizedSession) {\n return messagesByRole(session, \"user\");\n}\n\n/** Returns every normalized assistant message from a session. */\nexport function assistantMessages(session: NormalizedSession) {\n return messagesByRole(session, \"assistant\");\n}\n\n/** Returns every normalized tool message from a session. */\nexport function toolMessages(session: NormalizedSession) {\n return messagesByRole(session, \"tool\");\n}\n\n/** Attaches a partial or complete harness run to an arbitrary thrown error. */\nexport function attachHarnessRunToError(\n error: unknown,\n run: HarnessRun,\n): HarnessRunError {\n const baseError =\n error instanceof Error\n ? error\n : new Error(String(error ?? \"Unknown error\"));\n return Object.assign(baseError, {\n vitestEvalsRun: run,\n });\n}\n\n/** Reads an attached harness run back off a previously wrapped error value. */\nexport function getHarnessRunFromError(error: unknown): HarnessRun | undefined {\n if (\n error &&\n typeof error === \"object\" &&\n \"vitestEvalsRun\" in error &&\n isHarnessRun((error as { vitestEvalsRun?: unknown }).vitestEvalsRun)\n ) {\n return (error as { vitestEvalsRun: HarnessRun }).vitestEvalsRun;\n }\n\n return undefined;\n}\n\n/** Returns true when a value matches the normalized `HarnessRun` contract. */\nexport function isHarnessRun(value: unknown): value is HarnessRun {\n if (!value || typeof value !== \"object\") {\n return false;\n }\n\n const candidate = value as {\n session?: unknown;\n usage?: unknown;\n errors?: unknown;\n };\n\n return (\n isNormalizedSession(candidate.session) &&\n Boolean(candidate.usage) &&\n typeof candidate.usage === \"object\" &&\n !Array.isArray(candidate.usage) &&\n Array.isArray(candidate.errors)\n );\n}\n\n/** Returns true when a value matches the normalized session contract. */\nexport function isNormalizedSession(\n value: unknown,\n): value is NormalizedSession {\n return (\n Boolean(value) &&\n typeof value === \"object\" &&\n value !== null &&\n \"messages\" in value &&\n Array.isArray((value as { messages?: unknown }).messages)\n );\n}\n\n/** Reuses pre-normalized harness errors when a runtime already returns them. */\nexport function resolveHarnessRunErrors(\n result: unknown,\n): Array<Record<string, JsonValue>> {\n if (\n result &&\n typeof result === \"object\" &&\n Array.isArray((result as Record<string, unknown>).errors)\n ) {\n return (result as { errors: Array<Record<string, JsonValue>> }).errors;\n }\n\n return [];\n}\n\n/** Serializes an arbitrary thrown value into the normalized error shape. */\nexport function serializeError(error: unknown): Record<string, JsonValue> {\n if (error instanceof Error) {\n return {\n type: error.name,\n message: error.message,\n };\n }\n\n return {\n type: \"Error\",\n message: String(error),\n };\n}\n"],"mappings":";AAgKA,SAAS,gBAAgB,OAAwC;AAC/D,SACE,UAAU,QACV,OAAO,UAAU,YACjB,OAAO,UAAU,YACjB,OAAO,UAAU;AAErB;AAEA,SAAS,aAAa,OAAkD;AACtE,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAEA,SAAS,mBAAmB,OAA+B;AACzD,SAAO,MAAM,IAAI,CAAC,SAAS;AACzB,UAAM,aAAa,YAAY,IAAI;AACnC,WAAO,eAAe,SAAY,OAAO;AAAA,EAC3C,CAAC;AACH;AAEA,SAAS,oBACP,OAC2B;AAC3B,QAAM,aAAwC,CAAC;AAE/C,aAAW,CAAC,KAAK,UAAU,KAAK,OAAO,QAAQ,KAAK,GAAG;AACrD,UAAM,QAAQ,YAAY,UAAU;AACpC,QAAI,UAAU,QAAW;AACvB,iBAAW,GAAG,IAAI;AAAA,IACpB;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,kBAAkB,OAAgB,YAAoB;AACpE,SACE,UAAU,SACT,OAAO,UAAU,YAAY,OAAO,UAAU,eAC/C,cAAc,SACd,OAAQ,MAAkC,UAAU,MAAM;AAE9D;AAGO,SAAS,YAAY,OAAuC;AACjE,MAAI,gBAAgB,KAAK,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,WAAO,mBAAmB,KAAK;AAAA,EACjC;AAEA,MAAI,aAAa,KAAK,GAAG;AACvB,WAAO,oBAAoB,KAAK;AAAA,EAClC;AAEA,SAAO;AACT;AAGO,SAAS,gBACd,OAC2B;AAC3B,SAAO,oBAAoB,KAAK;AAClC;AAGO,SAAS,kBACd,OACuC;AACvC,QAAM,aAAa,gBAAgB,KAAK;AACxC,SAAO,OAAO,KAAK,UAAU,EAAE,SAAS,IAAI,aAAa;AAC3D;AAGO,SAAS,iBAAiB,OAA2B;AAC1D,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,eAAe,SAAY,aAAa,OAAO,KAAK;AAC7D;AAUO,SAAS,cAKd,SACqC;AACrC,QAAM,UAA+C;AAAA,IACnD,MAAM,QAAQ;AAAA,IACd,KAAK,OAAO,OAAO,YAAY;AAC7B,YAAM,SAAS,MAAM,QAAQ,IAAI;AAAA,QAC/B;AAAA,QACA,UAAU,QAAQ;AAAA,QAClB,QAAQ,QAAQ;AAAA,QAChB,WAAW,QAAQ;AAAA,QACnB,aAAa,QAAQ;AAAA,MACvB,CAAC;AAED,aAAO,oBAAoB,OAAO,QAAQ,OAAO;AAAA,IACnD;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,oBAKd,OACA,QACA,SACqB;AACrB,MAAI,aAAa,MAAM,GAAG;AACxB,QACE,WACA,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,OAAO,WACR;AACA,aAAO;AAAA,QACL,GAAG;AAAA,QACH,WAAW,QAAQ;AAAA,MACrB;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,OAAO;AACtB,QAAMA,aAAY,yBAAyB,OAAO,SAAS;AAC3D,QAAM,QAAQ,OAAO,SAAS,CAAC;AAC/B,QAAM,WACJ,OAAO,YACP,6BAA6B;AAAA,IAC3B;AAAA,IACA;AAAA,IACA,WAAAA;AAAA,EACF,CAAC;AACH,QAAM,WAAW,OAAO,WACpB,kBAAkB,OAAO,QAAQ,IACjC;AACJ,QAAM,YAAY;AAAA,IAChB,SAAS;AAAA,IACT,OAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,SAAS;AAAA,MACP;AAAA,MACA,GAAI,MAAM,WAAW,EAAE,UAAU,MAAM,SAAS,IAAI,CAAC;AAAA,MACrD,GAAI,MAAM,QAAQ,EAAE,OAAO,MAAM,MAAM,IAAI,CAAC;AAAA,MAC5C,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,IACA,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,IACzC;AAAA,IACA,GAAI,OAAO,UAAU,EAAE,SAAS,OAAO,QAAQ,IAAI,CAAC;AAAA,IACpD,GAAI,YAAY,EAAE,UAAU,IAAI,CAAC;AAAA,IACjC,QAAQ,sBAAsB,OAAO,MAAM;AAAA,EAC7C;AACF;AAEA,SAAS,6BAAqC;AAAA,EAC5C;AAAA,EACA;AAAA,EACA,WAAW;AACb,GAIwB;AACtB,QAAM,WAAgC;AAAA,IACpC;AAAA,MACE,MAAM;AAAA,MACN,SAAS,iBAAiB,KAAK;AAAA,IACjC;AAAA,EACF;AAEA,MAAI,WAAW,UAAa,oBAAoB,SAAS,GAAG;AAC1D,aAAS,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,GAAI,WAAW,SAAY,EAAE,SAAS,iBAAiB,MAAM,EAAE,IAAI,CAAC;AAAA,MACpE,GAAI,oBAAoB,SAAS,IAC7B,EAAE,WAAW,oBAAoB,IACjC,CAAC;AAAA,IACP,CAAC;AAAA,EACH;AAEA,SAAO;AACT;AAEA,SAAS,yBACP,OACkB;AAClB,UAAQ,SAAS,CAAC,GAAG,IAAI,CAAC,SAAS;AACjC,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,QAAQ;AAAA,MACR,OAAO;AAAA,MACP,UAAU;AAAA,MACV,GAAG;AAAA,IACL,IAAI;AACJ,UAAM,OAAO,2BAA2B,YAAY;AACpD,UAAM,SAAS,YAAY,SAAS;AACpC,UAAM,QAAQ,uBAAuB,QAAQ;AAC7C,UAAM,WAAW,cAAc,kBAAkB,WAAW,IAAI;AAEhE,WAAO;AAAA,MACL,GAAG;AAAA,MACH,GAAI,OAAO,EAAE,WAAW,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,MACzC,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,MACzB,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,EACF,CAAC;AACH;AAEA,SAAS,2BACP,OACuC;AACvC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,cACL,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,IACvB,aACA;AACN;AAEA,SAAS,uBACP,OACqC;AACrC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAEA,SAAS,yBACP,kBACA,iBACA;AACA,QAAM,YAAY;AAAA,IAChB,GAAI,oBAAoB,CAAC;AAAA,IACzB,GAAI,kBAAkB,gBAAgB,eAAe,IAAI,CAAC;AAAA,EAC5D;AAEA,SAAO,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,YAAY;AACzD;AAEA,SAAS,sBACP,QACkC;AAClC,UAAQ,UAAU,CAAC,GAAG,IAAI,CAAC,UAAU;AACnC,UAAM,aAAa,YAAY,KAAK;AAEpC,QACE,cACA,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,KACzB,OAAO,KAAK,UAAU,EAAE,SAAS,GACjC;AACA,aAAO;AAAA,IACT;AAEA,WAAO,eAAe,KAAK;AAAA,EAC7B,CAAC;AACH;AAGO,SAAS,UAAU,SAA8C;AACtE,SAAO,QAAQ,SAAS,QAAQ,CAAC,YAAY,QAAQ,aAAa,CAAC,CAAC;AACtE;AAGO,SAAS,eACd,SACA,MACqB;AACrB,SAAO,QAAQ,SAAS,OAAO,CAAC,YAAY,QAAQ,SAAS,IAAI;AACnE;AAGO,SAAS,eAAe,SAA4B;AACzD,SAAO,eAAe,SAAS,QAAQ;AACzC;AAGO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAGO,SAAS,kBAAkB,SAA4B;AAC5D,SAAO,eAAe,SAAS,WAAW;AAC5C;AAGO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAGO,SAAS,wBACd,OACA,KACiB;AACjB,QAAM,YACJ,iBAAiB,QACb,QACA,IAAI,MAAM,OAAO,SAAS,eAAe,CAAC;AAChD,SAAO,OAAO,OAAO,WAAW;AAAA,IAC9B,gBAAgB;AAAA,EAClB,CAAC;AACH;AAGO,SAAS,uBAAuB,OAAwC;AAC7E,MACE,SACA,OAAO,UAAU,YACjB,oBAAoB,SACpB,aAAc,MAAuC,cAAc,GACnE;AACA,WAAQ,MAAyC;AAAA,EACnD;AAEA,SAAO;AACT;AAGO,SAAS,aAAa,OAAqC;AAChE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,WAAO;AAAA,EACT;AAEA,QAAM,YAAY;AAMlB,SACE,oBAAoB,UAAU,OAAO,KACrC,QAAQ,UAAU,KAAK,KACvB,OAAO,UAAU,UAAU,YAC3B,CAAC,MAAM,QAAQ,UAAU,KAAK,KAC9B,MAAM,QAAQ,UAAU,MAAM;AAElC;AAGO,SAAS,oBACd,OAC4B;AAC5B,SACE,QAAQ,KAAK,KACb,OAAO,UAAU,YACjB,UAAU,QACV,cAAc,SACd,MAAM,QAAS,MAAiC,QAAQ;AAE5D;AAGO,SAAS,wBACd,QACkC;AAClC,MACE,UACA,OAAO,WAAW,YAClB,MAAM,QAAS,OAAmC,MAAM,GACxD;AACA,WAAQ,OAAwD;AAAA,EAClE;AAEA,SAAO,CAAC;AACV;AAGO,SAAS,eAAe,OAA2C;AACxE,MAAI,iBAAiB,OAAO;AAC1B,WAAO;AAAA,MACL,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,SAAS,OAAO,KAAK;AAAA,EACvB;AACF;","names":["toolCalls"]}
1
+ {"version":3,"sources":["../src/harness.ts"],"sourcesContent":["/** Primitive scalar values allowed in normalized JSON-safe eval data. */\nexport type JsonPrimitive = string | number | boolean | null;\n\n/** JSON-safe value shape used by normalized sessions, artifacts, and errors. */\nexport type JsonValue =\n | JsonPrimitive\n | JsonValue[]\n | { [key: string]: JsonValue };\n\n/** Normalized record for one tool call observed during a harness run. */\nexport type ToolCallRecord = {\n id?: string;\n name: string;\n arguments?: Record<string, JsonValue>;\n result?: JsonValue;\n error?: {\n message: string;\n type?: string;\n [key: string]: JsonValue | undefined;\n };\n startedAt?: string;\n finishedAt?: string;\n durationMs?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** Normalized message recorded in a harness session transcript. */\nexport type NormalizedMessage = {\n role: \"system\" | \"user\" | \"assistant\" | \"tool\";\n content?: JsonValue;\n toolCalls?: ToolCallRecord[];\n metadata?: Record<string, JsonValue>;\n};\n\n/** Provider usage summary attached to a normalized harness run. */\nexport type UsageSummary = {\n provider?: string;\n model?: string;\n inputTokens?: number;\n outputTokens?: number;\n reasoningTokens?: number;\n totalTokens?: number;\n toolCalls?: number;\n retries?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** Timing summary attached to a normalized harness run. */\nexport type TimingSummary = {\n totalMs?: number;\n metadata?: Record<string, JsonValue>;\n};\n\n/** JSON-serializable transcript produced by the system under test. */\nexport type NormalizedSession = {\n messages: NormalizedMessage[];\n provider?: string;\n model?: string;\n metadata?: Record<string, JsonValue>;\n};\n\ntype OutputField<TOutput extends JsonValue | undefined> =\n undefined extends TOutput ? { output?: TOutput } : { output: TOutput };\n\n/** Normalized result returned by every harness execution. */\nexport type HarnessRun<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n session: NormalizedSession;\n usage: UsageSummary;\n timings?: TimingSummary;\n artifacts?: Record<string, JsonValue>;\n errors: Array<Record<string, JsonValue>>;\n};\n\n/** Error value with an attached partial or complete normalized harness run. */\nexport type HarnessRunError = Error & {\n vitestEvalsRun: HarnessRun;\n};\n\n/** Per-run metadata shape accepted by harnesses and eval tests. */\nexport type HarnessMetadata = Record<string, unknown>;\n\n/** Runtime context passed from the eval fixture into a harness run. */\nexport type HarnessContext<\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n metadata: Readonly<TMetadata>;\n signal?: AbortSignal;\n artifacts: Record<string, JsonValue>;\n setArtifact: (name: string, value: JsonValue) => void;\n};\n\n/** Adapter that executes the system under test and returns a normalized run. */\nexport type Harness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n name: string;\n run: (\n input: TInput,\n context: HarnessContext<TMetadata>,\n ) => Promise<HarnessRun<TOutput>>;\n};\n\n/** Value or promise accepted by lightweight harness callbacks. */\nexport type MaybePromise<T> = T | Promise<T>;\n\n/** Lightweight tool-call record accepted by `createHarness(...)` results. */\nexport type SimpleToolCallRecord = Omit<\n ToolCallRecord,\n \"arguments\" | \"result\" | \"error\" | \"metadata\"\n> & {\n arguments?: unknown;\n result?: unknown;\n error?: unknown;\n metadata?: Record<string, unknown>;\n};\n\n/** Lightweight result shape normalized by `createHarness(...)`. */\nexport type SimpleHarnessResult<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = OutputField<TOutput> & {\n messages?: NormalizedMessage[];\n toolCalls?: SimpleToolCallRecord[];\n usage?: UsageSummary;\n timings?: TimingSummary;\n artifacts?: Record<string, unknown>;\n metadata?: Record<string, unknown>;\n errors?: unknown[];\n};\n\n/** Either a complete normalized run or a lightweight result to normalize. */\nexport type HarnessResultLike<\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n> = HarnessRun<TOutput> | SimpleHarnessResult<TOutput>;\n\n/** Arguments passed to the `createHarness(...)` convenience callback. */\nexport type CreateHarnessRunArgs<TInput, TMetadata extends HarnessMetadata> = {\n input: TInput;\n metadata: Readonly<TMetadata>;\n signal?: AbortSignal;\n artifacts: HarnessContext<TMetadata>[\"artifacts\"];\n setArtifact: HarnessContext<TMetadata>[\"setArtifact\"];\n};\n\n/** Options for creating a lightweight custom application harness. */\nexport type CreateHarnessOptions<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n> = {\n name: string;\n run: (\n args: CreateHarnessRunArgs<TInput, TMetadata>,\n ) => MaybePromise<HarnessResultLike<TOutput>>;\n};\n\nfunction isJsonPrimitive(value: unknown): value is JsonPrimitive {\n return (\n value === null ||\n typeof value === \"string\" ||\n typeof value === \"number\" ||\n typeof value === \"boolean\"\n );\n}\n\nfunction isJsonRecord(value: unknown): value is Record<string, unknown> {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\nfunction normalizeJsonArray(value: unknown[]): JsonValue[] {\n return value.map((item) => {\n const normalized = toJsonValue(item);\n return normalized === undefined ? null : normalized;\n });\n}\n\nfunction normalizeJsonObject(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n const normalized: Record<string, JsonValue> = {};\n\n for (const [key, entryValue] of Object.entries(value)) {\n const entry = toJsonValue(entryValue);\n if (entry !== undefined) {\n normalized[key] = entry;\n }\n }\n\n return normalized;\n}\n\n/** Returns true when a value exposes a callable method with the given name. */\nexport function hasCallableMethod(value: unknown, methodName: string) {\n return (\n value !== null &&\n (typeof value === \"object\" || typeof value === \"function\") &&\n methodName in value &&\n typeof (value as Record<string, unknown>)[methodName] === \"function\"\n );\n}\n\n/** Normalizes an unknown value into the JSON-safe shape used by harness runs. */\nexport function toJsonValue(value: unknown): JsonValue | undefined {\n if (isJsonPrimitive(value)) {\n return value;\n }\n\n if (Array.isArray(value)) {\n return normalizeJsonArray(value);\n }\n\n if (isJsonRecord(value)) {\n return normalizeJsonObject(value);\n }\n\n return undefined;\n}\n\n/** Drops non-JSON properties from a record while preserving valid values. */\nexport function normalizeRecord(\n value: Record<string, unknown>,\n): Record<string, JsonValue> {\n return normalizeJsonObject(value);\n}\n\n/** Normalizes metadata and omits the field entirely when nothing survives. */\nexport function normalizeMetadata(\n value: Record<string, unknown>,\n): Record<string, JsonValue> | undefined {\n const normalized = normalizeRecord(value);\n return Object.keys(normalized).length > 0 ? normalized : undefined;\n}\n\n/** Converts arbitrary content into the JSON-safe message content shape. */\nexport function normalizeContent(value: unknown): JsonValue {\n const normalized = toJsonValue(value);\n return normalized !== undefined ? normalized : String(value);\n}\n\n/** Creates a harness from the common \"run app code and return output\" shape. */\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata>;\nexport function createHarness<\n TInput = unknown,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n>(\n options: CreateHarnessOptions<TInput, TOutput, TMetadata>,\n): Harness<TInput, TOutput, TMetadata> {\n const harness: Harness<TInput, TOutput, TMetadata> = {\n name: options.name,\n run: async (input, context) => {\n const result = await options.run({\n input,\n metadata: context.metadata,\n signal: context.signal,\n artifacts: context.artifacts,\n setArtifact: context.setArtifact,\n });\n\n return normalizeHarnessRun(input, result, context);\n },\n };\n\n return harness;\n}\n\n/** Normalizes a lightweight harness result into the reporter-facing run shape. */\nexport function normalizeHarnessRun<\n TInput = unknown,\n TMetadata extends HarnessMetadata = HarnessMetadata,\n TOutput extends JsonValue | undefined = JsonValue | undefined,\n>(\n input: TInput,\n result: HarnessResultLike<TOutput>,\n context?: HarnessContext<TMetadata>,\n): HarnessRun<TOutput> {\n if (isHarnessRun(result)) {\n if (\n context &&\n Object.keys(context.artifacts).length > 0 &&\n !result.artifacts\n ) {\n return {\n ...result,\n artifacts: context.artifacts,\n };\n }\n\n return result;\n }\n\n const output = result.output;\n const toolCalls = normalizeSimpleToolCalls(result.toolCalls);\n const usage = result.usage ?? {};\n const messages =\n result.messages ??\n createDefaultSessionMessages({\n input,\n output,\n toolCalls,\n });\n const metadata = result.metadata\n ? normalizeMetadata(result.metadata)\n : undefined;\n const artifacts = normalizeMergedArtifacts(\n context?.artifacts,\n result.artifacts,\n );\n\n return {\n session: {\n messages,\n ...(usage.provider ? { provider: usage.provider } : {}),\n ...(usage.model ? { model: usage.model } : {}),\n ...(metadata ? { metadata } : {}),\n },\n ...(output !== undefined ? { output } : {}),\n usage,\n ...(result.timings ? { timings: result.timings } : {}),\n ...(artifacts ? { artifacts } : {}),\n errors: normalizeSimpleErrors(result.errors),\n } as HarnessRun<TOutput>;\n}\n\nfunction createDefaultSessionMessages<TInput>({\n input,\n output,\n toolCalls: normalizedToolCalls,\n}: {\n input: TInput;\n output: JsonValue | undefined;\n toolCalls: ToolCallRecord[];\n}): NormalizedMessage[] {\n const messages: NormalizedMessage[] = [\n {\n role: \"user\",\n content: normalizeContent(input),\n },\n ];\n\n if (output !== undefined || normalizedToolCalls.length > 0) {\n messages.push({\n role: \"assistant\",\n ...(output !== undefined ? { content: normalizeContent(output) } : {}),\n ...(normalizedToolCalls.length > 0\n ? { toolCalls: normalizedToolCalls }\n : {}),\n });\n }\n\n return messages;\n}\n\nfunction normalizeSimpleToolCalls(\n calls: SimpleToolCallRecord[] | undefined,\n): ToolCallRecord[] {\n return (calls ?? []).map((call) => {\n const {\n arguments: rawArguments,\n result: rawResult,\n error: rawError,\n metadata: rawMetadata,\n ...toolCall\n } = call;\n const args = normalizeToolCallArguments(rawArguments);\n const result = toJsonValue(rawResult);\n const error = normalizeToolCallError(rawError);\n const metadata = rawMetadata ? normalizeMetadata(rawMetadata) : undefined;\n\n return {\n ...toolCall,\n ...(args ? { arguments: args } : {}),\n ...(result !== undefined ? { result } : {}),\n ...(error ? { error } : {}),\n ...(metadata ? { metadata } : {}),\n };\n });\n}\n\nfunction normalizeToolCallArguments(\n value: unknown,\n): Record<string, JsonValue> | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const normalized = toJsonValue(value);\n return normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized)\n ? normalized\n : undefined;\n}\n\nfunction normalizeToolCallError(\n value: unknown,\n): ToolCallRecord[\"error\"] | undefined {\n if (value === undefined) {\n return undefined;\n }\n\n const serialized = serializeError(value);\n const { message, type, ...details } = serialized;\n\n return {\n ...details,\n message: typeof message === \"string\" ? message : String(message),\n ...(typeof type === \"string\" ? { type } : {}),\n };\n}\n\nfunction normalizeMergedArtifacts(\n contextArtifacts: Record<string, JsonValue> | undefined,\n resultArtifacts: Record<string, unknown> | undefined,\n) {\n const artifacts = {\n ...(contextArtifacts ?? {}),\n ...(resultArtifacts ? normalizeRecord(resultArtifacts) : {}),\n };\n\n return Object.keys(artifacts).length > 0 ? artifacts : undefined;\n}\n\nfunction normalizeSimpleErrors(\n errors: unknown[] | undefined,\n): Array<Record<string, JsonValue>> {\n return (errors ?? []).map((error) => {\n const normalized = toJsonValue(error);\n\n if (\n normalized &&\n typeof normalized === \"object\" &&\n !Array.isArray(normalized) &&\n Object.keys(normalized).length > 0\n ) {\n return normalized;\n }\n\n return serializeError(error);\n });\n}\n\n/** Flattens every recorded tool call from a normalized session. */\nexport function toolCalls(session: NormalizedSession): ToolCallRecord[] {\n return session.messages.flatMap((message) => message.toolCalls ?? []);\n}\n\n/** Filters normalized session messages by role. */\nexport function messagesByRole(\n session: NormalizedSession,\n role: NormalizedMessage[\"role\"],\n): NormalizedMessage[] {\n return session.messages.filter((message) => message.role === role);\n}\n\n/** Returns every normalized system message from a session. */\nexport function systemMessages(session: NormalizedSession) {\n return messagesByRole(session, \"system\");\n}\n\n/** Returns every normalized user message from a session. */\nexport function userMessages(session: NormalizedSession) {\n return messagesByRole(session, \"user\");\n}\n\n/** Returns every normalized assistant message from a session. */\nexport function assistantMessages(session: NormalizedSession) {\n return messagesByRole(session, \"assistant\");\n}\n\n/** Returns every normalized tool message from a session. */\nexport function toolMessages(session: NormalizedSession) {\n return messagesByRole(session, \"tool\");\n}\n\n/** Attaches a partial or complete harness run to an arbitrary thrown error. */\nexport function attachHarnessRunToError(\n error: unknown,\n run: HarnessRun,\n): HarnessRunError {\n const baseError =\n error instanceof Error\n ? error\n : new Error(String(error ?? \"Unknown error\"));\n return Object.assign(baseError, {\n vitestEvalsRun: run,\n });\n}\n\n/** Reads an attached harness run back off a previously wrapped error value. */\nexport function getHarnessRunFromError(error: unknown): HarnessRun | undefined {\n if (\n error &&\n typeof error === \"object\" &&\n \"vitestEvalsRun\" in error &&\n isHarnessRun((error as { vitestEvalsRun?: unknown }).vitestEvalsRun)\n ) {\n return (error as { vitestEvalsRun: HarnessRun }).vitestEvalsRun;\n }\n\n return undefined;\n}\n\n/** Returns true when a value matches the normalized `HarnessRun` contract. */\nexport function isHarnessRun(value: unknown): value is HarnessRun {\n if (!value || typeof value !== \"object\") {\n return false;\n }\n\n const candidate = value as {\n session?: unknown;\n usage?: unknown;\n errors?: unknown;\n };\n\n return (\n isNormalizedSession(candidate.session) &&\n Boolean(candidate.usage) &&\n typeof candidate.usage === \"object\" &&\n !Array.isArray(candidate.usage) &&\n Array.isArray(candidate.errors)\n );\n}\n\n/** Returns true when a value matches the normalized session contract. */\nexport function isNormalizedSession(\n value: unknown,\n): value is NormalizedSession {\n return (\n Boolean(value) &&\n typeof value === \"object\" &&\n value !== null &&\n \"messages\" in value &&\n Array.isArray((value as { messages?: unknown }).messages)\n );\n}\n\n/** Reuses pre-normalized harness errors when a runtime already returns them. */\nexport function resolveHarnessRunErrors(\n result: unknown,\n): Array<Record<string, JsonValue>> {\n if (\n result &&\n typeof result === \"object\" &&\n Array.isArray((result as Record<string, unknown>).errors)\n ) {\n return (result as { errors: Array<Record<string, JsonValue>> }).errors;\n }\n\n return [];\n}\n\n/** Serializes an arbitrary thrown value into the normalized error shape. */\nexport function serializeError(error: unknown): Record<string, JsonValue> {\n if (error instanceof Error) {\n return {\n type: error.name,\n message: error.message,\n };\n }\n\n return {\n type: \"Error\",\n message: String(error),\n };\n}\n"],"mappings":";AA+JA,SAAS,gBAAgB,OAAwC;AAC/D,SACE,UAAU,QACV,OAAO,UAAU,YACjB,OAAO,UAAU,YACjB,OAAO,UAAU;AAErB;AAEA,SAAS,aAAa,OAAkD;AACtE,SAAO,OAAO,UAAU,YAAY,UAAU,QAAQ,CAAC,MAAM,QAAQ,KAAK;AAC5E;AAEA,SAAS,mBAAmB,OAA+B;AACzD,SAAO,MAAM,IAAI,CAAC,SAAS;AACzB,UAAM,aAAa,YAAY,IAAI;AACnC,WAAO,eAAe,SAAY,OAAO;AAAA,EAC3C,CAAC;AACH;AAEA,SAAS,oBACP,OAC2B;AAC3B,QAAM,aAAwC,CAAC;AAE/C,aAAW,CAAC,KAAK,UAAU,KAAK,OAAO,QAAQ,KAAK,GAAG;AACrD,UAAM,QAAQ,YAAY,UAAU;AACpC,QAAI,UAAU,QAAW;AACvB,iBAAW,GAAG,IAAI;AAAA,IACpB;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,kBAAkB,OAAgB,YAAoB;AACpE,SACE,UAAU,SACT,OAAO,UAAU,YAAY,OAAO,UAAU,eAC/C,cAAc,SACd,OAAQ,MAAkC,UAAU,MAAM;AAE9D;AAGO,SAAS,YAAY,OAAuC;AACjE,MAAI,gBAAgB,KAAK,GAAG;AAC1B,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,QAAQ,KAAK,GAAG;AACxB,WAAO,mBAAmB,KAAK;AAAA,EACjC;AAEA,MAAI,aAAa,KAAK,GAAG;AACvB,WAAO,oBAAoB,KAAK;AAAA,EAClC;AAEA,SAAO;AACT;AAGO,SAAS,gBACd,OAC2B;AAC3B,SAAO,oBAAoB,KAAK;AAClC;AAGO,SAAS,kBACd,OACuC;AACvC,QAAM,aAAa,gBAAgB,KAAK;AACxC,SAAO,OAAO,KAAK,UAAU,EAAE,SAAS,IAAI,aAAa;AAC3D;AAGO,SAAS,iBAAiB,OAA2B;AAC1D,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,eAAe,SAAY,aAAa,OAAO,KAAK;AAC7D;AAUO,SAAS,cAKd,SACqC;AACrC,QAAM,UAA+C;AAAA,IACnD,MAAM,QAAQ;AAAA,IACd,KAAK,OAAO,OAAO,YAAY;AAC7B,YAAM,SAAS,MAAM,QAAQ,IAAI;AAAA,QAC/B;AAAA,QACA,UAAU,QAAQ;AAAA,QAClB,QAAQ,QAAQ;AAAA,QAChB,WAAW,QAAQ;AAAA,QACnB,aAAa,QAAQ;AAAA,MACvB,CAAC;AAED,aAAO,oBAAoB,OAAO,QAAQ,OAAO;AAAA,IACnD;AAAA,EACF;AAEA,SAAO;AACT;AAGO,SAAS,oBAKd,OACA,QACA,SACqB;AACrB,MAAI,aAAa,MAAM,GAAG;AACxB,QACE,WACA,OAAO,KAAK,QAAQ,SAAS,EAAE,SAAS,KACxC,CAAC,OAAO,WACR;AACA,aAAO;AAAA,QACL,GAAG;AAAA,QACH,WAAW,QAAQ;AAAA,MACrB;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,OAAO;AACtB,QAAMA,aAAY,yBAAyB,OAAO,SAAS;AAC3D,QAAM,QAAQ,OAAO,SAAS,CAAC;AAC/B,QAAM,WACJ,OAAO,YACP,6BAA6B;AAAA,IAC3B;AAAA,IACA;AAAA,IACA,WAAAA;AAAA,EACF,CAAC;AACH,QAAM,WAAW,OAAO,WACpB,kBAAkB,OAAO,QAAQ,IACjC;AACJ,QAAM,YAAY;AAAA,IAChB,SAAS;AAAA,IACT,OAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,SAAS;AAAA,MACP;AAAA,MACA,GAAI,MAAM,WAAW,EAAE,UAAU,MAAM,SAAS,IAAI,CAAC;AAAA,MACrD,GAAI,MAAM,QAAQ,EAAE,OAAO,MAAM,MAAM,IAAI,CAAC;AAAA,MAC5C,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,IACA,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,IACzC;AAAA,IACA,GAAI,OAAO,UAAU,EAAE,SAAS,OAAO,QAAQ,IAAI,CAAC;AAAA,IACpD,GAAI,YAAY,EAAE,UAAU,IAAI,CAAC;AAAA,IACjC,QAAQ,sBAAsB,OAAO,MAAM;AAAA,EAC7C;AACF;AAEA,SAAS,6BAAqC;AAAA,EAC5C;AAAA,EACA;AAAA,EACA,WAAW;AACb,GAIwB;AACtB,QAAM,WAAgC;AAAA,IACpC;AAAA,MACE,MAAM;AAAA,MACN,SAAS,iBAAiB,KAAK;AAAA,IACjC;AAAA,EACF;AAEA,MAAI,WAAW,UAAa,oBAAoB,SAAS,GAAG;AAC1D,aAAS,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,GAAI,WAAW,SAAY,EAAE,SAAS,iBAAiB,MAAM,EAAE,IAAI,CAAC;AAAA,MACpE,GAAI,oBAAoB,SAAS,IAC7B,EAAE,WAAW,oBAAoB,IACjC,CAAC;AAAA,IACP,CAAC;AAAA,EACH;AAEA,SAAO;AACT;AAEA,SAAS,yBACP,OACkB;AAClB,UAAQ,SAAS,CAAC,GAAG,IAAI,CAAC,SAAS;AACjC,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,QAAQ;AAAA,MACR,OAAO;AAAA,MACP,UAAU;AAAA,MACV,GAAG;AAAA,IACL,IAAI;AACJ,UAAM,OAAO,2BAA2B,YAAY;AACpD,UAAM,SAAS,YAAY,SAAS;AACpC,UAAM,QAAQ,uBAAuB,QAAQ;AAC7C,UAAM,WAAW,cAAc,kBAAkB,WAAW,IAAI;AAEhE,WAAO;AAAA,MACL,GAAG;AAAA,MACH,GAAI,OAAO,EAAE,WAAW,KAAK,IAAI,CAAC;AAAA,MAClC,GAAI,WAAW,SAAY,EAAE,OAAO,IAAI,CAAC;AAAA,MACzC,GAAI,QAAQ,EAAE,MAAM,IAAI,CAAC;AAAA,MACzB,GAAI,WAAW,EAAE,SAAS,IAAI,CAAC;AAAA,IACjC;AAAA,EACF,CAAC;AACH;AAEA,SAAS,2BACP,OACuC;AACvC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,YAAY,KAAK;AACpC,SAAO,cACL,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,IACvB,aACA;AACN;AAEA,SAAS,uBACP,OACqC;AACrC,MAAI,UAAU,QAAW;AACvB,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,EAAE,SAAS,MAAM,GAAG,QAAQ,IAAI;AAEtC,SAAO;AAAA,IACL,GAAG;AAAA,IACH,SAAS,OAAO,YAAY,WAAW,UAAU,OAAO,OAAO;AAAA,IAC/D,GAAI,OAAO,SAAS,WAAW,EAAE,KAAK,IAAI,CAAC;AAAA,EAC7C;AACF;AAEA,SAAS,yBACP,kBACA,iBACA;AACA,QAAM,YAAY;AAAA,IAChB,GAAI,oBAAoB,CAAC;AAAA,IACzB,GAAI,kBAAkB,gBAAgB,eAAe,IAAI,CAAC;AAAA,EAC5D;AAEA,SAAO,OAAO,KAAK,SAAS,EAAE,SAAS,IAAI,YAAY;AACzD;AAEA,SAAS,sBACP,QACkC;AAClC,UAAQ,UAAU,CAAC,GAAG,IAAI,CAAC,UAAU;AACnC,UAAM,aAAa,YAAY,KAAK;AAEpC,QACE,cACA,OAAO,eAAe,YACtB,CAAC,MAAM,QAAQ,UAAU,KACzB,OAAO,KAAK,UAAU,EAAE,SAAS,GACjC;AACA,aAAO;AAAA,IACT;AAEA,WAAO,eAAe,KAAK;AAAA,EAC7B,CAAC;AACH;AAGO,SAAS,UAAU,SAA8C;AACtE,SAAO,QAAQ,SAAS,QAAQ,CAAC,YAAY,QAAQ,aAAa,CAAC,CAAC;AACtE;AAGO,SAAS,eACd,SACA,MACqB;AACrB,SAAO,QAAQ,SAAS,OAAO,CAAC,YAAY,QAAQ,SAAS,IAAI;AACnE;AAGO,SAAS,eAAe,SAA4B;AACzD,SAAO,eAAe,SAAS,QAAQ;AACzC;AAGO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAGO,SAAS,kBAAkB,SAA4B;AAC5D,SAAO,eAAe,SAAS,WAAW;AAC5C;AAGO,SAAS,aAAa,SAA4B;AACvD,SAAO,eAAe,SAAS,MAAM;AACvC;AAGO,SAAS,wBACd,OACA,KACiB;AACjB,QAAM,YACJ,iBAAiB,QACb,QACA,IAAI,MAAM,OAAO,SAAS,eAAe,CAAC;AAChD,SAAO,OAAO,OAAO,WAAW;AAAA,IAC9B,gBAAgB;AAAA,EAClB,CAAC;AACH;AAGO,SAAS,uBAAuB,OAAwC;AAC7E,MACE,SACA,OAAO,UAAU,YACjB,oBAAoB,SACpB,aAAc,MAAuC,cAAc,GACnE;AACA,WAAQ,MAAyC;AAAA,EACnD;AAEA,SAAO;AACT;AAGO,SAAS,aAAa,OAAqC;AAChE,MAAI,CAAC,SAAS,OAAO,UAAU,UAAU;AACvC,WAAO;AAAA,EACT;AAEA,QAAM,YAAY;AAMlB,SACE,oBAAoB,UAAU,OAAO,KACrC,QAAQ,UAAU,KAAK,KACvB,OAAO,UAAU,UAAU,YAC3B,CAAC,MAAM,QAAQ,UAAU,KAAK,KAC9B,MAAM,QAAQ,UAAU,MAAM;AAElC;AAGO,SAAS,oBACd,OAC4B;AAC5B,SACE,QAAQ,KAAK,KACb,OAAO,UAAU,YACjB,UAAU,QACV,cAAc,SACd,MAAM,QAAS,MAAiC,QAAQ;AAE5D;AAGO,SAAS,wBACd,QACkC;AAClC,MACE,UACA,OAAO,WAAW,YAClB,MAAM,QAAS,OAAmC,MAAM,GACxD;AACA,WAAQ,OAAwD;AAAA,EAClE;AAEA,SAAO,CAAC;AACV;AAGO,SAAS,eAAe,OAA2C;AACxE,MAAI,iBAAiB,OAAO;AAC1B,WAAO;AAAA,MACL,MAAM,MAAM;AAAA,MACZ,SAAS,MAAM;AAAA,IACjB;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM;AAAA,IACN,SAAS,OAAO,KAAK;AAAA,EACvB;AACF;","names":["toolCalls"]}