@alis-build/harness-eval 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +700 -0
- package/dist/adapters/claude-code/index.d.ts +3 -0
- package/dist/adapters/claude-code/index.js +2 -0
- package/dist/build-DsVJ_UeU.js +1396 -0
- package/dist/build-DsVJ_UeU.js.map +1 -0
- package/dist/cardinality-DlE44e-4.js +31 -0
- package/dist/cardinality-DlE44e-4.js.map +1 -0
- package/dist/claude-code-ycT0JQZF.js +563 -0
- package/dist/claude-code-ycT0JQZF.js.map +1 -0
- package/dist/cli/bin.d.ts +1 -0
- package/dist/cli/bin.js +623 -0
- package/dist/cli/bin.js.map +1 -0
- package/dist/config/loader.d.ts +2 -0
- package/dist/config/loader.js +2 -0
- package/dist/index-6Z17eKZx.d.ts +72 -0
- package/dist/index.d.ts +725 -0
- package/dist/index.js +5 -0
- package/dist/loader-BCnFJ8rm.js +717 -0
- package/dist/loader-BCnFJ8rm.js.map +1 -0
- package/dist/loader-DTvoVfN0.d.ts +33 -0
- package/dist/rolldown-runtime-D7D4PA-g.js +13 -0
- package/dist/runner/suite.d.ts +2 -0
- package/dist/runner/suite.js +2 -0
- package/dist/suite-BoOvK_lq.d.ts +7 -0
- package/dist/suite-chj0j22j.js +684 -0
- package/dist/suite-chj0j22j.js.map +1 -0
- package/dist/types-B9H4IZtA.d.ts +305 -0
- package/dist/types-BQol062t.d.ts +292 -0
- package/package.json +74 -0
- package/schemas/eval-interchange-agent-trace.schema.json +322 -0
- package/schemas/eval-interchange-proto-instance.schema.json +106 -0
- package/schemas/eval-interchange.schema.json +140 -0
- package/schemas/eval-run-envelope.schema.json +2195 -0
- package/schemas/trajectory-view.schema.json +441 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bin.js","names":[],"sources":["../../src/cli/args.ts","../../src/cli/commands/envelope.ts","../../src/cli/commands/format.ts","../../src/cli/progress.ts","../../src/cli/commands/grade.ts","../../src/cli/commands/otel-output.ts","../../src/cli/commands/run.ts","../../src/cli/main.ts","../../src/cli/bin.ts"],"sourcesContent":["/**\n * Minimal argv parser — no external deps.\n */\n\nexport interface ParsedArgs {\n command?: string;\n positional: string[];\n options: Record<string, string | boolean>;\n}\n\nexport function parseArgs(argv: string[]): ParsedArgs {\n const positional: string[] = [];\n const options: Record<string, string | boolean> = {};\n let command: string | undefined;\n\n const args = [...argv];\n if (args.length > 0 && !args[0].startsWith(\"-\")) {\n command = args.shift();\n }\n\n for (let i = 0; i < args.length; i++) {\n const arg = args[i];\n if (arg === \"--\") {\n positional.push(...args.slice(i + 1));\n break;\n }\n if (arg.startsWith(\"--\")) {\n const key = arg.slice(2);\n const next = args[i + 1];\n if (next && !next.startsWith(\"-\")) {\n options[key] = next;\n i++;\n } else {\n options[key] = true;\n }\n } else if (arg.startsWith(\"-\") && arg.length === 2) {\n const key = arg.slice(1);\n const next = args[i + 1];\n if (next && !next.startsWith(\"-\")) {\n options[key] = next;\n i++;\n } else {\n options[key] = true;\n }\n } else {\n positional.push(arg);\n }\n }\n\n return { command, positional, options };\n}\n\nexport function getOption(\n options: Record<string, string | boolean>,\n name: string,\n): string | undefined {\n const v = options[name];\n return typeof v === \"string\" ? v : undefined;\n}\n\nexport function getOptionInt(\n options: Record<string, string | boolean>,\n name: string,\n defaultValue: number,\n): number {\n const v = getOption(options, name);\n if (v === undefined) return defaultValue;\n const n = Number.parseInt(v, 10);\n if (!Number.isFinite(n)) return defaultValue;\n return n;\n}\n\nexport function hasOption(\n options: Record<string, string | boolean>,\n name: string,\n): boolean {\n const v = options[name];\n return v === true || (typeof v === \"string\" && v === \"true\");\n}\n","/**\n * `harness-eval envelope` — build EvalRunEnvelope and interchange projections.\n */\n\nimport { readFile, writeFile } from \"node:fs/promises\";\nimport { dirname, join } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\n\nimport { buildEvalRunEnvelopeFromFiles } from \"../../eval-record/build\";\nimport {\n toAgentTrace,\n toProtoInstances,\n toTrajectory,\n} from \"../../eval-interchange/projections\";\nimport type { EvalRunEnvelope } from \"../../types/eval-record\";\nimport { getOption, hasOption, type ParsedArgs } from \"../args\";\n\nexport type EnvelopeProjection =\n | \"envelope\"\n | \"trajectory\"\n | \"instances\"\n | \"agent-trace\";\n\nconst PROJECTIONS = new Set<EnvelopeProjection>([\n \"envelope\",\n \"trajectory\",\n \"instances\",\n \"agent-trace\",\n]);\n\nexport function parseEnvelopeProjection(\n value: string | undefined,\n): EnvelopeProjection | undefined {\n if (value === undefined) return \"envelope\";\n if (PROJECTIONS.has(value as EnvelopeProjection)) {\n return value as EnvelopeProjection;\n }\n return undefined;\n}\n\nexport function serializeEnvelopeProjection(\n envelope: EvalRunEnvelope,\n projection: EnvelopeProjection,\n): string {\n switch (projection) {\n case \"trajectory\":\n return `${toTrajectory(envelope).map((row) => JSON.stringify(row)).join(\"\\n\")}\\n`;\n case \"instances\":\n return `${JSON.stringify(toProtoInstances(envelope), null, 2)}\\n`;\n case \"agent-trace\":\n return `${JSON.stringify(toAgentTrace(envelope), null, 2)}\\n`;\n case \"envelope\":\n default:\n return `${JSON.stringify(envelope, null, 2)}\\n`;\n }\n}\n\nasync function readFrameworkVersion(): Promise<string | undefined> {\n try {\n const packagePath = join(\n dirname(fileURLToPath(import.meta.url)),\n \"../../../package.json\",\n );\n const text = await readFile(packagePath, \"utf8\");\n const pkg = JSON.parse(text) as { version?: string };\n return pkg.version;\n } catch {\n return undefined;\n }\n}\n\nexport async function envelopeCommand(args: ParsedArgs): Promise<number> {\n const reportPath = args.positional[0];\n if (!reportPath) {\n console.error(\n \"usage: harness-eval envelope <report.json> [--output path] [--grading path] [--suite path] [--projection envelope|trajectory|instances|agent-trace] [--include-raw-stream-events] [--no-transcript]\",\n );\n return 2;\n }\n\n const outputPath = getOption(args.options, \"output\");\n const gradingPath = getOption(args.options, \"grading\");\n const suitePath = getOption(args.options, \"suite\");\n const projection = parseEnvelopeProjection(\n getOption(args.options, \"projection\"),\n );\n\n if (!projection) {\n console.error(\n \"invalid --projection; expected envelope, trajectory, instances, or agent-trace\",\n );\n return 2;\n }\n\n let envelope: EvalRunEnvelope;\n try {\n const frameworkVersion = await readFrameworkVersion();\n envelope = await buildEvalRunEnvelopeFromFiles(reportPath, {\n gradingPath,\n suitePath,\n includeTranscript: !hasOption(args.options, \"no-transcript\"),\n includeRawStreamEvents: hasOption(args.options, \"include-raw-stream-events\"),\n harness: { frameworkVersion },\n });\n } catch (err) {\n console.error(err instanceof Error ? err.message : String(err));\n return 2;\n }\n\n const serialized = serializeEnvelopeProjection(envelope, projection);\n\n if (outputPath) {\n await writeFile(outputPath, serialized, \"utf8\");\n } else {\n process.stdout.write(serialized);\n }\n\n return envelope.summary.behavioralPass ? 0 : 1;\n}\n","/**\n * `harness-eval format` command.\n */\n\nimport { readFile } from \"node:fs/promises\";\n\nimport { formatReport } from \"../../reporter/index\";\nimport type { SuiteReport } from \"../../runner/types\";\nimport { getOption, type ParsedArgs } from \"../args\";\n\nexport async function formatCommand(args: ParsedArgs): Promise<number> {\n const reportPath = args.positional[0];\n if (!reportPath) {\n console.error(\"usage: harness-eval format <report.json> [options]\");\n return 2;\n }\n\n const format = getOption(args.options, \"format\") ?? \"console\";\n const baselinePath = getOption(args.options, \"baseline\");\n\n let report: SuiteReport;\n try {\n report = JSON.parse(await readFile(reportPath, \"utf8\")) as SuiteReport;\n } catch (err) {\n console.error(err instanceof Error ? err.message : String(err));\n return 2;\n }\n\n let baseline: SuiteReport | undefined;\n if (baselinePath) {\n baseline = JSON.parse(await readFile(baselinePath, \"utf8\")) as SuiteReport;\n }\n\n const formatted = formatReport(report, {\n format:\n format === \"markdown\" || format === \"json\" ? format : \"console\",\n baseline,\n color: format === \"console\",\n });\n\n process.stdout.write(formatted);\n if (!formatted.endsWith(\"\\n\")) process.stdout.write(\"\\n\");\n return report.cells.every((c) => c.passed) ? 0 : 1;\n}\n","/**\n * CLI progress reporting for long-running harness and grade commands.\n */\n\nimport type { Writable } from \"node:stream\";\n\nimport { getOption, hasOption } from \"./args\";\nimport type { GradeProgressEvent as GraderGradeProgressEvent } from \"../grader/types\";\nimport type { AssertionResult } from \"../types/assertions\";\nimport type { CellReport, ProgressCallback } from \"../runner/types\";\n\nexport type ProgressMode = \"default\" | \"quiet\" | \"verbose\" | \"json\";\n\nconst GREEN = \"\\x1b[32m\";\nconst RED = \"\\x1b[31m\";\nconst YELLOW = \"\\x1b[33m\";\nconst DIM = \"\\x1b[2m\";\nconst RESET = \"\\x1b[0m\";\n\nexport interface RunProgressOptions {\n mode: ProgressMode;\n maxConcurrent?: number;\n color?: boolean;\n stream?: Writable;\n}\n\nexport interface GradeProgressOptions {\n mode: ProgressMode;\n maxConcurrent?: number;\n color?: boolean;\n stream?: Writable;\n}\n\nexport function resolveProgressMode(\n options: Record<string, string | boolean>,\n): ProgressMode {\n const progress = getOption(options, \"progress\");\n if (\n progress === \"json\" ||\n progress === \"quiet\" ||\n progress === \"verbose\" ||\n progress === \"default\"\n ) {\n return progress;\n }\n if (hasOption(options, \"quiet\")) return \"quiet\";\n if (hasOption(options, \"verbose\")) return \"verbose\";\n return \"default\";\n}\n\n/** Whether to emit ANSI colors on the progress stream (stderr). */\nexport function resolveProgressColor(\n options: Record<string, string | boolean>,\n stream: Writable = process.stderr,\n): boolean {\n if (hasOption(options, \"no-color\")) return false;\n if (hasOption(options, \"color\")) return true;\n if (process.env.NO_COLOR !== undefined && process.env.NO_COLOR !== \"\") {\n return false;\n }\n if (process.env.FORCE_COLOR !== undefined && process.env.FORCE_COLOR !== \"0\") {\n return true;\n }\n return (\n \"isTTY\" in stream &&\n (stream as { isTTY?: boolean }).isTTY === true\n );\n}\n\nfunction okMark(color: boolean): string {\n return color ? `${GREEN}✓${RESET}` : \"✓\";\n}\n\nfunction failMark(color: boolean): string {\n return color ? `${RED}✗${RESET}` : \"✗\";\n}\n\nfunction okStatus(color: boolean): string {\n return color ? `${GREEN}ok${RESET}` : \"ok\";\n}\n\nfunction failStatus(color: boolean): string {\n return color ? `${RED}FAIL${RESET}` : \"FAIL\";\n}\n\nfunction passLabel(color: boolean): string {\n return color ? `${GREEN}PASS${RESET}` : \"PASS\";\n}\n\nfunction failLabel(color: boolean): string {\n return color ? `${RED}FAIL${RESET}` : \"FAIL\";\n}\n\nexport function createRunProgressHandler(\n options: RunProgressOptions,\n): ProgressCallback {\n const stream = options.stream ?? process.stderr;\n const mode = options.mode;\n const color = options.color ?? false;\n\n let totalReps = 0;\n let completed = 0;\n let totalDurationMs = 0;\n\n return (event) => {\n switch (event.kind) {\n case \"suite-start\":\n totalReps = event.totalReps;\n completed = 0;\n totalDurationMs = 0;\n if (mode === \"quiet\") return;\n if (mode === \"json\") {\n writeJson(stream, {\n kind: \"suite-start\",\n totalReps: event.totalReps,\n maxConcurrent: options.maxConcurrent,\n });\n return;\n }\n const concurrent =\n options.maxConcurrent !== undefined\n ? ` (max-concurrent ${options.maxConcurrent})`\n : \"\";\n stream.write(`Running ${totalReps} repetitions${concurrent}...\\n\\n`);\n break;\n\n case \"rep-complete\":\n completed++;\n totalDurationMs += event.durationMs;\n if (mode === \"quiet\") {\n stream.write(event.ok ? (color ? `${GREEN}.${RESET}` : \".\") : (color ? `${RED}x${RESET}` : \"x\"));\n return;\n }\n if (mode === \"json\") {\n writeJson(stream, {\n kind: \"rep-complete\",\n index: completed,\n total: totalReps,\n caseId: event.caseId,\n cellLabel: event.cellLabel,\n repIndex: event.repIndex,\n ok: event.ok,\n durationMs: event.durationMs,\n toolCallCount: event.toolCallCount,\n errorMessage: event.errorMessage,\n });\n return;\n }\n\n const eta = formatEta(totalDurationMs, completed, totalReps);\n const icon = event.ok ? okMark(color) : failMark(color);\n const status = event.ok ? okStatus(color) : failStatus(color);\n let line = `${icon} [${completed}/${totalReps}] ${event.caseId} @ ${event.cellLabel} #${event.repIndex} ${status} ${formatDuration(event.durationMs)}`;\n if (eta) {\n line += color\n ? ` ${DIM}(${eta})${RESET}`\n : ` (${eta})`;\n }\n if (!event.ok && event.errorMessage) {\n line += color\n ? ` ${YELLOW}— ${truncate(event.errorMessage, 80)}${RESET}`\n : ` — ${truncate(event.errorMessage, 80)}`;\n }\n if (mode === \"verbose\") {\n if (event.toolCallCount !== undefined) {\n line += ` tools=${event.toolCallCount}`;\n }\n const summary = formatAssertionSummary(event.assertionResults, color);\n if (summary) line += ` ${summary}`;\n }\n stream.write(`${line}\\n`);\n break;\n\n case \"cell-complete\":\n if (mode === \"quiet\") return;\n if (mode === \"json\") {\n writeJson(stream, {\n kind: \"cell-complete\",\n caseId: event.report.caseId,\n cellLabel: event.report.cell.label,\n passed: event.report.passed,\n adapterErrors: event.report.adapterErrors,\n assertionStats: event.report.assertionStats.map((s) => ({\n description: s.description,\n passRate: s.passRate,\n meetsThreshold: s.meetsThreshold,\n })),\n });\n return;\n }\n stream.write(`${formatCellSummary(event.report, color)}\\n`);\n break;\n\n case \"suite-complete\":\n if (mode === \"quiet\") {\n stream.write(\"\\n\");\n return;\n }\n if (mode === \"json\") {\n writeJson(stream, {\n kind: \"suite-complete\",\n durationMs: event.report.durationMs,\n cellsTotal: event.report.cells.length,\n cellsPassed: event.report.cells.filter((c) => c.passed).length,\n });\n return;\n }\n const okReps = event.report.cells.reduce(\n (n, c) => n + c.repetitions.filter((r) => r.error === null).length,\n 0,\n );\n const totalRun = event.report.cells.reduce(\n (n, c) => n + c.repetitions.length,\n 0,\n );\n const adapterErrors = event.report.cells.reduce(\n (n, c) => n + c.adapterErrors,\n 0,\n );\n let footer = `\\nFinished in ${formatDuration(event.report.durationMs)} (${okReps}/${totalRun} reps ok`;\n if (adapterErrors > 0) {\n footer += `, ${adapterErrors} adapter error(s)`;\n }\n footer += \")\\n\\n\";\n stream.write(footer);\n break;\n\n default:\n break;\n }\n };\n}\n\nexport function createGradeProgressHandler(\n options: GradeProgressOptions,\n): (event: GraderGradeProgressEvent) => void {\n const stream = options.stream ?? process.stderr;\n const mode = options.mode;\n const color = options.color ?? false;\n\n let total = 0;\n let completed = 0;\n let totalDurationMs = 0;\n\n return (event) => {\n switch (event.kind) {\n case \"grade-start\":\n total = event.total;\n completed = 0;\n totalDurationMs = 0;\n if (mode === \"quiet\" || total === 0) return;\n if (mode === \"json\") {\n writeJson(stream, {\n kind: \"grade-start\",\n total: event.total,\n maxConcurrent: options.maxConcurrent,\n });\n return;\n }\n const concurrent =\n options.maxConcurrent !== undefined\n ? ` (max-concurrent ${options.maxConcurrent})`\n : \"\";\n stream.write(\n `Grading ${total} repetition(s)${concurrent}...\\n\\n`,\n );\n break;\n\n case \"grade-complete\":\n completed++;\n totalDurationMs += event.durationMs;\n if (mode === \"quiet\") {\n const allPassed = event.failed === 0 && !event.graderError;\n stream.write(\n allPassed\n ? color ? `${GREEN}.${RESET}` : \".\"\n : color ? `${RED}x${RESET}` : \"x\",\n );\n return;\n }\n if (mode === \"json\") {\n writeJson(stream, {\n kind: \"grade-complete\",\n index: completed,\n total,\n caseId: event.caseId,\n cellLabel: event.cellLabel,\n repetitionIndex: event.repetitionIndex,\n passed: event.passed,\n failed: event.failed,\n durationMs: event.durationMs,\n graderError: event.graderError,\n });\n return;\n }\n\n const eta = formatEta(totalDurationMs, completed, total);\n const ok = event.failed === 0 && !event.graderError;\n const icon = ok ? okMark(color) : failMark(color);\n const status = ok ? okStatus(color) : failStatus(color);\n let line = `${icon} [${completed}/${total}] ${event.caseId} @ ${event.cellLabel} #${event.repetitionIndex} ${status} ${formatDuration(event.durationMs)}`;\n line += ` expectations ${event.passed}/${event.passed + event.failed}`;\n if (eta) {\n line += color ? ` ${DIM}(${eta})${RESET}` : ` (${eta})`;\n }\n if (event.graderError) {\n line += color\n ? ` ${YELLOW}— ${truncate(event.graderError, 80)}${RESET}`\n : ` — ${truncate(event.graderError, 80)}`;\n }\n if (mode === \"verbose\" && event.failed && event.failed > 0) {\n line += color ? ` ${YELLOW}see grading output${RESET}` : \" see grading output\";\n }\n stream.write(`${line}\\n`);\n break;\n\n case \"grade-done\":\n if (mode === \"quiet\") {\n stream.write(\"\\n\");\n return;\n }\n if (mode === \"json\") {\n writeJson(stream, {\n kind: \"grade-done\",\n durationMs: event.durationMs,\n totalExpectations: event.totalExpectations,\n passedExpectations: event.passedExpectations,\n });\n return;\n }\n if (total === 0) return;\n stream.write(\n `\\nGraded in ${formatDuration(event.durationMs)} (${event.passedExpectations}/${event.totalExpectations} expectations passed)\\n\\n`,\n );\n break;\n\n default:\n break;\n }\n };\n}\n\nfunction writeJson(stream: Writable, value: unknown): void {\n stream.write(`${JSON.stringify(value)}\\n`);\n}\n\nexport function formatDuration(ms: number): string {\n if (ms < 1000) return `${ms}ms`;\n const sec = ms / 1000;\n if (sec < 60) return `${sec.toFixed(1)}s`;\n const min = Math.floor(sec / 60);\n const remSec = Math.round(sec % 60);\n if (min < 60) return `${min}m ${remSec}s`;\n const hr = Math.floor(min / 60);\n const remMin = min % 60;\n return `${hr}h ${remMin}m`;\n}\n\nfunction formatEta(\n totalDurationMs: number,\n completed: number,\n total: number,\n): string | undefined {\n if (completed === 0 || completed >= total) return undefined;\n const avg = totalDurationMs / completed;\n const remaining = (total - completed) * avg;\n return `~${formatDuration(Math.round(remaining))} remaining`;\n}\n\nfunction truncate(text: string, max: number): string {\n if (text.length <= max) return text;\n return `${text.slice(0, max - 1)}…`;\n}\n\nfunction formatAssertionSummary(\n results?: AssertionResult[],\n color = false,\n): string {\n if (!results || results.length === 0) return \"\";\n return results\n .map((r) =>\n `${r.passed ? okMark(color) : failMark(color)} ${r.description}`,\n )\n .join(\", \");\n}\n\nexport function formatCellSummary(cell: CellReport, color: boolean): string {\n const mark = cell.passed ? okMark(color) : failMark(color);\n const status = cell.passed ? passLabel(color) : failLabel(color);\n const parts = cell.assertionStats.map((s) => {\n const pct = (s.passRate * 100).toFixed(0);\n return `${s.description} ${s.passedCount}/${s.evaluatedCount} (${pct}%)`;\n });\n const crash =\n cell.adapterErrors > 0\n ? color\n ? ` ${YELLOW}[${cell.adapterErrors} adapter errors]${RESET}`\n : ` [${cell.adapterErrors} adapter errors]`\n : \"\";\n const stats = parts.length > 0 ? ` ${parts.join(\" · \")}` : \"\";\n return `${mark} ${cell.caseId} @ ${cell.cell.label} ${status}${crash}${stats}`;\n}\n","/**\n * `harness-eval grade` — LLM outcome grading on a suite report.\n */\n\nimport { writeFile } from \"node:fs/promises\";\n\nimport { loadGradingConfig } from \"../../config/grading-loader\";\nimport {\n formatGradingConsole,\n gradeReport,\n gradingReportPassed,\n loadSuiteReport,\n resolveGradeOptions,\n} from \"../../grader/index\";\nimport { getOption, getOptionInt, type ParsedArgs } from \"../args\";\nimport {\n createGradeProgressHandler,\n resolveProgressColor,\n resolveProgressMode,\n} from \"../progress\";\n\nfunction optionalOptionInt(\n options: Record<string, string | boolean>,\n name: string,\n): number | undefined {\n const raw = getOption(options, name);\n if (raw === undefined) return undefined;\n const n = Number.parseInt(raw, 10);\n return Number.isFinite(n) ? n : undefined;\n}\n\nexport async function gradeCommand(args: ParsedArgs): Promise<number> {\n const reportPath = args.positional[0];\n if (!reportPath) {\n console.error(\n \"usage: harness-eval grade <report.json> [--config grading.yaml] [--expectations path] [--output path] [--model id] [--timeout-ms N] [--max-concurrent N]\",\n );\n return 2;\n }\n\n const configPath = getOption(args.options, \"config\");\n const expectationsPath = getOption(args.options, \"expectations\");\n const outputPath = getOption(args.options, \"output\");\n const model = getOption(args.options, \"model\");\n const binary = getOption(args.options, \"binary\");\n const timeoutMs = optionalOptionInt(args.options, \"timeout-ms\");\n const maxConcurrentRaw = getOption(args.options, \"max-concurrent\");\n const maxConcurrent = maxConcurrentRaw\n ? getOptionInt(args.options, \"max-concurrent\", 2)\n : undefined;\n const format = getOption(args.options, \"format\") ?? \"console\";\n const progressMode = resolveProgressMode(args.options);\n const useProgressColor =\n progressMode !== \"json\" && resolveProgressColor(args.options);\n\n let fileConfig;\n if (configPath) {\n try {\n fileConfig = await loadGradingConfig(configPath);\n } catch (err) {\n console.error(err instanceof Error ? err.message : String(err));\n return 2;\n }\n }\n\n let report;\n try {\n report = await loadSuiteReport(reportPath);\n } catch (err) {\n console.error(err instanceof Error ? err.message : String(err));\n return 2;\n }\n\n let gradeOptions;\n try {\n gradeOptions = resolveGradeOptions(\n fileConfig,\n {\n sourceReport: reportPath,\n expectationsPath,\n model,\n binary,\n timeoutMs,\n maxConcurrent,\n },\n configPath,\n );\n } catch (err) {\n console.error(err instanceof Error ? err.message : String(err));\n return 2;\n }\n\n const onProgress = createGradeProgressHandler({\n mode: progressMode,\n maxConcurrent: gradeOptions.maxConcurrent ?? 2,\n color: useProgressColor,\n });\n\n const grading = await gradeReport(report, {\n ...gradeOptions,\n onProgress,\n });\n\n if (outputPath) {\n await writeFile(outputPath, JSON.stringify(grading, null, 2), \"utf8\");\n }\n\n if (format === \"json\") {\n process.stdout.write(JSON.stringify(grading, null, 2));\n process.stdout.write(\"\\n\");\n } else {\n const formatted = formatGradingConsole(grading, format === \"console\");\n process.stdout.write(formatted);\n if (!formatted.endsWith(\"\\n\")) process.stdout.write(\"\\n\");\n }\n\n if (grading.results.length === 0) {\n return 2;\n }\n\n return gradingReportPassed(grading) ? 0 : 1;\n}\n","/**\n * Write OTLP JSON artifacts from a suite report.\n */\n\nimport { mkdir, writeFile } from \"node:fs/promises\";\nimport { join } from \"node:path\";\n\nimport { trajectoryToOtlp } from \"../../otel/emitter\";\nimport type { SuiteReport, TestSuite } from \"../../runner/types\";\n\nfunction safeFilePart(value: string): string {\n return value.replace(/[^a-zA-Z0-9._-]+/g, \"_\");\n}\n\n/**\n * Write one OTLP JSON file per successful repetition.\n *\n * Files: `{caseId}__{cellLabel}__rep{N}.otlp.json`\n */\nexport async function writeOtelArtifacts(\n suite: TestSuite,\n report: SuiteReport,\n outputDir: string,\n): Promise<number> {\n await mkdir(outputDir, { recursive: true });\n\n let written = 0;\n for (const cellReport of report.cells) {\n const testCase = suite.cases.find((c) => c.id === cellReport.caseId);\n if (!testCase) continue;\n\n for (const rep of cellReport.repetitions) {\n if (!rep.adapterResult) continue;\n\n const otlp = trajectoryToOtlp(rep.adapterResult.view, {\n prompt: testCase.prompt,\n });\n\n const filename = `${safeFilePart(cellReport.caseId)}__${safeFilePart(\n cellReport.cell.label,\n )}__rep${rep.repetitionIndex}.otlp.json`;\n\n await writeFile(\n join(outputDir, filename),\n JSON.stringify(otlp, null, 2),\n \"utf8\",\n );\n written++;\n }\n }\n\n return written;\n}\n","/**\n * `harness-eval run` command.\n */\n\nimport { writeFile } from \"node:fs/promises\";\n\nimport { getAdapter } from \"../../adapters/registry\";\nimport { loadSuite } from \"../../config/loader\";\nimport { formatReport } from \"../../reporter/index\";\nimport { runSuite } from \"../../runner/suite\";\nimport type { SuiteReport } from \"../../runner/types\";\nimport { getOption, getOptionInt, type ParsedArgs } from \"../args\";\nimport {\n createRunProgressHandler,\n resolveProgressColor,\n resolveProgressMode,\n} from \"../progress\";\nimport { writeOtelArtifacts } from \"./otel-output\";\n\nexport async function runCommand(args: ParsedArgs): Promise<number> {\n const suitePath = args.positional[0];\n if (!suitePath) {\n console.error(\"usage: harness-eval run <suite.yaml> [options]\");\n return 2;\n }\n\n const format = getOption(args.options, \"format\") ?? \"console\";\n const outputPath = getOption(args.options, \"output\");\n const otelOutputDir = getOption(args.options, \"otel-output\");\n const baselinePath = getOption(args.options, \"baseline\");\n const maxConcurrent = getOptionInt(args.options, \"max-concurrent\", 4);\n const adapterId = getOption(args.options, \"adapter\");\n const progressMode = resolveProgressMode(args.options);\n const useProgressColor =\n progressMode !== \"json\" && resolveProgressColor(args.options);\n\n let suite;\n try {\n suite = await loadSuite(suitePath);\n } catch (err) {\n console.error(err instanceof Error ? err.message : String(err));\n return 2;\n }\n\n const adapter = getAdapter(adapterId ?? suite.adapter ?? \"claude-code\");\n\n const onProgress = createRunProgressHandler({\n mode: progressMode,\n maxConcurrent,\n color: useProgressColor,\n });\n\n const report = await runSuite(suite, {\n adapter,\n maxConcurrent,\n onProgress,\n });\n\n if (outputPath) {\n await writeFile(outputPath, JSON.stringify(report, null, 2), \"utf8\");\n }\n\n if (otelOutputDir) {\n const count = await writeOtelArtifacts(suite, report, otelOutputDir);\n process.stderr.write(`otel: wrote ${count} trace file(s) to ${otelOutputDir}\\n`);\n }\n\n let baseline: SuiteReport | undefined;\n if (baselinePath) {\n const { readFile } = await import(\"node:fs/promises\");\n baseline = JSON.parse(await readFile(baselinePath, \"utf8\")) as SuiteReport;\n }\n\n const formatted = formatReport(report, {\n format:\n format === \"markdown\" || format === \"json\" ? format : \"console\",\n baseline,\n color: format === \"console\",\n });\n\n process.stdout.write(formatted);\n if (!formatted.endsWith(\"\\n\")) process.stdout.write(\"\\n\");\n\n return report.cells.every((c) => c.passed) ? 0 : 1;\n}\n","/**\n * CLI entry point.\n */\n\nimport { envelopeCommand } from \"./commands/envelope\";\nimport { formatCommand } from \"./commands/format\";\nimport { gradeCommand } from \"./commands/grade\";\nimport { runCommand } from \"./commands/run\";\nimport { parseArgs } from \"./args\";\n\nconst USAGE = `harness-eval — harness-level eval framework\n\nUsage:\n harness-eval run <suite.yaml> [--max-concurrent N] [--baseline path] [--output path] [--otel-output dir] [--format console|markdown|json] [--adapter id] [--quiet] [--verbose] [--progress default|quiet|verbose|json]\n harness-eval grade <report.json> [--config grading.yaml] [--expectations path] [--output path] [--model id] [--timeout-ms N] [--max-concurrent N] [--format console|json] [--quiet] [--verbose] [--progress default|quiet|verbose|json]\n harness-eval envelope <report.json> [--output path] [--grading path] [--suite path] [--projection envelope|trajectory|instances|agent-trace] [--include-raw-stream-events] [--no-transcript]\n harness-eval format <report.json> [--format console|markdown|json] [--baseline path]\n harness-eval --help\n\n Progress (run & grade):\n default one line per repetition + per-cell summary (default)\n --quiet colored dots (. = ok, x = fail)\n --verbose per-rep details (tool counts, assertion summary)\n --progress json newline-delimited JSON events on stderr\n --no-color disable ANSI colors on progress output\n --color force ANSI colors on progress output\n`;\n\nexport async function main(argv: string[]): Promise<number> {\n const parsed = parseArgs(argv);\n\n if (parsed.options.help || parsed.command === \"help\" || parsed.options.h) {\n process.stdout.write(USAGE);\n return 0;\n }\n\n switch (parsed.command) {\n case \"run\":\n return await runCommand(parsed);\n case \"grade\":\n return await gradeCommand(parsed);\n case \"envelope\":\n return await envelopeCommand(parsed);\n case \"format\":\n return await formatCommand(parsed);\n case undefined:\n console.error(USAGE);\n return 2;\n default:\n console.error(`unknown command: ${parsed.command}\\n\\n${USAGE}`);\n return 2;\n }\n}\n","#!/usr/bin/env node\n\nimport { main } from \"./main\";\n\nconst code = await main(process.argv.slice(2));\nprocess.exit(code);\n"],"mappings":";;;;;;;;AAUA,SAAgB,UAAU,MAA4B;CACpD,MAAM,aAAuB,CAAC;CAC9B,MAAM,UAA4C,CAAC;CACnD,IAAI;CAEJ,MAAM,OAAO,CAAC,GAAG,IAAI;CACrB,IAAI,KAAK,SAAS,KAAK,CAAC,KAAK,EAAE,CAAC,WAAW,GAAG,GAC5C,UAAU,KAAK,MAAM;CAGvB,KAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;EACpC,MAAM,MAAM,KAAK;EACjB,IAAI,QAAQ,MAAM;GAChB,WAAW,KAAK,GAAG,KAAK,MAAM,IAAI,CAAC,CAAC;GACpC;EACF;EACA,IAAI,IAAI,WAAW,IAAI,GAAG;GACxB,MAAM,MAAM,IAAI,MAAM,CAAC;GACvB,MAAM,OAAO,KAAK,IAAI;GACtB,IAAI,QAAQ,CAAC,KAAK,WAAW,GAAG,GAAG;IACjC,QAAQ,OAAO;IACf;GACF,OACE,QAAQ,OAAO;EAEnB,OAAO,IAAI,IAAI,WAAW,GAAG,KAAK,IAAI,WAAW,GAAG;GAClD,MAAM,MAAM,IAAI,MAAM,CAAC;GACvB,MAAM,OAAO,KAAK,IAAI;GACtB,IAAI,QAAQ,CAAC,KAAK,WAAW,GAAG,GAAG;IACjC,QAAQ,OAAO;IACf;GACF,OACE,QAAQ,OAAO;EAEnB,OACE,WAAW,KAAK,GAAG;CAEvB;CAEA,OAAO;EAAE;EAAS;EAAY;CAAQ;AACxC;AAEA,SAAgB,UACd,SACA,MACoB;CACpB,MAAM,IAAI,QAAQ;CAClB,OAAO,OAAO,MAAM,WAAW,IAAI,KAAA;AACrC;AAEA,SAAgB,aACd,SACA,MACA,cACQ;CACR,MAAM,IAAI,UAAU,SAAS,IAAI;CACjC,IAAI,MAAM,KAAA,GAAW,OAAO;CAC5B,MAAM,IAAI,OAAO,SAAS,GAAG,EAAE;CAC/B,IAAI,CAAC,OAAO,SAAS,CAAC,GAAG,OAAO;CAChC,OAAO;AACT;AAEA,SAAgB,UACd,SACA,MACS;CACT,MAAM,IAAI,QAAQ;CAClB,OAAO,MAAM,QAAS,OAAO,MAAM,YAAY,MAAM;AACvD;;;;;;ACvDA,MAAM,8BAAc,IAAI,IAAwB;CAC9C;CACA;CACA;CACA;AACF,CAAC;AAED,SAAgB,wBACd,OACgC;CAChC,IAAI,UAAU,KAAA,GAAW,OAAO;CAChC,IAAI,YAAY,IAAI,KAA2B,GAC7C,OAAO;AAGX;AAEA,SAAgB,4BACd,UACA,YACQ;CACR,QAAQ,YAAR;EACE,KAAK,cACH,OAAO,GAAG,aAAa,QAAQ,CAAC,CAAC,KAAK,QAAQ,KAAK,UAAU,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE;EAChF,KAAK,aACH,OAAO,GAAG,KAAK,UAAU,iBAAiB,QAAQ,GAAG,MAAM,CAAC,EAAE;EAChE,KAAK,eACH,OAAO,GAAG,KAAK,UAAU,aAAa,QAAQ,GAAG,MAAM,CAAC,EAAE;EAE5D,SACE,OAAO,GAAG,KAAK,UAAU,UAAU,MAAM,CAAC,EAAE;CAChD;AACF;AAEA,eAAe,uBAAoD;CACjE,IAAI;EAKF,MAAM,OAAO,MAAM,SAJC,KAClB,QAAQ,cAAc,OAAO,KAAK,GAAG,CAAC,GACtC,uBAEoC,GAAG,MAAM;EAE/C,OADY,KAAK,MAAM,IACd,CAAC,CAAC;CACb,QAAQ;EACN;CACF;AACF;AAEA,eAAsB,gBAAgB,MAAmC;CACvE,MAAM,aAAa,KAAK,WAAW;CACnC,IAAI,CAAC,YAAY;EACf,QAAQ,MACN,qMACF;EACA,OAAO;CACT;CAEA,MAAM,aAAa,UAAU,KAAK,SAAS,QAAQ;CACnD,MAAM,cAAc,UAAU,KAAK,SAAS,SAAS;CACrD,MAAM,YAAY,UAAU,KAAK,SAAS,OAAO;CACjD,MAAM,aAAa,wBACjB,UAAU,KAAK,SAAS,YAAY,CACtC;CAEA,IAAI,CAAC,YAAY;EACf,QAAQ,MACN,gFACF;EACA,OAAO;CACT;CAEA,IAAI;CACJ,IAAI;EACF,MAAM,mBAAmB,MAAM,qBAAqB;EACpD,WAAW,MAAM,8BAA8B,YAAY;GACzD;GACA;GACA,mBAAmB,CAAC,UAAU,KAAK,SAAS,eAAe;GAC3D,wBAAwB,UAAU,KAAK,SAAS,2BAA2B;GAC3E,SAAS,EAAE,iBAAiB;EAC9B,CAAC;CACH,SAAS,KAAK;EACZ,QAAQ,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;EAC9D,OAAO;CACT;CAEA,MAAM,aAAa,4BAA4B,UAAU,UAAU;CAEnE,IAAI,YACF,MAAM,UAAU,YAAY,YAAY,MAAM;MAE9C,QAAQ,OAAO,MAAM,UAAU;CAGjC,OAAO,SAAS,QAAQ,iBAAiB,IAAI;AAC/C;;;;;;AC5GA,eAAsB,cAAc,MAAmC;CACrE,MAAM,aAAa,KAAK,WAAW;CACnC,IAAI,CAAC,YAAY;EACf,QAAQ,MAAM,oDAAoD;EAClE,OAAO;CACT;CAEA,MAAM,SAAS,UAAU,KAAK,SAAS,QAAQ,KAAK;CACpD,MAAM,eAAe,UAAU,KAAK,SAAS,UAAU;CAEvD,IAAI;CACJ,IAAI;EACF,SAAS,KAAK,MAAM,MAAM,SAAS,YAAY,MAAM,CAAC;CACxD,SAAS,KAAK;EACZ,QAAQ,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;EAC9D,OAAO;CACT;CAEA,IAAI;CACJ,IAAI,cACF,WAAW,KAAK,MAAM,MAAM,SAAS,cAAc,MAAM,CAAC;CAG5D,MAAM,YAAY,aAAa,QAAQ;EACrC,QACE,WAAW,cAAc,WAAW,SAAS,SAAS;EACxD;EACA,OAAO,WAAW;CACpB,CAAC;CAED,QAAQ,OAAO,MAAM,SAAS;CAC9B,IAAI,CAAC,UAAU,SAAS,IAAI,GAAG,QAAQ,OAAO,MAAM,IAAI;CACxD,OAAO,OAAO,MAAM,OAAO,MAAM,EAAE,MAAM,IAAI,IAAI;AACnD;;;AC9BA,MAAM,QAAQ;AACd,MAAM,MAAM;AACZ,MAAM,SAAS;AACf,MAAM,MAAM;AACZ,MAAM,QAAQ;AAgBd,SAAgB,oBACd,SACc;CACd,MAAM,WAAW,UAAU,SAAS,UAAU;CAC9C,IACE,aAAa,UACb,aAAa,WACb,aAAa,aACb,aAAa,WAEb,OAAO;CAET,IAAI,UAAU,SAAS,OAAO,GAAG,OAAO;CACxC,IAAI,UAAU,SAAS,SAAS,GAAG,OAAO;CAC1C,OAAO;AACT;;AAGA,SAAgB,qBACd,SACA,SAAmB,QAAQ,QAClB;CACT,IAAI,UAAU,SAAS,UAAU,GAAG,OAAO;CAC3C,IAAI,UAAU,SAAS,OAAO,GAAG,OAAO;CACxC,IAAI,QAAQ,IAAI,aAAa,KAAA,KAAa,QAAQ,IAAI,aAAa,IACjE,OAAO;CAET,IAAI,QAAQ,IAAI,gBAAgB,KAAA,KAAa,QAAQ,IAAI,gBAAgB,KACvE,OAAO;CAET,OACE,WAAW,UACV,OAA+B,UAAU;AAE9C;AAEA,SAAS,OAAO,OAAwB;CACtC,OAAO,QAAQ,GAAG,MAAM,GAAG,UAAU;AACvC;AAEA,SAAS,SAAS,OAAwB;CACxC,OAAO,QAAQ,GAAG,IAAI,GAAG,UAAU;AACrC;AAEA,SAAS,SAAS,OAAwB;CACxC,OAAO,QAAQ,GAAG,MAAM,IAAI,UAAU;AACxC;AAEA,SAAS,WAAW,OAAwB;CAC1C,OAAO,QAAQ,GAAG,IAAI,MAAM,UAAU;AACxC;AAEA,SAAS,UAAU,OAAwB;CACzC,OAAO,QAAQ,GAAG,MAAM,MAAM,UAAU;AAC1C;AAEA,SAAS,UAAU,OAAwB;CACzC,OAAO,QAAQ,GAAG,IAAI,MAAM,UAAU;AACxC;AAEA,SAAgB,yBACd,SACkB;CAClB,MAAM,SAAS,QAAQ,UAAU,QAAQ;CACzC,MAAM,OAAO,QAAQ;CACrB,MAAM,QAAQ,QAAQ,SAAS;CAE/B,IAAI,YAAY;CAChB,IAAI,YAAY;CAChB,IAAI,kBAAkB;CAEtB,QAAQ,UAAU;EAChB,QAAQ,MAAM,MAAd;GACE,KAAK;IACH,YAAY,MAAM;IAClB,YAAY;IACZ,kBAAkB;IAClB,IAAI,SAAS,SAAS;IACtB,IAAI,SAAS,QAAQ;KACnB,UAAU,QAAQ;MAChB,MAAM;MACN,WAAW,MAAM;MACjB,eAAe,QAAQ;KACzB,CAAC;KACD;IACF;IACA,MAAM,aACJ,QAAQ,kBAAkB,KAAA,IACtB,oBAAoB,QAAQ,cAAc,KAC1C;IACN,OAAO,MAAM,WAAW,UAAU,cAAc,WAAW,QAAQ;IACnE;GAEF,KAAK;IACH;IACA,mBAAmB,MAAM;IACzB,IAAI,SAAS,SAAS;KACpB,OAAO,MAAM,MAAM,KAAM,QAAQ,GAAG,MAAM,GAAG,UAAU,MAAQ,QAAQ,GAAG,IAAI,GAAG,UAAU,GAAI;KAC/F;IACF;IACA,IAAI,SAAS,QAAQ;KACnB,UAAU,QAAQ;MAChB,MAAM;MACN,OAAO;MACP,OAAO;MACP,QAAQ,MAAM;MACd,WAAW,MAAM;MACjB,UAAU,MAAM;MAChB,IAAI,MAAM;MACV,YAAY,MAAM;MAClB,eAAe,MAAM;MACrB,cAAc,MAAM;KACtB,CAAC;KACD;IACF;IAEA,MAAM,MAAM,UAAU,iBAAiB,WAAW,SAAS;IAC3D,MAAM,OAAO,MAAM,KAAK,OAAO,KAAK,IAAI,SAAS,KAAK;IACtD,MAAM,SAAS,MAAM,KAAK,SAAS,KAAK,IAAI,WAAW,KAAK;IAC5D,IAAI,OAAO,GAAG,KAAK,IAAI,UAAU,GAAG,UAAU,IAAI,MAAM,OAAO,KAAK,MAAM,UAAU,IAAI,MAAM,SAAS,IAAI,OAAO,IAAI,eAAe,MAAM,UAAU;IACrJ,IAAI,KACF,QAAQ,QACJ,KAAK,IAAI,GAAG,IAAI,GAAG,UACnB,MAAM,IAAI;IAEhB,IAAI,CAAC,MAAM,MAAM,MAAM,cACrB,QAAQ,QACJ,KAAK,OAAO,IAAI,SAAS,MAAM,cAAc,EAAE,IAAI,UACnD,OAAO,SAAS,MAAM,cAAc,EAAE;IAE5C,IAAI,SAAS,WAAW;KACtB,IAAI,MAAM,kBAAkB,KAAA,GAC1B,QAAQ,WAAW,MAAM;KAE3B,MAAM,UAAU,uBAAuB,MAAM,kBAAkB,KAAK;KACpE,IAAI,SAAS,QAAQ,KAAK;IAC5B;IACA,OAAO,MAAM,GAAG,KAAK,GAAG;IACxB;GAEF,KAAK;IACH,IAAI,SAAS,SAAS;IACtB,IAAI,SAAS,QAAQ;KACnB,UAAU,QAAQ;MAChB,MAAM;MACN,QAAQ,MAAM,OAAO;MACrB,WAAW,MAAM,OAAO,KAAK;MAC7B,QAAQ,MAAM,OAAO;MACrB,eAAe,MAAM,OAAO;MAC5B,gBAAgB,MAAM,OAAO,eAAe,KAAK,OAAO;OACtD,aAAa,EAAE;OACf,UAAU,EAAE;OACZ,gBAAgB,EAAE;MACpB,EAAE;KACJ,CAAC;KACD;IACF;IACA,OAAO,MAAM,GAAG,kBAAkB,MAAM,QAAQ,KAAK,EAAE,GAAG;IAC1D;GAEF,KAAK;IACH,IAAI,SAAS,SAAS;KACpB,OAAO,MAAM,IAAI;KACjB;IACF;IACA,IAAI,SAAS,QAAQ;KACnB,UAAU,QAAQ;MAChB,MAAM;MACN,YAAY,MAAM,OAAO;MACzB,YAAY,MAAM,OAAO,MAAM;MAC/B,aAAa,MAAM,OAAO,MAAM,QAAQ,MAAM,EAAE,MAAM,CAAC,CAAC;KAC1D,CAAC;KACD;IACF;IACA,MAAM,SAAS,MAAM,OAAO,MAAM,QAC/B,GAAG,MAAM,IAAI,EAAE,YAAY,QAAQ,MAAM,EAAE,UAAU,IAAI,CAAC,CAAC,QAC5D,CACF;IACA,MAAM,WAAW,MAAM,OAAO,MAAM,QACjC,GAAG,MAAM,IAAI,EAAE,YAAY,QAC5B,CACF;IACA,MAAM,gBAAgB,MAAM,OAAO,MAAM,QACtC,GAAG,MAAM,IAAI,EAAE,eAChB,CACF;IACA,IAAI,SAAS,iBAAiB,eAAe,MAAM,OAAO,UAAU,EAAE,IAAI,OAAO,GAAG,SAAS;IAC7F,IAAI,gBAAgB,GAClB,UAAU,KAAK,cAAc;IAE/B,UAAU;IACV,OAAO,MAAM,MAAM;IACnB;GAEF,SACE;EACJ;CACF;AACF;AAEA,SAAgB,2BACd,SAC2C;CAC3C,MAAM,SAAS,QAAQ,UAAU,QAAQ;CACzC,MAAM,OAAO,QAAQ;CACrB,MAAM,QAAQ,QAAQ,SAAS;CAE/B,IAAI,QAAQ;CACZ,IAAI,YAAY;CAChB,IAAI,kBAAkB;CAEtB,QAAQ,UAAU;EAChB,QAAQ,MAAM,MAAd;GACE,KAAK;IACH,QAAQ,MAAM;IACd,YAAY;IACZ,kBAAkB;IAClB,IAAI,SAAS,WAAW,UAAU,GAAG;IACrC,IAAI,SAAS,QAAQ;KACnB,UAAU,QAAQ;MAChB,MAAM;MACN,OAAO,MAAM;MACb,eAAe,QAAQ;KACzB,CAAC;KACD;IACF;IACA,MAAM,aACJ,QAAQ,kBAAkB,KAAA,IACtB,oBAAoB,QAAQ,cAAc,KAC1C;IACN,OAAO,MACL,WAAW,MAAM,gBAAgB,WAAW,QAC9C;IACA;GAEF,KAAK;IACH;IACA,mBAAmB,MAAM;IACzB,IAAI,SAAS,SAAS;KACpB,MAAM,YAAY,MAAM,WAAW,KAAK,CAAC,MAAM;KAC/C,OAAO,MACL,YACI,QAAQ,GAAG,MAAM,GAAG,UAAU,MAC9B,QAAQ,GAAG,IAAI,GAAG,UAAU,GAClC;KACA;IACF;IACA,IAAI,SAAS,QAAQ;KACnB,UAAU,QAAQ;MAChB,MAAM;MACN,OAAO;MACP;MACA,QAAQ,MAAM;MACd,WAAW,MAAM;MACjB,iBAAiB,MAAM;MACvB,QAAQ,MAAM;MACd,QAAQ,MAAM;MACd,YAAY,MAAM;MAClB,aAAa,MAAM;KACrB,CAAC;KACD;IACF;IAEA,MAAM,MAAM,UAAU,iBAAiB,WAAW,KAAK;IACvD,MAAM,KAAK,MAAM,WAAW,KAAK,CAAC,MAAM;IACxC,MAAM,OAAO,KAAK,OAAO,KAAK,IAAI,SAAS,KAAK;IAChD,MAAM,SAAS,KAAK,SAAS,KAAK,IAAI,WAAW,KAAK;IACtD,IAAI,OAAO,GAAG,KAAK,IAAI,UAAU,GAAG,MAAM,IAAI,MAAM,OAAO,KAAK,MAAM,UAAU,IAAI,MAAM,gBAAgB,IAAI,OAAO,IAAI,eAAe,MAAM,UAAU;IACxJ,QAAQ,kBAAkB,MAAM,OAAO,GAAG,MAAM,SAAS,MAAM;IAC/D,IAAI,KACF,QAAQ,QAAQ,KAAK,IAAI,GAAG,IAAI,GAAG,UAAU,MAAM,IAAI;IAEzD,IAAI,MAAM,aACR,QAAQ,QACJ,KAAK,OAAO,IAAI,SAAS,MAAM,aAAa,EAAE,IAAI,UAClD,OAAO,SAAS,MAAM,aAAa,EAAE;IAE3C,IAAI,SAAS,aAAa,MAAM,UAAU,MAAM,SAAS,GACvD,QAAQ,QAAQ,KAAK,OAAO,oBAAoB,UAAU;IAE5D,OAAO,MAAM,GAAG,KAAK,GAAG;IACxB;GAEF,KAAK;IACH,IAAI,SAAS,SAAS;KACpB,OAAO,MAAM,IAAI;KACjB;IACF;IACA,IAAI,SAAS,QAAQ;KACnB,UAAU,QAAQ;MAChB,MAAM;MACN,YAAY,MAAM;MAClB,mBAAmB,MAAM;MACzB,oBAAoB,MAAM;KAC5B,CAAC;KACD;IACF;IACA,IAAI,UAAU,GAAG;IACjB,OAAO,MACL,eAAe,eAAe,MAAM,UAAU,EAAE,IAAI,MAAM,mBAAmB,GAAG,MAAM,kBAAkB,0BAC1G;IACA;GAEF,SACE;EACJ;CACF;AACF;AAEA,SAAS,UAAU,QAAkB,OAAsB;CACzD,OAAO,MAAM,GAAG,KAAK,UAAU,KAAK,EAAE,GAAG;AAC3C;AAEA,SAAgB,eAAe,IAAoB;CACjD,IAAI,KAAK,KAAM,OAAO,GAAG,GAAG;CAC5B,MAAM,MAAM,KAAK;CACjB,IAAI,MAAM,IAAI,OAAO,GAAG,IAAI,QAAQ,CAAC,EAAE;CACvC,MAAM,MAAM,KAAK,MAAM,MAAM,EAAE;CAC/B,MAAM,SAAS,KAAK,MAAM,MAAM,EAAE;CAClC,IAAI,MAAM,IAAI,OAAO,GAAG,IAAI,IAAI,OAAO;CAGvC,OAAO,GAFI,KAAK,MAAM,MAAM,EAEjB,EAAE,IADE,MAAM,GACG;AAC1B;AAEA,SAAS,UACP,iBACA,WACA,OACoB;CACpB,IAAI,cAAc,KAAK,aAAa,OAAO,OAAO,KAAA;CAClD,MAAM,MAAM,kBAAkB;CAC9B,MAAM,aAAa,QAAQ,aAAa;CACxC,OAAO,IAAI,eAAe,KAAK,MAAM,SAAS,CAAC,EAAE;AACnD;AAEA,SAAS,SAAS,MAAc,KAAqB;CACnD,IAAI,KAAK,UAAU,KAAK,OAAO;CAC/B,OAAO,GAAG,KAAK,MAAM,GAAG,MAAM,CAAC,EAAE;AACnC;AAEA,SAAS,uBACP,SACA,QAAQ,OACA;CACR,IAAI,CAAC,WAAW,QAAQ,WAAW,GAAG,OAAO;CAC7C,OAAO,QACJ,KAAK,MACJ,GAAG,EAAE,SAAS,OAAO,KAAK,IAAI,SAAS,KAAK,EAAE,GAAG,EAAE,aACrD,CAAC,CACA,KAAK,IAAI;AACd;AAEA,SAAgB,kBAAkB,MAAkB,OAAwB;CAC1E,MAAM,OAAO,KAAK,SAAS,OAAO,KAAK,IAAI,SAAS,KAAK;CACzD,MAAM,SAAS,KAAK,SAAS,UAAU,KAAK,IAAI,UAAU,KAAK;CAC/D,MAAM,QAAQ,KAAK,eAAe,KAAK,MAAM;EAC3C,MAAM,OAAO,EAAE,WAAW,IAAA,CAAK,QAAQ,CAAC;EACxC,OAAO,GAAG,EAAE,YAAY,GAAG,EAAE,YAAY,GAAG,EAAE,eAAe,IAAI,IAAI;CACvE,CAAC;CACD,MAAM,QACJ,KAAK,gBAAgB,IACjB,QACE,IAAI,OAAO,GAAG,KAAK,cAAc,kBAAkB,UACnD,KAAK,KAAK,cAAc,oBAC1B;CACN,MAAM,QAAQ,MAAM,SAAS,IAAI,KAAK,MAAM,KAAK,KAAK,MAAM;CAC5D,OAAO,GAAG,KAAK,GAAG,KAAK,OAAO,KAAK,KAAK,KAAK,MAAM,IAAI,SAAS,QAAQ;AAC1E;;;;;;AC5XA,SAAS,kBACP,SACA,MACoB;CACpB,MAAM,MAAM,UAAU,SAAS,IAAI;CACnC,IAAI,QAAQ,KAAA,GAAW,OAAO,KAAA;CAC9B,MAAM,IAAI,OAAO,SAAS,KAAK,EAAE;CACjC,OAAO,OAAO,SAAS,CAAC,IAAI,IAAI,KAAA;AAClC;AAEA,eAAsB,aAAa,MAAmC;CACpE,MAAM,aAAa,KAAK,WAAW;CACnC,IAAI,CAAC,YAAY;EACf,QAAQ,MACN,0JACF;EACA,OAAO;CACT;CAEA,MAAM,aAAa,UAAU,KAAK,SAAS,QAAQ;CACnD,MAAM,mBAAmB,UAAU,KAAK,SAAS,cAAc;CAC/D,MAAM,aAAa,UAAU,KAAK,SAAS,QAAQ;CACnD,MAAM,QAAQ,UAAU,KAAK,SAAS,OAAO;CAC7C,MAAM,SAAS,UAAU,KAAK,SAAS,QAAQ;CAC/C,MAAM,YAAY,kBAAkB,KAAK,SAAS,YAAY;CAE9D,MAAM,gBADmB,UAAU,KAAK,SAAS,gBACZ,IACjC,aAAa,KAAK,SAAS,kBAAkB,CAAC,IAC9C,KAAA;CACJ,MAAM,SAAS,UAAU,KAAK,SAAS,QAAQ,KAAK;CACpD,MAAM,eAAe,oBAAoB,KAAK,OAAO;CACrD,MAAM,mBACJ,iBAAiB,UAAU,qBAAqB,KAAK,OAAO;CAE9D,IAAI;CACJ,IAAI,YACF,IAAI;EACF,aAAa,MAAM,kBAAkB,UAAU;CACjD,SAAS,KAAK;EACZ,QAAQ,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;EAC9D,OAAO;CACT;CAGF,IAAI;CACJ,IAAI;EACF,SAAS,MAAM,gBAAgB,UAAU;CAC3C,SAAS,KAAK;EACZ,QAAQ,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;EAC9D,OAAO;CACT;CAEA,IAAI;CACJ,IAAI;EACF,eAAe,oBACb,YACA;GACE,cAAc;GACd;GACA;GACA;GACA;GACA;EACF,GACA,UACF;CACF,SAAS,KAAK;EACZ,QAAQ,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;EAC9D,OAAO;CACT;CAEA,MAAM,aAAa,2BAA2B;EAC5C,MAAM;EACN,eAAe,aAAa,iBAAiB;EAC7C,OAAO;CACT,CAAC;CAED,MAAM,UAAU,MAAM,YAAY,QAAQ;EACxC,GAAG;EACH;CACF,CAAC;CAED,IAAI,YACF,MAAM,UAAU,YAAY,KAAK,UAAU,SAAS,MAAM,CAAC,GAAG,MAAM;CAGtE,IAAI,WAAW,QAAQ;EACrB,QAAQ,OAAO,MAAM,KAAK,UAAU,SAAS,MAAM,CAAC,CAAC;EACrD,QAAQ,OAAO,MAAM,IAAI;CAC3B,OAAO;EACL,MAAM,YAAY,qBAAqB,SAAS,WAAW,SAAS;EACpE,QAAQ,OAAO,MAAM,SAAS;EAC9B,IAAI,CAAC,UAAU,SAAS,IAAI,GAAG,QAAQ,OAAO,MAAM,IAAI;CAC1D;CAEA,IAAI,QAAQ,QAAQ,WAAW,GAC7B,OAAO;CAGT,OAAO,oBAAoB,OAAO,IAAI,IAAI;AAC5C;;;;;;AC/GA,SAAS,aAAa,OAAuB;CAC3C,OAAO,MAAM,QAAQ,qBAAqB,GAAG;AAC/C;;;;;;AAOA,eAAsB,mBACpB,OACA,QACA,WACiB;CACjB,MAAM,MAAM,WAAW,EAAE,WAAW,KAAK,CAAC;CAE1C,IAAI,UAAU;CACd,KAAK,MAAM,cAAc,OAAO,OAAO;EACrC,MAAM,WAAW,MAAM,MAAM,MAAM,MAAM,EAAE,OAAO,WAAW,MAAM;EACnE,IAAI,CAAC,UAAU;EAEf,KAAK,MAAM,OAAO,WAAW,aAAa;GACxC,IAAI,CAAC,IAAI,eAAe;GAExB,MAAM,OAAO,iBAAiB,IAAI,cAAc,MAAM,EACpD,QAAQ,SAAS,OACnB,CAAC;GAMD,MAAM,UACJ,KAAK,WAAW,GALE,aAAa,WAAW,MAAM,EAAE,IAAI,aACtD,WAAW,KAAK,KAClB,EAAE,OAAO,IAAI,gBAAgB,WAGH,GACxB,KAAK,UAAU,MAAM,MAAM,CAAC,GAC5B,MACF;GACA;EACF;CACF;CAEA,OAAO;AACT;;;;;;ACjCA,eAAsB,WAAW,MAAmC;CAClE,MAAM,YAAY,KAAK,WAAW;CAClC,IAAI,CAAC,WAAW;EACd,QAAQ,MAAM,gDAAgD;EAC9D,OAAO;CACT;CAEA,MAAM,SAAS,UAAU,KAAK,SAAS,QAAQ,KAAK;CACpD,MAAM,aAAa,UAAU,KAAK,SAAS,QAAQ;CACnD,MAAM,gBAAgB,UAAU,KAAK,SAAS,aAAa;CAC3D,MAAM,eAAe,UAAU,KAAK,SAAS,UAAU;CACvD,MAAM,gBAAgB,aAAa,KAAK,SAAS,kBAAkB,CAAC;CACpE,MAAM,YAAY,UAAU,KAAK,SAAS,SAAS;CACnD,MAAM,eAAe,oBAAoB,KAAK,OAAO;CACrD,MAAM,mBACJ,iBAAiB,UAAU,qBAAqB,KAAK,OAAO;CAE9D,IAAI;CACJ,IAAI;EACF,QAAQ,MAAM,UAAU,SAAS;CACnC,SAAS,KAAK;EACZ,QAAQ,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;EAC9D,OAAO;CACT;CAEA,MAAM,UAAU,WAAW,aAAa,MAAM,WAAW,aAAa;CAEtE,MAAM,aAAa,yBAAyB;EAC1C,MAAM;EACN;EACA,OAAO;CACT,CAAC;CAED,MAAM,SAAS,MAAM,SAAS,OAAO;EACnC;EACA;EACA;CACF,CAAC;CAED,IAAI,YACF,MAAM,UAAU,YAAY,KAAK,UAAU,QAAQ,MAAM,CAAC,GAAG,MAAM;CAGrE,IAAI,eAAe;EACjB,MAAM,QAAQ,MAAM,mBAAmB,OAAO,QAAQ,aAAa;EACnE,QAAQ,OAAO,MAAM,eAAe,MAAM,oBAAoB,cAAc,GAAG;CACjF;CAEA,IAAI;CACJ,IAAI,cAAc;EAChB,MAAM,EAAE,aAAa,MAAM,OAAO;EAClC,WAAW,KAAK,MAAM,MAAM,SAAS,cAAc,MAAM,CAAC;CAC5D;CAEA,MAAM,YAAY,aAAa,QAAQ;EACrC,QACE,WAAW,cAAc,WAAW,SAAS,SAAS;EACxD;EACA,OAAO,WAAW;CACpB,CAAC;CAED,QAAQ,OAAO,MAAM,SAAS;CAC9B,IAAI,CAAC,UAAU,SAAS,IAAI,GAAG,QAAQ,OAAO,MAAM,IAAI;CAExD,OAAO,OAAO,MAAM,OAAO,MAAM,EAAE,MAAM,IAAI,IAAI;AACnD;;;;;;AC1EA,MAAM,QAAQ;;;;;;;;;;;;;;;;;AAkBd,eAAsB,KAAK,MAAiC;CAC1D,MAAM,SAAS,UAAU,IAAI;CAE7B,IAAI,OAAO,QAAQ,QAAQ,OAAO,YAAY,UAAU,OAAO,QAAQ,GAAG;EACxE,QAAQ,OAAO,MAAM,KAAK;EAC1B,OAAO;CACT;CAEA,QAAQ,OAAO,SAAf;EACE,KAAK,OACH,OAAO,MAAM,WAAW,MAAM;EAChC,KAAK,SACH,OAAO,MAAM,aAAa,MAAM;EAClC,KAAK,YACH,OAAO,MAAM,gBAAgB,MAAM;EACrC,KAAK,UACH,OAAO,MAAM,cAAc,MAAM;EACnC,KAAK,KAAA;GACH,QAAQ,MAAM,KAAK;GACnB,OAAO;EACT;GACE,QAAQ,MAAM,oBAAoB,OAAO,QAAQ,MAAM,OAAO;GAC9D,OAAO;CACX;AACF;;;AChDA,MAAM,OAAO,MAAM,KAAK,QAAQ,KAAK,MAAM,CAAC,CAAC;AAC7C,QAAQ,KAAK,IAAI"}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
import { a as loadGradingConfig, i as GradingConfig, n as parseCasesFile, o as parseGradingConfig, r as parseSuite, s as ConfigError, t as loadSuite } from "../loader-DTvoVfN0.js";
|
|
2
|
+
export { ConfigError, type GradingConfig, loadGradingConfig, loadSuite, parseCasesFile, parseGradingConfig, parseSuite };
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { a as HarnessAdapter, i as BaseAdapterConfig, n as AdapterError, o as ParseErrorRecord, r as AdapterResult, t as AdapterDiagnostics, x as StreamEvent } from "./types-B9H4IZtA.js";
|
|
2
|
+
|
|
3
|
+
//#region src/adapters/claude-code/types.d.ts
|
|
4
|
+
/** Claude Code permission modes (`--permission-mode`). */
|
|
5
|
+
type PermissionMode = "default" | "acceptEdits" | "plan" | "auto" | "dontAsk" | "bypassPermissions";
|
|
6
|
+
/** Effort levels (`--effort`). Availability depends on model. */
|
|
7
|
+
type EffortLevel = "low" | "medium" | "high" | "xhigh" | "max";
|
|
8
|
+
/** Claude Code-specific options (nested under `claudeCode` in YAML). */
|
|
9
|
+
interface ClaudeCodeOptions {
|
|
10
|
+
binary?: string;
|
|
11
|
+
pluginDirs?: string[];
|
|
12
|
+
pluginUrls?: string[];
|
|
13
|
+
addDirs?: string[];
|
|
14
|
+
mcpConfig?: string;
|
|
15
|
+
strictMcpConfig?: boolean;
|
|
16
|
+
model?: string;
|
|
17
|
+
permissionMode?: PermissionMode;
|
|
18
|
+
effort?: EffortLevel;
|
|
19
|
+
agent?: string;
|
|
20
|
+
fallbackModel?: string;
|
|
21
|
+
tools?: string;
|
|
22
|
+
allowedTools?: string[];
|
|
23
|
+
disallowedTools?: string[];
|
|
24
|
+
maxTurns?: number;
|
|
25
|
+
maxBudgetUsd?: number;
|
|
26
|
+
settings?: string;
|
|
27
|
+
settingSources?: string;
|
|
28
|
+
systemPrompt?: string;
|
|
29
|
+
systemPromptFile?: string;
|
|
30
|
+
appendSystemPrompt?: string;
|
|
31
|
+
appendSystemPromptFile?: string;
|
|
32
|
+
debug?: string | boolean;
|
|
33
|
+
debugFile?: string;
|
|
34
|
+
/** Emit hook lifecycle events in stream-json (requires verbose). */
|
|
35
|
+
includeHookEvents?: boolean;
|
|
36
|
+
noSessionPersistence?: boolean;
|
|
37
|
+
disableSlashCommands?: boolean;
|
|
38
|
+
bare?: boolean;
|
|
39
|
+
safeMode?: boolean;
|
|
40
|
+
allowDangerouslySkipPermissions?: boolean;
|
|
41
|
+
dangerouslySkipPermissions?: boolean;
|
|
42
|
+
/**
|
|
43
|
+
* When true (default), each run uses a fresh `CLAUDE_CONFIG_DIR` temp dir for
|
|
44
|
+
* isolation. When false, the child inherits the caller's Claude config
|
|
45
|
+
* (login tokens, installed plugins, MCP servers).
|
|
46
|
+
*/
|
|
47
|
+
isolateConfig?: boolean;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Configuration for a single Claude Code run.
|
|
51
|
+
*
|
|
52
|
+
* Authentication: by default the adapter isolates CLAUDE_CONFIG_DIR to a fresh
|
|
53
|
+
* temp dir per run, so cached Pro/Max login tokens are not available unless
|
|
54
|
+
* `isolateConfig: false`. With isolation, provide `ANTHROPIC_API_KEY` via `env`
|
|
55
|
+
* (or in the inherited process env).
|
|
56
|
+
*/
|
|
57
|
+
interface ClaudeCodeAdapterConfig extends BaseAdapterConfig, ClaudeCodeOptions {}
|
|
58
|
+
/** Claude Code run result — includes raw stream events for debugging. */
|
|
59
|
+
interface ClaudeCodeAdapterResult extends AdapterResult {
|
|
60
|
+
rawEvents: StreamEvent[];
|
|
61
|
+
}
|
|
62
|
+
declare namespace index_d_exports {
|
|
63
|
+
export { AdapterDiagnostics, AdapterError, AdapterResult, ClaudeCodeAdapterConfig, ClaudeCodeAdapterResult, ClaudeCodeOptions, ParseErrorRecord, PermissionMode, claudeCodeAdapter, runClaudeCode };
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Run Claude Code in headless mode and return a trajectory.
|
|
67
|
+
*/
|
|
68
|
+
declare function runClaudeCode(config: ClaudeCodeAdapterConfig): Promise<ClaudeCodeAdapterResult>;
|
|
69
|
+
declare const claudeCodeAdapter: HarnessAdapter<ClaudeCodeAdapterConfig>;
|
|
70
|
+
//#endregion
|
|
71
|
+
export { ClaudeCodeAdapterResult as a, ClaudeCodeAdapterConfig as i, index_d_exports as n, ClaudeCodeOptions as o, runClaudeCode as r, PermissionMode as s, claudeCodeAdapter as t };
|
|
72
|
+
//# sourceMappingURL=index-6Z17eKZx.d.ts.map
|