npm - @alis-build/harness-eval - Versions diffs - 0.1.2 → 0.1.3 - Mend

@alis-build/harness-eval 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/README.md +92 -8
package/dist/adapters/claude-code/index.d.ts +2 -2
package/dist/adapters/claude-code/index.js +2 -1
package/dist/adapters/codex/index.d.ts +68 -0
package/dist/adapters/codex/index.js +3 -0
package/dist/{claude-code-DZ4Vkgp6.js → claude-code-C_7hxC8z.js} +3 -245
package/dist/claude-code-C_7hxC8z.js.map +1 -0
package/dist/cli/bin.js +131 -151
package/dist/cli/bin.js.map +1 -1
package/dist/codex-0cHO2te9.js +496 -0
package/dist/codex-0cHO2te9.js.map +1 -0
package/dist/config/loader.d.ts +2 -2
package/dist/config/loader.js +2 -2
package/dist/{index-V22PrR0p.d.ts → index-DnvP1UBl.d.ts} +2 -2
package/dist/index.d.ts +132 -6
package/dist/index.js +6 -5
package/dist/index.js.map +1 -1
package/dist/loader-B1WmGGzf.d.ts +107 -0
package/dist/{loader-DcI0KfRX.js → loader-DnQ6Jt0i.js} +472 -209
package/dist/loader-DnQ6Jt0i.js.map +1 -0
package/dist/{projections-BcX7w-f6.js → reporter-Biy-5-9M.js} +1335 -758
package/dist/reporter-Biy-5-9M.js.map +1 -0
package/dist/runner/suite.d.ts +1 -1
package/dist/runner/suite.js +1 -1
package/dist/{suite-DPJMIEbu.d.ts → suite-BEShV0by.d.ts} +2 -2
package/dist/{suite-Dlzl-HI0.js → suite-BcP64nlb.js} +16 -2
package/dist/{suite-Dlzl-HI0.js.map → suite-BcP64nlb.js.map} +1 -1
package/dist/{types-CD3TwOtZ.d.ts → types-0QkNVyp9.d.ts} +2 -2
package/dist/types-Bac8_Ixb.js +246 -0
package/dist/types-Bac8_Ixb.js.map +1 -0
package/dist/types-Bu8uOZZN.d.ts +77 -0
package/dist/{types-B9H4IZtA.d.ts → types-C0gBkl0-.d.ts} +3 -2
package/package.json +6 -2
package/dist/claude-code-DZ4Vkgp6.js.map +0 -1
package/dist/loader-C9yQHUPC.d.ts +0 -50
package/dist/loader-DcI0KfRX.js.map +0 -1
package/dist/projections-BcX7w-f6.js.map +0 -1

package/dist/{suite-Dlzl-HI0.js.map → suite-BcP64nlb.js.map} RENAMED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"suite-Dlzl-HI0.js","names":["predicateMatches","_exhaustive"],"sources":["../src/assertions/patterns.ts","../src/assertions/predicates.ts","../src/assertions/tool-calls.ts","../src/assertions/behavior.ts","../src/assertions/compound.ts","../src/assertions/evaluator.ts","../src/adapters/registry.ts","../src/config/resolve-config.ts","../src/runner/case.ts","../src/runner/limit.ts","../src/runner/suite.ts"],"sourcesContent":["/*\n Tool name pattern matching.\n \n Tool names follow conventions:\n * - Built-in tools: `Bash`, `Read`, `Edit`, `WebSearch`, etc.\n * - MCP tools: `mcp__<server>__<tool>`, e.g. `mcp__api__search_skills`.\n \n Patterns support `` as a glob wildcard. The most useful patterns for\n the skills-loading problem are namespace globs like `mcp__api__` —\n \"did any tool from the alis MCP server get called.\"\n /\n\nimport type { ToolPattern } from \"../types/assertions\";\n\n/\n Test whether a fully-qualified tool name matches a pattern.\n \n Literal patterns (no ``) match by string equality. Glob patterns are\n compiled to a regex on each call — fine for our scale (dozens of patterns,\n * thousands of calls per run). If this becomes a hot path, memoize.\n /\nexport function toolMatches(toolName: string, pattern: ToolPattern): boolean {\n const p = patternString(pattern);\n if (!p.includes(\"\")) return toolName === p;\n return globToRegex(p).test(toolName);\n}\n\n/** Extract the underlying string from either pattern form. /\nexport function patternString(pattern: ToolPattern): string {\n return typeof pattern === \"string\" ? pattern : pattern.pattern;\n}\n\n/* Human-readable representation for diagnostic messages. 
/\nexport function describePattern(pattern: ToolPattern): string {\n return patternString(pattern);\n}\n\n/\n Convert a glob (with `` wildcards only) to an anchored regex.\n Other regex metacharacters in the input are escaped.\n /\nfunction globToRegex(glob: string): RegExp {\n const escaped = glob\n .replace(/[.+?^${}()\|[\\]\\\\]/g, \"\\\\$&\") // escape regex specials\n .replace(/\\/g, \".\"); // → .\n return new RegExp(`^${escaped}$`);\n}\n","/\n Predicate engine for matching tool call arguments.\n \n Conceptually similar to MongoDB query selectors: a predicate is a tree\n * of conditions, applied recursively to a value. Examples:\n \n matches(\"hello world\", { contains: \"world\" }) // true\n * matches({ a: 1 }, { a: { gte: 0 } }) // true\n * matches({ a: { b: \"x\" } }, { a: { b: \"x\" } }) // true (scalar shortcut)\n * matches({ q: \"ab\" }, { any_of: [{equals:\"x\"}, {contains:\"a\"}] }) // ???\n \n Last example: the `any_of` applies to the value (`{q:\"ab\"}`), not to a\n * field. `equals:\"x\"` and `contains:\"a\"` are both leaf predicates that\n * apply to the whole value. `contains` requires a string, so it returns\n * false for the object. The whole thing returns false. That's deliberate.\n \n Disambiguation rule (single-key objects): a single-key object is interpreted as a leaf or compound predicate IF\n * the key matches a known operator name. Otherwise it falls through to\n * being treated as an object predicate (field name = key).\n \n This means a tool argument schema cannot have a top-level field named\n * `equals`, `contains`, `regex`, `any_of`, `all_of`, `not`, etc. — those\n * fields would be shadowed by predicate operators. 
For MCP tools, this\n * has never been a problem in practice; document it and move on.\n /\n\nimport type { Predicate } from \"../types/assertions\";\n\nconst LEAF_OPS = new Set([\n \"equals\",\n \"contains\",\n \"not_contains\",\n \"regex\",\n \"gte\",\n \"lte\",\n \"gt\",\n \"lt\",\n \"one_of\",\n]);\nconst COMPOUND_OPS = new Set([\"any_of\", \"all_of\", \"not\"]);\n\n/\n Apply a predicate to a value. Returns true if the value satisfies the\n * predicate, false otherwise.\n \n The `predicate` parameter is typed as `unknown` because YAML deserialization\n * produces unconstrained shapes; runtime dispatch is the validation.\n /\nexport function matches(value: unknown, predicate: unknown): boolean {\n // Scalar shortcut: anything that isn't a plain object (or is an array) is\n // treated as an equality target.\n if (!isPlainObject(predicate)) {\n return deepEquals(value, predicate);\n }\n\n const obj = predicate as Record<string, unknown>;\n const keys = Object.keys(obj);\n\n // Single-key object: check if it's a known operator.\n if (keys.length === 1) {\n const key = keys[0];\n\n if (COMPOUND_OPS.has(key)) {\n switch (key) {\n case \"any_of\":\n return (obj.any_of as Predicate[]).some((sub) => matches(value, sub));\n case \"all_of\":\n return (obj.all_of as Predicate[]).every((sub) =>\n matches(value, sub),\n );\n case \"not\":\n return !matches(value, obj.not);\n }\n }\n\n if (LEAF_OPS.has(key)) {\n return matchesLeaf(value, key, obj[key]);\n }\n\n // Single key but not a known operator → object predicate (field match).\n }\n\n // Object predicate: every key is a field on `value`, every key's value is\n // a sub-predicate that must hold for the corresponding field.\n if (!isPlainObject(value)) return false;\n const valueObj = value as Record<string, unknown>;\n\n for (const [field, subPred] of Object.entries(obj)) {\n if (!matches(valueObj[field], subPred)) return false;\n }\n return true;\n}\n\n/* Apply a single leaf operator to a value. 
Caller guarantees `op` is in LEAF_OPS. /\nfunction matchesLeaf(value: unknown, op: string, target: unknown): boolean {\n switch (op) {\n case \"equals\":\n return deepEquals(value, target);\n case \"contains\":\n return typeof value === \"string\" && value.includes(target as string);\n case \"not_contains\":\n return typeof value === \"string\" && !value.includes(target as string);\n case \"regex\":\n if (typeof value !== \"string\" \|\| typeof target !== \"string\") {\n return false;\n }\n try {\n return new RegExp(target).test(value);\n } catch {\n return false;\n }\n case \"gte\":\n return typeof value === \"number\" && value >= (target as number);\n case \"lte\":\n return typeof value === \"number\" && value <= (target as number);\n case \"gt\":\n return typeof value === \"number\" && value > (target as number);\n case \"lt\":\n return typeof value === \"number\" && value < (target as number);\n case \"one_of\":\n return (target as unknown[]).some((t) => deepEquals(value, t));\n default:\n throw new Error(`unknown leaf operator: ${op}`);\n }\n}\n\n/* True for non-null, non-array objects. /\nfunction isPlainObject(x: unknown): x is Record<string, unknown> {\n return typeof x === \"object\" && x !== null && !Array.isArray(x);\n}\n\n/\n Structural equality for unknown values. 
Used by `equals` and `one_of`.\n * Strict — no coercions, no NaN-equals-NaN special case (matches `===`).\n /\nfunction deepEquals(a: unknown, b: unknown): boolean {\n if (a === b) return true;\n if (typeof a !== typeof b) return false;\n if (a === null \|\| b === null) return false;\n if (typeof a !== \"object\") return false;\n\n if (Array.isArray(a) !== Array.isArray(b)) return false;\n if (Array.isArray(a) && Array.isArray(b)) {\n if (a.length !== b.length) return false;\n return a.every((v, i) => deepEquals(v, b[i]));\n }\n\n const aObj = a as Record<string, unknown>;\n const bObj = b as Record<string, unknown>;\n const aKeys = Object.keys(aObj);\n const bKeys = Object.keys(bObj);\n if (aKeys.length !== bKeys.length) return false;\n return aKeys.every((k) => deepEquals(aObj[k], bObj[k]));\n}\n","/\n Tool-call assertion evaluators.\n \n These assertions query the `toolCalls` array on the trajectory view:\n * presence, cardinality, ordering, and argument matching.\n \n Ordering is done on `turnIndex`, not wall-clock time. Parallel tool calls\n * within a single assistant turn share a turnIndex, which means \"A came\n * before B\" requires A's turn to strictly precede B's turn — calls within\n * the same turn are considered unordered. This is the right default\n * because Claude Code dispatches parallel calls concurrently and the\n * wall-clock ordering is non-deterministic.\n /\n\nimport type { Assertion, AssertionResult } from \"../types/assertions\";\nimport type { ToolCall, TrajectoryView } from \"../types/trajectory\";\nimport { describeCardinality, parseCardinality } from \"./cardinality\";\nimport { describePattern, toolMatches } from \"./patterns\";\nimport { matches as predicateMatches } from \"./predicates\";\n\n// presence\n\n/* Assert a tool was called with optional cardinality (`times`). 
/\nexport function evaluateCalled(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"called\" }>,\n): AssertionResult {\n const matching = view.toolCalls.filter((c) =>\n toolMatches(c.name, assertion.tool),\n );\n const check = parseCardinality(assertion.times);\n const passed = check(matching.length);\n\n return {\n passed,\n description: `called(${describePattern(assertion.tool)}, ${describeCardinality(assertion.times)})`,\n details: passed\n ? `found ${matching.length} matching call(s)`\n : `found ${matching.length} call(s), expected ${describeCardinality(assertion.times)}`,\n matches: matching,\n };\n}\n\n/* Assert a tool was never called. /\nexport function evaluateNotCalled(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"not_called\" }>,\n): AssertionResult {\n const matching = view.toolCalls.filter((c) =>\n toolMatches(c.name, assertion.tool),\n );\n const passed = matching.length === 0;\n\n return {\n passed,\n description: `not_called(${describePattern(assertion.tool)})`,\n details: passed\n ? \"no matching calls\"\n : `found ${matching.length} forbidden call(s)`,\n matches: matching,\n };\n}\n\n/* Assert at least one of the listed tools was called. /\nexport function evaluateCalledAnyOf(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"called_any_of\" }>,\n): AssertionResult {\n const allMatches: ToolCall[] = [];\n for (const pattern of assertion.tools) {\n allMatches.push(\n ...view.toolCalls.filter((c) => toolMatches(c.name, pattern)),\n );\n }\n const passed = allMatches.length > 0;\n return {\n passed,\n description: `called_any_of(${assertion.tools.map(describePattern).join(\", \")})`,\n details: passed\n ? `${allMatches.length} matching call(s)`\n : \"no calls matched any pattern\",\n matches: allMatches,\n };\n}\n\n/* Assert every listed tool was called at least once. 
/\nexport function evaluateCalledAllOf(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"called_all_of\" }>,\n): AssertionResult {\n const perPattern = assertion.tools.map((p) => ({\n pattern: p,\n matches: view.toolCalls.filter((c) => toolMatches(c.name, p)),\n }));\n const missing = perPattern.filter((p) => p.matches.length === 0);\n const passed = missing.length === 0;\n\n return {\n passed,\n description: `called_all_of(${assertion.tools.map(describePattern).join(\", \")})`,\n details: passed\n ? \"all patterns matched\"\n : `missing: ${missing.map((m) => describePattern(m.pattern)).join(\", \")}`,\n matches: perPattern.flatMap((p) => p.matches),\n };\n}\n\n// ordering\n\n/* Assert `first` tool's earliest turn strictly precedes `then` tool's earliest turn. /\nexport function evaluateCalledBefore(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"called_before\" }>,\n): AssertionResult {\n const firsts = view.toolCalls.filter((c) =>\n toolMatches(c.name, assertion.first),\n );\n const thens = view.toolCalls.filter((c) =>\n toolMatches(c.name, assertion.then),\n );\n const desc = `called_before(${describePattern(assertion.first)} → ${describePattern(assertion.then)})`;\n\n if (firsts.length === 0) {\n return {\n passed: false,\n description: desc,\n details: `no calls matching first`,\n };\n }\n if (thens.length === 0) {\n return {\n passed: false,\n description: desc,\n details: `no calls matching then`,\n };\n }\n\n // Earliest occurrence of each side, by turn. Strictly less than = \"before\".\n const earliestFirst = Math.min(...firsts.map((c) => c.turnIndex));\n const earliestThen = Math.min(...thens.map((c) => c.turnIndex));\n const passed = earliestFirst < earliestThen;\n\n return {\n passed,\n description: desc,\n details: passed\n ? 
`first @ turn ${earliestFirst}, then @ turn ${earliestThen}`\n : `first @ turn ${earliestFirst}, then @ turn ${earliestThen} (not before)`,\n matches: [...firsts, ...thens],\n };\n}\n\n/\n Assert tools appear in order.\n \n Non-strict mode allows interleaved calls; strict mode requires a contiguous subsequence.\n /\nexport function evaluateSequence(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"sequence\" }>,\n): AssertionResult {\n const { tools, strict = false } = assertion;\n const desc = `sequence([${tools.map(describePattern).join(\" → \")}]${strict ? \", strict\" : \"\"})`;\n\n if (tools.length === 0) {\n return {\n passed: true,\n description: desc,\n details: \"empty sequence trivially matches\",\n };\n }\n\n if (strict) {\n // Strict: the tools must appear in exact order with no other tool calls\n // interleaved. We look for a contiguous subsequence of the right shape.\n if (view.toolCalls.length < tools.length) {\n return {\n passed: false,\n description: desc,\n details: \"not enough tool calls\",\n };\n }\n for (\n let start = 0;\n start <= view.toolCalls.length - tools.length;\n start++\n ) {\n let ok = true;\n for (let i = 0; i < tools.length; i++) {\n if (!toolMatches(view.toolCalls[start + i].name, tools[i])) {\n ok = false;\n break;\n }\n }\n if (ok) {\n return {\n passed: true,\n description: desc,\n details: `matched at positions ${start}..${start + tools.length - 1}`,\n matches: view.toolCalls.slice(start, start + tools.length),\n };\n }\n }\n return { passed: false, description: desc, details: \"no contiguous match\" };\n }\n\n // Non-strict: tools must appear in order, interleaved calls allowed.\n // Walk the tool call list once, advancing the sequence pointer on each match.\n let idx = 0;\n const matched: ToolCall[] = [];\n for (const call of view.toolCalls) {\n if (idx < tools.length && toolMatches(call.name, tools[idx])) {\n matched.push(call);\n idx++;\n }\n }\n const passed = idx === tools.length;\n return {\n 
passed,\n description: desc,\n details: passed ? \"matched in order\" : `matched ${idx}/${tools.length}`,\n matches: matched,\n };\n}\n\n// arguments\n\n/* Assert at least one call to `tool` had arguments matching the predicate. /\nexport function evaluateCalledWith(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"called_with\" }>,\n): AssertionResult {\n const candidates = view.toolCalls.filter((c) =>\n toolMatches(c.name, assertion.tool),\n );\n const matching = candidates.filter((c) =>\n predicateMatches(c.args, assertion.args),\n );\n const passed = matching.length > 0;\n\n let details: string;\n if (passed) {\n details = `${matching.length} call(s) with matching args`;\n } else if (candidates.length === 0) {\n details = `no calls to ${describePattern(assertion.tool)} at all`;\n } else {\n details = `${candidates.length} call(s) but none with matching args`;\n }\n\n return {\n passed,\n description: `called_with(${describePattern(assertion.tool)}, args matching predicate)`,\n details,\n matches: matching,\n };\n}\n","/\n Behavior and response-text assertions.\n \n Cover everything that isn't a tool-call query:\n * - Did the agent answer without using any tool? (the \"blind answer\" case)\n * - Did it stay within iteration / cost / time budget?\n * - What did it say its stop reason was?\n * - Does the response text contain expected substrings or match a regex?\n * - Arbitrary user-supplied predicate (escape hatch).\n /\n\nimport type { Assertion, AssertionResult } from \"../types/assertions\";\nimport type { TrajectoryView } from \"../types/trajectory\";\n\n// behavior\n\n/\n Was the response delivered without using any tool? 
This is the primary\n * failure mode detector for the skills-loading problem: when the harness\n * ignores the MCP, the trace shows zero tool calls and one terminal\n * assistant turn with finish reason `end_turn`.\n \n \"Without tool calls\" is defined as `toolCalls.length === 0` AND the\n * response text is non-empty (so we don't confuse \"answered blind\" with\n * \"session died before producing anything\").\n /\nexport function evaluateRespondedWithoutToolCalls(\n view: TrajectoryView,\n _assertion: Extract<Assertion, { type: \"responded_without_tool_calls\" }>,\n): AssertionResult {\n const passed = view.toolCalls.length === 0 && view.finalResponse.length > 0;\n return {\n passed,\n description: \"responded_without_tool_calls\",\n details: passed\n ? \"no tools called, response non-empty\"\n : view.toolCalls.length > 0\n ? `${view.toolCalls.length} tool call(s) made`\n : \"response was empty (session probably aborted)\",\n };\n}\n\n/* Assert the session stayed within the reported turn count. /\nexport function evaluateIterationsWithin(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"iterations_within\" }>,\n): AssertionResult {\n const n = view.usage.numTurns;\n const passed = n <= assertion.max;\n return {\n passed,\n description: `iterations_within(${assertion.max})`,\n details: `used ${n} turn(s)`,\n };\n}\n\n/* Assert total session cost in USD is within budget. /\nexport function evaluateCostWithinUsd(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"cost_within_usd\" }>,\n): AssertionResult {\n const cost = view.usage.totalCostUsd;\n const passed = cost <= assertion.max;\n return {\n passed,\n description: `cost_within_usd(${assertion.max.toFixed(4)})`,\n details: `used $${cost.toFixed(4)}`,\n };\n}\n\n/* Assert wall-clock session duration is within budget. 
/\nexport function evaluateDurationWithinMs(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"duration_within_ms\" }>,\n): AssertionResult {\n const ms = view.usage.durationMs;\n const passed = ms <= assertion.max;\n return {\n passed,\n description: `duration_within_ms(${assertion.max})`,\n details: `took ${ms}ms`,\n };\n}\n\n/* Assert the final stop reason matches one of the allowed values. /\nexport function evaluateFinishedWith(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"finished_with\" }>,\n): AssertionResult {\n const allowed = Array.isArray(assertion.reasons)\n ? assertion.reasons\n : [assertion.reasons];\n const actual = view.finalStopReason;\n const passed = actual !== null && allowed.includes(actual);\n return {\n passed,\n description: `finished_with(${allowed.join(\"\|\")})`,\n details: `actual: ${actual ?? \"(none)\"}`,\n };\n}\n\n// response text\n\n/* Assert `finalResponse` contains the given substring. /\nexport function evaluateResponseContains(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"response_contains\" }>,\n): AssertionResult {\n const passed = view.finalResponse.includes(assertion.text);\n return {\n passed,\n description: `response_contains(${JSON.stringify(assertion.text)})`,\n details: passed ? \"text found\" : \"text not in response\",\n };\n}\n\n/* Assert `finalResponse` does not contain the given substring. /\nexport function evaluateResponseNotContains(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"response_not_contains\" }>,\n): AssertionResult {\n const passed = !view.finalResponse.includes(assertion.text);\n return {\n passed,\n description: `response_not_contains(${JSON.stringify(assertion.text)})`,\n details: passed ? \"text absent\" : \"forbidden text found\",\n };\n}\n\n/* Assert `finalResponse` matches a regular expression. 
/\nexport function evaluateResponseMatches(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"response_matches\" }>,\n): AssertionResult {\n // Construction may throw on a malformed regex; surface that as a failure\n // rather than crashing the whole eval run.\n let passed: boolean;\n let details: string;\n try {\n const re = new RegExp(assertion.pattern, assertion.flags);\n passed = re.test(view.finalResponse);\n details = passed ? \"pattern matched\" : \"pattern did not match\";\n } catch (err) {\n passed = false;\n details = `invalid regex: ${err instanceof Error ? err.message : String(err)}`;\n }\n return {\n passed,\n description: `response_matches(/${assertion.pattern}/${assertion.flags ?? \"\"})`,\n details,\n };\n}\n\n// escape hatch\n\n/\n Run an arbitrary user-supplied predicate against the view.\n \n Only available from programmatic test definition (the YAML loader cannot\n * produce functions). Catches thrown errors and reports them as failures so\n * one bad predicate doesn't take down a whole eval run.\n /\nexport function evaluatePredicate(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"predicate\" }>,\n): AssertionResult {\n let passed = false;\n let details: string;\n try {\n passed = assertion.fn(view);\n details = passed ? \"predicate returned true\" : \"predicate returned false\";\n } catch (err) {\n details = `predicate threw: ${err instanceof Error ? err.message : String(err)}`;\n }\n return {\n passed,\n description: assertion.description ?? \"predicate(...)\",\n details,\n };\n}\n","/\n Compound assertion evaluators: `any_of`, `all_of`, `not`.\n \n These recurse into the main evaluator. To avoid a circular import between\n * this file and `evaluator.ts`, the dispatcher is passed in as a function\n * parameter rather than imported directly. 
The evaluator binds itself when\n * dispatching to these.\n /\n\nimport type { Assertion, AssertionResult } from \"../types/assertions\";\nimport type { TrajectoryView } from \"../types/trajectory\";\n\n/\n Signature of the top-level dispatcher. Passed into compound evaluators so\n * they can recursively evaluate child assertions without a circular import.\n /\nexport type Evaluator = (\n view: TrajectoryView,\n assertion: Assertion,\n) => AssertionResult;\n\n/* Evaluate `all_of`: every child assertion must pass. /\nexport function evaluateAllOf(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"all_of\" }>,\n evaluate: Evaluator,\n): AssertionResult {\n const children = assertion.assertions.map((a) => evaluate(view, a));\n const passed = children.every((c) => c.passed);\n const failedCount = children.filter((c) => !c.passed).length;\n\n return {\n passed,\n description: `all_of (${children.length} child${children.length === 1 ? \"\" : \"ren\"})`,\n details: passed\n ? \"all passed\"\n : `${failedCount} of ${children.length} failed`,\n children,\n };\n}\n\n/* Evaluate `any_of`: at least one child assertion must pass. /\nexport function evaluateAnyOf(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"any_of\" }>,\n evaluate: Evaluator,\n): AssertionResult {\n const children = assertion.assertions.map((a) => evaluate(view, a));\n const passedCount = children.filter((c) => c.passed).length;\n const passed = passedCount > 0;\n\n return {\n passed,\n description: `any_of (${children.length} child${children.length === 1 ? \"\" : \"ren\"})`,\n details: passed ? `${passedCount} passed` : \"all failed\",\n children,\n };\n}\n\n/* Evaluate `not`: invert the inner assertion result. 
/\nexport function evaluateNot(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"not\" }>,\n evaluate: Evaluator,\n): AssertionResult {\n const child = evaluate(view, assertion.assertion);\n return {\n passed: !child.passed,\n description: `not(${child.description})`,\n details: child.passed\n ? \"inner passed (so outer fails)\"\n : \"inner failed (so outer passes)\",\n children: [child],\n };\n}\n","/\n Top-level assertion evaluator.\n \n Dispatches on the discriminant of the `Assertion` tagged union, delegating\n * to the per-kind evaluators in the sibling modules. This file deliberately\n * contains no logic of its own — keep it boring so adding a new assertion\n * type is just (a) extend the union in `types/assertions.ts`, (b) add an\n * evaluator function in the appropriate sibling, (c) add one case here.\n /\n\nimport type { Assertion, AssertionResult } from \"../types/assertions\";\nimport type { TrajectoryView } from \"../types/trajectory\";\n\nimport {\n evaluateCalled,\n evaluateCalledAllOf,\n evaluateCalledAnyOf,\n evaluateCalledBefore,\n evaluateCalledWith,\n evaluateNotCalled,\n evaluateSequence,\n} from \"./tool-calls\";\n\nimport {\n evaluateCostWithinUsd,\n evaluateDurationWithinMs,\n evaluateFinishedWith,\n evaluateIterationsWithin,\n evaluatePredicate,\n evaluateRespondedWithoutToolCalls,\n evaluateResponseContains,\n evaluateResponseMatches,\n evaluateResponseNotContains,\n} from \"./behavior\";\n\nimport { evaluateAllOf, evaluateAnyOf, evaluateNot } from \"./compound\";\n\n/\n Evaluate one assertion against a trajectory view.\n \n The switch is exhaustive — TypeScript's `never` check at the end will\n * flag any new variant added to the `Assertion` union that hasn't been\n * wired up here.\n /\nexport function evaluate(\n view: TrajectoryView,\n assertion: Assertion,\n): AssertionResult {\n switch (assertion.type) {\n // tool-call presence and ordering\n case \"called\":\n return evaluateCalled(view, assertion);\n case 
\"not_called\":\n return evaluateNotCalled(view, assertion);\n case \"called_any_of\":\n return evaluateCalledAnyOf(view, assertion);\n case \"called_all_of\":\n return evaluateCalledAllOf(view, assertion);\n case \"called_before\":\n return evaluateCalledBefore(view, assertion);\n case \"sequence\":\n return evaluateSequence(view, assertion);\n\n // tool-call arguments\n case \"called_with\":\n return evaluateCalledWith(view, assertion);\n\n // behavior\n case \"responded_without_tool_calls\":\n return evaluateRespondedWithoutToolCalls(view, assertion);\n case \"iterations_within\":\n return evaluateIterationsWithin(view, assertion);\n case \"cost_within_usd\":\n return evaluateCostWithinUsd(view, assertion);\n case \"duration_within_ms\":\n return evaluateDurationWithinMs(view, assertion);\n case \"finished_with\":\n return evaluateFinishedWith(view, assertion);\n\n // response text\n case \"response_contains\":\n return evaluateResponseContains(view, assertion);\n case \"response_not_contains\":\n return evaluateResponseNotContains(view, assertion);\n case \"response_matches\":\n return evaluateResponseMatches(view, assertion);\n\n // compound — pass the dispatcher in so they can recurse without\n // creating a circular import\n case \"all_of\":\n return evaluateAllOf(view, assertion, evaluate);\n case \"any_of\":\n return evaluateAnyOf(view, assertion, evaluate);\n case \"not\":\n return evaluateNot(view, assertion, evaluate);\n\n // escape hatch\n case \"predicate\":\n return evaluatePredicate(view, assertion);\n\n default: {\n // Exhaustiveness guard. If a new assertion variant is added to the\n // union and not wired into the switch above, TypeScript will fail\n // here at compile time. Don't remove this case.\n const _exhaustive: never = assertion;\n throw new Error(`unknown assertion: ${JSON.stringify(_exhaustive)}`);\n }\n }\n}\n\n/\n Evaluate a list of assertions independently. 
Used at the test-case level\n * where each top-level assertion is reported separately (and thresholded\n * separately, in the runner layer).\n /\nexport function evaluateAll(\n view: TrajectoryView,\n assertions: Assertion[],\n): AssertionResult[] {\n return assertions.map((a) => evaluate(view, a));\n}\n","/\n Default harness adapter registry.\n \n New adapters register here so the CLI and runner can resolve `adapter`\n * names from YAML without hard-coding imports at every call site.\n \n ## Adding a new harness adapter\n \n 1. Create an adapter module under `src/adapters/<id>/` implementing\n * {@link HarnessAdapter} from `./types`. Set `id` to match the YAML\n * `adapter` field (e.g. `\"codex\"`).\n * 2. Nest suite config under a camelCase key in {@link SuiteConfig}\n * (e.g. `codex: { ... }`) so each harness keeps its own options.\n * 3. Register at startup via {@link registerAdapter} — either in this\n * module for built-in adapters or from plugin/bootstrap code for\n * runtime extensions.\n * 4. Reference in suite YAML with `adapter: <id>` and the nested config\n * block; the runner calls `getAdapter(id).run(resolvedConfig)`.\n \n Built-in adapters are registered when this module loads. 
Only `claude-code`\n * ships today; future harnesses (Codex, Gemini CLI, Antigravity CLI) follow\n * the same pattern in separate tracks.\n /\n\nimport type { HarnessAdapter } from \"./types\";\nimport { claudeCodeAdapter } from \"./claude-code/index\";\n\nconst ADAPTERS: Record<string, HarnessAdapter> = {};\n\nfunction registerBuiltIn(id: string, adapter: HarnessAdapter): void {\n ADAPTERS[id] = adapter;\n}\n\nregisterBuiltIn(\"claude-code\", claudeCodeAdapter);\n\n/\n Register a harness adapter by id.\n \n Duplicate ids throw — registration is explicit so accidental overrides\n * surface immediately during startup or test setup.\n /\nexport function registerAdapter(id: string, adapter: HarnessAdapter): void {\n if (ADAPTERS[id]) {\n throw new Error(`adapter \"${id}\" is already registered`);\n }\n ADAPTERS[id] = adapter;\n}\n\n/* Return all registered adapter ids (built-in and runtime). /\nexport function listAdapters(): string[] {\n return Object.keys(ADAPTERS);\n}\n\n/* Resolve an adapter by id. Throws if unknown. /\nexport function getAdapter(id: string): HarnessAdapter {\n const adapter = ADAPTERS[id];\n if (!adapter) {\n throw new Error(\n `unknown adapter \"${id}\". Available: ${listAdapters().join(\", \")}`,\n );\n }\n return adapter;\n}\n\n/* Default adapter when YAML omits `adapter`. /\nexport const DEFAULT_ADAPTER_ID = \"claude-code\";\n\nexport function getDefaultAdapter(): HarnessAdapter {\n return getAdapter(DEFAULT_ADAPTER_ID);\n}\n","/\n Flatten nested suite config into harness-specific adapter config.\n \n Suite YAML nests adapter options under keys like `claudeCode`; adapters\n * expect a flat config object. 
This module merges layers and flattens per\n * adapter id.\n /\n\nimport { DEFAULT_ADAPTER_ID } from \"../adapters/registry\";\nimport type { BaseAdapterConfig } from \"../adapters/types\";\nimport type { ClaudeCodeAdapterConfig } from \"../adapters/claude-code/types\";\nimport type { SuiteConfig } from \"../adapters/types\";\n\n/* Merged config passed to {@link HarnessAdapter.run}. /\nexport type ResolvedRunConfig = BaseAdapterConfig & Record<string, unknown>;\n\n/* Merge generic suite config layers into a flat {@link ClaudeCodeAdapterConfig}. /\nexport function toClaudeCodeConfig(\n layers: SuiteConfig[],\n prompt: string,\n): ClaudeCodeAdapterConfig {\n const merged: Record<string, unknown> = {};\n for (const layer of layers) {\n const { claudeCode, ...generic } = layer;\n Object.assign(merged, generic);\n if (claudeCode && typeof claudeCode === \"object\") {\n Object.assign(merged, claudeCode);\n }\n }\n merged.prompt = prompt;\n return merged as unknown as ClaudeCodeAdapterConfig;\n}\n\n/\n Resolve merged suite layers into the flat config shape expected by the\n * selected harness adapter.\n /\nexport function resolveRunConfig(\n adapterId: string,\n layers: SuiteConfig[],\n prompt: string,\n): ResolvedRunConfig {\n if (adapterId === DEFAULT_ADAPTER_ID \|\| adapterId === \"claude-code\") {\n return toClaudeCodeConfig(layers, prompt) as ResolvedRunConfig;\n }\n\n const merged: Record<string, unknown> = {};\n for (const layer of layers) {\n Object.assign(merged, layer);\n }\n merged.prompt = prompt;\n return merged as ResolvedRunConfig;\n}\n","/\n Case-level runner — config merge, single-repetition execution, and cell aggregation.\n \n The suite runner (`suite.ts`) fans out work; this module owns the per-rep\n * lifecycle: merge config layers, invoke the adapter, evaluate assertions, and\n * compute thresholded pass rates for one matrix cell.\n /\n\nimport type { AdapterDiagnostics, AdapterResult, BaseAdapterConfig } from \"../adapters/types\";\nimport { 
getDefaultAdapter } from \"../adapters/registry\";\nimport { resolveRunConfig } from \"../config/resolve-config\";\nimport { evaluateAll } from \"../assertions/evaluator\";\nimport type {\n AssertionStat,\n CellReport,\n MatrixCell,\n RepetitionError,\n RepetitionResult,\n TestCase,\n TestSuite,\n} from \"./types\";\n\n/* Default repetition count when `case.repetitions` is omitted. /\nexport const DEFAULT_REPETITIONS = 5;\n\n/* Default assertion pass-rate threshold when `threshold` is omitted. /\nexport const DEFAULT_THRESHOLD = 1.0;\n\n/* Injectable adapter run function (used by tests to stub harness I/O). /\nexport type AdapterRunFn = (\n config: BaseAdapterConfig & Record<string, unknown>,\n) => Promise<AdapterResult>;\n\n/\n Build the effective adapter config for one (suite, case, cell).\n \n Merge order (later wins): defaultConfig < case.config < cell.config.\n /\nexport function mergeConfig(\n suite: TestSuite,\n testCase: TestCase,\n cell: MatrixCell,\n): BaseAdapterConfig & Record<string, unknown> {\n const adapterId = suite.adapter ?? getDefaultAdapter().id;\n const layers = [\n suite.defaultConfig ?? {},\n testCase.config ?? {},\n cell.config,\n ];\n return resolveRunConfig(adapterId, layers, testCase.prompt);\n}\n\n/* Effective repetition count for a case (`case.repetitions` or default). /\nexport function getRepetitions(testCase: TestCase): number {\n return testCase.repetitions ?? 
DEFAULT_REPETITIONS;\n}\n\n/\n Run one repetition: invoke the adapter, evaluate assertions, capture errors.\n \n Adapter failures are returned as {@link RepetitionResult.error} rather than\n * thrown so the suite runner can continue other reps and report adapter error counts.\n /\nexport async function runRepetition(\n testCase: TestCase,\n _cell: MatrixCell,\n config: BaseAdapterConfig & Record<string, unknown>,\n repetitionIndex: number,\n run: AdapterRunFn,\n signal?: AbortSignal,\n): Promise<RepetitionResult> {\n const startTs = Date.now();\n\n try {\n const adapterResult = await run({\n ...config,\n signal: signal ?? config.signal,\n });\n\n const assertionResults = evaluateAll(\n adapterResult.view,\n testCase.assertions.map((t) => t.assertion),\n );\n\n return {\n repetitionIndex,\n adapterResult,\n error: null,\n assertionResults,\n durationMs: Date.now() - startTs,\n };\n } catch (err) {\n return {\n repetitionIndex,\n adapterResult: null,\n error: extractError(err),\n assertionResults: [],\n durationMs: Date.now() - startTs,\n };\n }\n}\n\n/\n Normalize thrown values into a {@link RepetitionError}.\n \n Preserves {@link AdapterDiagnostics} when the thrown value is an\n * {@link AdapterError} or carries a `diagnostics` property.\n /\nfunction extractError(err: unknown): RepetitionError {\n const message = err instanceof Error ? 
err.message : String(err);\n\n let diagnostics: Partial<AdapterDiagnostics> = {};\n if (err !== null && typeof err === \"object\" && \"diagnostics\" in err) {\n const d = (err as { diagnostics: unknown }).diagnostics;\n if (d !== null && typeof d === \"object\") {\n diagnostics = d as Partial<AdapterDiagnostics>;\n }\n }\n\n return { message, diagnostics };\n}\n\n/\n Roll up repetition results into a {@link CellReport}.\n \n Adapter errors reduce `evaluatedCount` but do not fail the cell by\n * themselves — only assertion threshold misses mark a cell as failed.\n /\nexport function aggregateCell(\n testCase: TestCase,\n cell: MatrixCell,\n repetitions: RepetitionResult[],\n): CellReport {\n const adapterErrors = repetitions.filter((r) => r.error !== null).length;\n const evaluatedReps = repetitions.filter((r) => r.error === null);\n\n const assertionStats: AssertionStat[] = testCase.assertions.map(\n (thresholded, i) => {\n const threshold = thresholded.threshold ?? DEFAULT_THRESHOLD;\n const passedCount = evaluatedReps.filter(\n (r) => r.assertionResults[i]?.passed,\n ).length;\n const evaluatedCount = evaluatedReps.length;\n const passRate = evaluatedCount === 0 ? 
0 : passedCount / evaluatedCount;\n\n const description =\n evaluatedReps[0]?.assertionResults[i]?.description ??\n `(${thresholded.assertion.type})`;\n\n return {\n description,\n threshold,\n passedCount,\n evaluatedCount,\n passRate,\n meetsThreshold: evaluatedCount > 0 && passRate >= threshold,\n };\n },\n );\n\n const passed = assertionStats.every((s) => s.meetsThreshold);\n\n return {\n caseId: testCase.id,\n category: testCase.category,\n notes: testCase.notes,\n prompt: testCase.prompt,\n expectations: testCase.expectations,\n reference_trajectory: testCase.reference_trajectory,\n human_ratings: testCase.human_ratings,\n cell,\n repetitions,\n assertionStats,\n adapterErrors,\n passed,\n };\n}\n","/\n Promise-based concurrency limiter.\n \n Functionally equivalent to the `p-limit` package, inlined to avoid an\n * external dependency for ~20 lines of code.\n \n Usage:\n \n const limit = createLimit(4);\n * const results = await Promise.all(tasks.map(t => limit(() => run(t))));\n \n The limiter is unbounded in queue depth — it doesn't push back on the\n * caller. If you need bounded enqueue, wrap it.\n /\n\n/* A function that runs an async task under the concurrency limit. /\nexport type LimitedRunner = <T>(fn: () => Promise<T>) => Promise<T>;\n\nexport function createLimit(max: number): LimitedRunner {\n if (!Number.isInteger(max) \|\| max < 1) {\n throw new Error(`createLimit: max must be a positive integer, got ${max}`);\n }\n\n let running = 0;\n /\n FIFO list of resolvers belonging to tasks waiting for a slot. When a\n * running task finishes, the next resolver is invoked to wake one waiter.\n /\n const waiters: (() => void)[] = [];\n\n return async <T>(fn: () => Promise<T>): Promise<T> => {\n // Wait for a slot. 
The loop guards a race where another waiter could\n // grab the slot between our `await` resolving and our increment — in\n // single-threaded JS this is theoretical, but `while` is the right shape.\n while (running >= max) {\n await new Promise<void>((resolve) => waiters.push(resolve));\n }\n running++;\n\n try {\n return await fn();\n } finally {\n running--;\n // Wake exactly one waiter per finished task. Shifting from the front\n // gives FIFO behaviour — earlier callers get slots first.\n const next = waiters.shift();\n if (next) next();\n }\n };\n}\n","/\n Suite-level runner — fans out (case × cell × repetition) tasks with concurrency control.\n \n Tasks run under a {@link createLimit} pool; results are bucketed by case and\n * cell label, sorted by repetition index, then aggregated into a\n * {@link SuiteReport}.\n /\n\nimport { getAdapter, getDefaultAdapter } from \"../adapters/registry\";\nimport {\n aggregateCell,\n getRepetitions,\n mergeConfig,\n runRepetition,\n type AdapterRunFn,\n} from \"./case\";\nimport { createLimit } from \"./limit\";\nimport type {\n CellReport,\n MatrixCell,\n RepetitionResult,\n RunSuiteOptions,\n SuiteReport,\n TestCase,\n TestSuite,\n} from \"./types\";\n\nconst DEFAULT_MAX_CONCURRENT = 4;\n\n/* One unit of concurrent work: a single repetition for a (case, cell) pair. /\ninterface Task {\n testCase: TestCase;\n cell: MatrixCell;\n repetitionIndex: number;\n}\n\n/\n Execute an entire test suite and return an aggregated report.\n \n @throws When `suite.matrix` or `suite.cases` is empty.\n */\nexport async function runSuite(\n suite: TestSuite,\n options: RunSuiteOptions = {},\n): Promise<SuiteReport> {\n if (suite.matrix.length === 0) {\n throw new Error(\"runSuite: suite.matrix must contain at least one cell\");\n }\n if (suite.cases.length === 0) {\n throw new Error(\"runSuite: suite.cases must contain at least one case\");\n }\n\n const adapter =\n options.adapter ?? getAdapter(suite.adapter ?? 
getDefaultAdapter().id);\n\n const run: AdapterRunFn = (config) => adapter.run(config);\n\n const maxConcurrent = options.maxConcurrent ?? DEFAULT_MAX_CONCURRENT;\n const limit = createLimit(maxConcurrent);\n const onProgress = options.onProgress;\n\n const startTs = Date.now();\n const startedAt = new Date(startTs).toISOString();\n\n const tasks: Task[] = [];\n for (const testCase of suite.cases) {\n const reps = getRepetitions(testCase);\n for (const cell of suite.matrix) {\n for (let i = 0; i < reps; i++) {\n tasks.push({ testCase, cell, repetitionIndex: i });\n }\n }\n }\n\n onProgress?.({ kind: \"suite-start\", totalReps: tasks.length });\n\n const buckets = new Map<string, RepetitionResult[]>();\n // Stable key for grouping reps belonging to the same (case, cell).\n const bucketKey = (caseId: string, cellLabel: string) =>\n `${caseId}::${cellLabel}`;\n\n for (const testCase of suite.cases) {\n for (const cell of suite.matrix) {\n buckets.set(bucketKey(testCase.id, cell.label), []);\n }\n }\n\n await Promise.all(\n tasks.map((task) =>\n limit(async () => {\n if (options.signal?.aborted) return;\n\n onProgress?.({\n kind: \"rep-start\",\n caseId: task.testCase.id,\n cellLabel: task.cell.label,\n repIndex: task.repetitionIndex,\n });\n\n const config = mergeConfig(suite, task.testCase, task.cell);\n const result = await runRepetition(\n task.testCase,\n task.cell,\n config,\n task.repetitionIndex,\n run,\n options.signal,\n );\n\n buckets.get(bucketKey(task.testCase.id, task.cell.label))!.push(result);\n\n onProgress?.({\n kind: \"rep-complete\",\n caseId: task.testCase.id,\n cellLabel: task.cell.label,\n repIndex: task.repetitionIndex,\n ok: result.error === null,\n durationMs: result.durationMs,\n toolCallCount: result.adapterResult?.view.toolCalls.length,\n assertionResults: result.assertionResults,\n errorMessage: result.error?.message,\n });\n }),\n ),\n );\n\n const cells: CellReport[] = [];\n for (const testCase of suite.cases) {\n for (const cell of 
suite.matrix) {\n const reps = buckets.get(bucketKey(testCase.id, cell.label)) ?? [];\n reps.sort((a, b) => a.repetitionIndex - b.repetitionIndex);\n\n const cellReport = aggregateCell(testCase, cell, reps);\n cells.push(cellReport);\n\n onProgress?.({ kind: \"cell-complete\", report: cellReport });\n }\n }\n\n const report: SuiteReport = {\n startedAt,\n durationMs: Date.now() - startTs,\n cells,\n };\n\n onProgress?.({ kind: \"suite-complete\", report });\n\n return report;\n}\n"],"mappings":";;;;;;;;;;AAqBA,SAAgB,YAAY,UAAkB,SAA+B;CAC3E,MAAM,IAAI,cAAc,OAAO;CAC/B,IAAI,CAAC,EAAE,SAAS,GAAG,GAAG,OAAO,aAAa;CAC1C,OAAO,YAAY,CAAC,CAAC,CAAC,KAAK,QAAQ;AACrC;;AAGA,SAAgB,cAAc,SAA8B;CAC1D,OAAO,OAAO,YAAY,WAAW,UAAU,QAAQ;AACzD;;AAGA,SAAgB,gBAAgB,SAA8B;CAC5D,OAAO,cAAc,OAAO;AAC9B;;;;;AAMA,SAAS,YAAY,MAAsB;CACzC,MAAM,UAAU,KACb,QAAQ,sBAAsB,MAAM,CAAC,CACrC,QAAQ,OAAO,IAAI;CACtB,OAAO,IAAI,OAAO,IAAI,QAAQ,EAAE;AAClC;;;AClBA,MAAM,2BAAW,IAAI,IAAI;CACvB;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;AACF,CAAC;AACD,MAAM,+BAAe,IAAI,IAAI;CAAC;CAAU;CAAU;AAAK,CAAC;;;;;;;;AASxD,SAAgB,QAAQ,OAAgB,WAA6B;CAGnE,IAAI,CAAC,cAAc,SAAS,GAC1B,OAAO,WAAW,OAAO,SAAS;CAGpC,MAAM,MAAM;CACZ,MAAM,OAAO,OAAO,KAAK,GAAG;CAG5B,IAAI,KAAK,WAAW,GAAG;EACrB,MAAM,MAAM,KAAK;EAEjB,IAAI,aAAa,IAAI,GAAG,GACtB,QAAQ,KAAR;GACE,KAAK,UACH,OAAQ,IAAI,OAAuB,MAAM,QAAQ,QAAQ,OAAO,GAAG,CAAC;GACtE,KAAK,UACH,OAAQ,IAAI,OAAuB,OAAO,QACxC,QAAQ,OAAO,GAAG,CACpB;GACF,KAAK,OACH,OAAO,CAAC,QAAQ,OAAO,IAAI,GAAG;EAClC;EAGF,IAAI,SAAS,IAAI,GAAG,GAClB,OAAO,YAAY,OAAO,KAAK,IAAI,IAAI;CAI3C;CAIA,IAAI,CAAC,cAAc,KAAK,GAAG,OAAO;CAClC,MAAM,WAAW;CAEjB,KAAK,MAAM,CAAC,OAAO,YAAY,OAAO,QAAQ,GAAG,GAC/C,IAAI,CAAC,QAAQ,SAAS,QAAQ,OAAO,GAAG,OAAO;CAEjD,OAAO;AACT;;AAGA,SAAS,YAAY,OAAgB,IAAY,QAA0B;CACzE,QAAQ,IAAR;EACE,KAAK,UACH,OAAO,WAAW,OAAO,MAAM;EACjC,KAAK,YACH,OAAO,OAAO,UAAU,YAAY,MAAM,SAAS,MAAgB;EACrE,KAAK,gBACH,OAAO,OAAO,UAAU,YAAY,CAAC,MAAM,SAAS,MAAgB;EACtE,KAAK;GACH,IAAI,OAAO,UAAU,YAAY,OAAO,WAAW,UACjD,OAAO;GAET,IAAI;IACF,OAAO,IAAI,OAAO,MAAM,CAAC,CAAC,KAAK,KAAK;GACtC,QAAQ;IACN,OAAO;GACT;E
ACF,KAAK,OACH,OAAO,OAAO,UAAU,YAAY,SAAU;EAChD,KAAK,OACH,OAAO,OAAO,UAAU,YAAY,SAAU;EAChD,KAAK,MACH,OAAO,OAAO,UAAU,YAAY,QAAS;EAC/C,KAAK,MACH,OAAO,OAAO,UAAU,YAAY,QAAS;EAC/C,KAAK,UACH,OAAQ,OAAqB,MAAM,MAAM,WAAW,OAAO,CAAC,CAAC;EAC/D,SACE,MAAM,IAAI,MAAM,0BAA0B,IAAI;CAClD;AACF;;AAGA,SAAS,cAAc,GAA0C;CAC/D,OAAO,OAAO,MAAM,YAAY,MAAM,QAAQ,CAAC,MAAM,QAAQ,CAAC;AAChE;;;;;AAMA,SAAS,WAAW,GAAY,GAAqB;CACnD,IAAI,MAAM,GAAG,OAAO;CACpB,IAAI,OAAO,MAAM,OAAO,GAAG,OAAO;CAClC,IAAI,MAAM,QAAQ,MAAM,MAAM,OAAO;CACrC,IAAI,OAAO,MAAM,UAAU,OAAO;CAElC,IAAI,MAAM,QAAQ,CAAC,MAAM,MAAM,QAAQ,CAAC,GAAG,OAAO;CAClD,IAAI,MAAM,QAAQ,CAAC,KAAK,MAAM,QAAQ,CAAC,GAAG;EACxC,IAAI,EAAE,WAAW,EAAE,QAAQ,OAAO;EAClC,OAAO,EAAE,OAAO,GAAG,MAAM,WAAW,GAAG,EAAE,EAAE,CAAC;CAC9C;CAEA,MAAM,OAAO;CACb,MAAM,OAAO;CACb,MAAM,QAAQ,OAAO,KAAK,IAAI;CAC9B,MAAM,QAAQ,OAAO,KAAK,IAAI;CAC9B,IAAI,MAAM,WAAW,MAAM,QAAQ,OAAO;CAC1C,OAAO,MAAM,OAAO,MAAM,WAAW,KAAK,IAAI,KAAK,EAAE,CAAC;AACxD;;;;AClIA,SAAgB,eACd,MACA,WACiB;CACjB,MAAM,WAAW,KAAK,UAAU,QAAQ,MACtC,YAAY,EAAE,MAAM,UAAU,IAAI,CACpC;CAEA,MAAM,SADQ,iBAAiB,UAAU,KACtB,CAAC,CAAC,SAAS,MAAM;CAEpC,OAAO;EACL;EACA,aAAa,UAAU,gBAAgB,UAAU,IAAI,EAAE,IAAI,oBAAoB,UAAU,KAAK,EAAE;EAChG,SAAS,SACL,SAAS,SAAS,OAAO,qBACzB,SAAS,SAAS,OAAO,qBAAqB,oBAAoB,UAAU,KAAK;EACrF,SAAS;CACX;AACF;;AAGA,SAAgB,kBACd,MACA,WACiB;CACjB,MAAM,WAAW,KAAK,UAAU,QAAQ,MACtC,YAAY,EAAE,MAAM,UAAU,IAAI,CACpC;CACA,MAAM,SAAS,SAAS,WAAW;CAEnC,OAAO;EACL;EACA,aAAa,cAAc,gBAAgB,UAAU,IAAI,EAAE;EAC3D,SAAS,SACL,sBACA,SAAS,SAAS,OAAO;EAC7B,SAAS;CACX;AACF;;AAGA,SAAgB,oBACd,MACA,WACiB;CACjB,MAAM,aAAyB,CAAC;CAChC,KAAK,MAAM,WAAW,UAAU,OAC9B,WAAW,KACT,GAAG,KAAK,UAAU,QAAQ,MAAM,YAAY,EAAE,MAAM,OAAO,CAAC,CAC9D;CAEF,MAAM,SAAS,WAAW,SAAS;CACnC,OAAO;EACL;EACA,aAAa,iBAAiB,UAAU,MAAM,IAAI,eAAe,CAAC,CAAC,KAAK,IAAI,EAAE;EAC9E,SAAS,SACL,GAAG,WAAW,OAAO,qBACrB;EACJ,SAAS;CACX;AACF;;AAGA,SAAgB,oBACd,MACA,WACiB;CACjB,MAAM,aAAa,UAAU,MAAM,KAAK,OAAO;EAC7C,SAAS;EACT,SAAS,KAAK,UAAU,QAAQ,MAAM,YAAY,EAAE,MAAM,CAAC,CAAC;CAC9D,EAAE;CACF,MAAM,UAAU,WAAW,QAAQ,MAAM,EAAE,QAAQ,WAAW,CAAC;CAC/D,MAAM,SAAS,QAAQ,WAAW;CAEl
C,OAAO;EACL;EACA,aAAa,iBAAiB,UAAU,MAAM,IAAI,eAAe,CAAC,CAAC,KAAK,IAAI,EAAE;EAC9E,SAAS,SACL,yBACA,YAAY,QAAQ,KAAK,MAAM,gBAAgB,EAAE,OAAO,CAAC,CAAC,CAAC,KAAK,IAAI;EACxE,SAAS,WAAW,SAAS,MAAM,EAAE,OAAO;CAC9C;AACF;;AAKA,SAAgB,qBACd,MACA,WACiB;CACjB,MAAM,SAAS,KAAK,UAAU,QAAQ,MACpC,YAAY,EAAE,MAAM,UAAU,KAAK,CACrC;CACA,MAAM,QAAQ,KAAK,UAAU,QAAQ,MACnC,YAAY,EAAE,MAAM,UAAU,IAAI,CACpC;CACA,MAAM,OAAO,iBAAiB,gBAAgB,UAAU,KAAK,EAAE,KAAK,gBAAgB,UAAU,IAAI,EAAE;CAEpG,IAAI,OAAO,WAAW,GACpB,OAAO;EACL,QAAQ;EACR,aAAa;EACb,SAAS;CACX;CAEF,IAAI,MAAM,WAAW,GACnB,OAAO;EACL,QAAQ;EACR,aAAa;EACb,SAAS;CACX;CAIF,MAAM,gBAAgB,KAAK,IAAI,GAAG,OAAO,KAAK,MAAM,EAAE,SAAS,CAAC;CAChE,MAAM,eAAe,KAAK,IAAI,GAAG,MAAM,KAAK,MAAM,EAAE,SAAS,CAAC;CAC9D,MAAM,SAAS,gBAAgB;CAE/B,OAAO;EACL;EACA,aAAa;EACb,SAAS,SACL,gBAAgB,cAAc,gBAAgB,iBAC9C,gBAAgB,cAAc,gBAAgB,aAAa;EAC/D,SAAS,CAAC,GAAG,QAAQ,GAAG,KAAK;CAC/B;AACF;;;;;;AAOA,SAAgB,iBACd,MACA,WACiB;CACjB,MAAM,EAAE,OAAO,SAAS,UAAU;CAClC,MAAM,OAAO,aAAa,MAAM,IAAI,eAAe,CAAC,CAAC,KAAK,KAAK,EAAE,GAAG,SAAS,aAAa,GAAG;CAE7F,IAAI,MAAM,WAAW,GACnB,OAAO;EACL,QAAQ;EACR,aAAa;EACb,SAAS;CACX;CAGF,IAAI,QAAQ;EAGV,IAAI,KAAK,UAAU,SAAS,MAAM,QAChC,OAAO;GACL,QAAQ;GACR,aAAa;GACb,SAAS;EACX;EAEF,KACE,IAAI,QAAQ,GACZ,SAAS,KAAK,UAAU,SAAS,MAAM,QACvC,SACA;GACA,IAAI,KAAK;GACT,KAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAChC,IAAI,CAAC,YAAY,KAAK,UAAU,QAAQ,EAAE,CAAC,MAAM,MAAM,EAAE,GAAG;IAC1D,KAAK;IACL;GACF;GAEF,IAAI,IACF,OAAO;IACL,QAAQ;IACR,aAAa;IACb,SAAS,wBAAwB,MAAM,IAAI,QAAQ,MAAM,SAAS;IAClE,SAAS,KAAK,UAAU,MAAM,OAAO,QAAQ,MAAM,MAAM;GAC3D;EAEJ;EACA,OAAO;GAAE,QAAQ;GAAO,aAAa;GAAM,SAAS;EAAsB;CAC5E;CAIA,IAAI,MAAM;CACV,MAAM,UAAsB,CAAC;CAC7B,KAAK,MAAM,QAAQ,KAAK,WACtB,IAAI,MAAM,MAAM,UAAU,YAAY,KAAK,MAAM,MAAM,IAAI,GAAG;EAC5D,QAAQ,KAAK,IAAI;EACjB;CACF;CAEF,MAAM,SAAS,QAAQ,MAAM;CAC7B,OAAO;EACL;EACA,aAAa;EACb,SAAS,SAAS,qBAAqB,WAAW,IAAI,GAAG,MAAM;EAC/D,SAAS;CACX;AACF;;AAKA,SAAgB,mBACd,MACA,WACiB;CACjB,MAAM,aAAa,KAAK,UAAU,QAAQ,MACxC,YAAY,EAAE,MAAM,UAAU,IAAI,CACpC;CACA,MAAM,WAAW,WAAW,QAAQ,MAClCA,QAAiB,EAAE,MAAM,UAAU,IAAI,CACzC;CACA,MAAM,SAAS,SAAS,S
AAS;CAEjC,IAAI;CACJ,IAAI,QACF,UAAU,GAAG,SAAS,OAAO;MACxB,IAAI,WAAW,WAAW,GAC/B,UAAU,eAAe,gBAAgB,UAAU,IAAI,EAAE;MAEzD,UAAU,GAAG,WAAW,OAAO;CAGjC,OAAO;EACL;EACA,aAAa,eAAe,gBAAgB,UAAU,IAAI,EAAE;EAC5D;EACA,SAAS;CACX;AACF;;;;;;;;;;;;;ACrOA,SAAgB,kCACd,MACA,YACiB;CACjB,MAAM,SAAS,KAAK,UAAU,WAAW,KAAK,KAAK,cAAc,SAAS;CAC1E,OAAO;EACL;EACA,aAAa;EACb,SAAS,SACL,wCACA,KAAK,UAAU,SAAS,IACtB,GAAG,KAAK,UAAU,OAAO,sBACzB;CACR;AACF;;AAGA,SAAgB,yBACd,MACA,WACiB;CACjB,MAAM,IAAI,KAAK,MAAM;CAErB,OAAO;EACL,QAFa,KAAK,UAAU;EAG5B,aAAa,qBAAqB,UAAU,IAAI;EAChD,SAAS,QAAQ,EAAE;CACrB;AACF;;AAGA,SAAgB,sBACd,MACA,WACiB;CACjB,MAAM,OAAO,KAAK,MAAM;CAExB,OAAO;EACL,QAFa,QAAQ,UAAU;EAG/B,aAAa,mBAAmB,UAAU,IAAI,QAAQ,CAAC,EAAE;EACzD,SAAS,SAAS,KAAK,QAAQ,CAAC;CAClC;AACF;;AAGA,SAAgB,yBACd,MACA,WACiB;CACjB,MAAM,KAAK,KAAK,MAAM;CAEtB,OAAO;EACL,QAFa,MAAM,UAAU;EAG7B,aAAa,sBAAsB,UAAU,IAAI;EACjD,SAAS,QAAQ,GAAG;CACtB;AACF;;AAGA,SAAgB,qBACd,MACA,WACiB;CACjB,MAAM,UAAU,MAAM,QAAQ,UAAU,OAAO,IAC3C,UAAU,UACV,CAAC,UAAU,OAAO;CACtB,MAAM,SAAS,KAAK;CAEpB,OAAO;EACL,QAFa,WAAW,QAAQ,QAAQ,SAAS,MAAM;EAGvD,aAAa,iBAAiB,QAAQ,KAAK,GAAG,EAAE;EAChD,SAAS,WAAW,UAAU;CAChC;AACF;;AAKA,SAAgB,yBACd,MACA,WACiB;CACjB,MAAM,SAAS,KAAK,cAAc,SAAS,UAAU,IAAI;CACzD,OAAO;EACL;EACA,aAAa,qBAAqB,KAAK,UAAU,UAAU,IAAI,EAAE;EACjE,SAAS,SAAS,eAAe;CACnC;AACF;;AAGA,SAAgB,4BACd,MACA,WACiB;CACjB,MAAM,SAAS,CAAC,KAAK,cAAc,SAAS,UAAU,IAAI;CAC1D,OAAO;EACL;EACA,aAAa,yBAAyB,KAAK,UAAU,UAAU,IAAI,EAAE;EACrE,SAAS,SAAS,gBAAgB;CACpC;AACF;;AAGA,SAAgB,wBACd,MACA,WACiB;CAGjB,IAAI;CACJ,IAAI;CACJ,IAAI;EAEF,SAAS,IADM,OAAO,UAAU,SAAS,UAAU,KACzC,CAAC,CAAC,KAAK,KAAK,aAAa;EACnC,UAAU,SAAS,oBAAoB;CACzC,SAAS,KAAK;EACZ,SAAS;EACT,UAAU,kBAAkB,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;CAC7E;CACA,OAAO;EACL;EACA,aAAa,qBAAqB,UAAU,QAAQ,GAAG,UAAU,SAAS,GAAG;EAC7E;CACF;AACF;;;;;;;;AAWA,SAAgB,kBACd,MACA,WACiB;CACjB,IAAI,SAAS;CACb,IAAI;CACJ,IAAI;EACF,SAAS,UAAU,GAAG,IAAI;EAC1B,UAAU,SAAS,4BAA4B;CACjD,SAAS,KAAK;EACZ,UAAU,oBAAoB,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;CAC/E;CACA,OAAO;EACL;EACA,aAAa,UAAU,eAAe;EACtC;CACF;AACF;;;;AC7JA,SAAgB,cACd,M
ACA,WACA,UACiB;CACjB,MAAM,WAAW,UAAU,WAAW,KAAK,MAAM,SAAS,MAAM,CAAC,CAAC;CAClE,MAAM,SAAS,SAAS,OAAO,MAAM,EAAE,MAAM;CAC7C,MAAM,cAAc,SAAS,QAAQ,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC;CAEtD,OAAO;EACL;EACA,aAAa,WAAW,SAAS,OAAO,QAAQ,SAAS,WAAW,IAAI,KAAK,MAAM;EACnF,SAAS,SACL,eACA,GAAG,YAAY,MAAM,SAAS,OAAO;EACzC;CACF;AACF;;AAGA,SAAgB,cACd,MACA,WACA,UACiB;CACjB,MAAM,WAAW,UAAU,WAAW,KAAK,MAAM,SAAS,MAAM,CAAC,CAAC;CAClE,MAAM,cAAc,SAAS,QAAQ,MAAM,EAAE,MAAM,CAAC,CAAC;CACrD,MAAM,SAAS,cAAc;CAE7B,OAAO;EACL;EACA,aAAa,WAAW,SAAS,OAAO,QAAQ,SAAS,WAAW,IAAI,KAAK,MAAM;EACnF,SAAS,SAAS,GAAG,YAAY,WAAW;EAC5C;CACF;AACF;;AAGA,SAAgB,YACd,MACA,WACA,UACiB;CACjB,MAAM,QAAQ,SAAS,MAAM,UAAU,SAAS;CAChD,OAAO;EACL,QAAQ,CAAC,MAAM;EACf,aAAa,OAAO,MAAM,YAAY;EACtC,SAAS,MAAM,SACX,kCACA;EACJ,UAAU,CAAC,KAAK;CAClB;AACF;;;;;;;;;;AC9BA,SAAgB,SACd,MACA,WACiB;CACjB,QAAQ,UAAU,MAAlB;EAEE,KAAK,UACH,OAAO,eAAe,MAAM,SAAS;EACvC,KAAK,cACH,OAAO,kBAAkB,MAAM,SAAS;EAC1C,KAAK,iBACH,OAAO,oBAAoB,MAAM,SAAS;EAC5C,KAAK,iBACH,OAAO,oBAAoB,MAAM,SAAS;EAC5C,KAAK,iBACH,OAAO,qBAAqB,MAAM,SAAS;EAC7C,KAAK,YACH,OAAO,iBAAiB,MAAM,SAAS;EAGzC,KAAK,eACH,OAAO,mBAAmB,MAAM,SAAS;EAG3C,KAAK,gCACH,OAAO,kCAAkC,MAAM,SAAS;EAC1D,KAAK,qBACH,OAAO,yBAAyB,MAAM,SAAS;EACjD,KAAK,mBACH,OAAO,sBAAsB,MAAM,SAAS;EAC9C,KAAK,sBACH,OAAO,yBAAyB,MAAM,SAAS;EACjD,KAAK,iBACH,OAAO,qBAAqB,MAAM,SAAS;EAG7C,KAAK,qBACH,OAAO,yBAAyB,MAAM,SAAS;EACjD,KAAK,yBACH,OAAO,4BAA4B,MAAM,SAAS;EACpD,KAAK,oBACH,OAAO,wBAAwB,MAAM,SAAS;EAIhD,KAAK,UACH,OAAO,cAAc,MAAM,WAAW,QAAQ;EAChD,KAAK,UACH,OAAO,cAAc,MAAM,WAAW,QAAQ;EAChD,KAAK,OACH,OAAO,YAAY,MAAM,WAAW,QAAQ;EAG9C,KAAK,aACH,OAAO,kBAAkB,MAAM,SAAS;EAE1C,SAKE,MAAM,IAAI,MAAM,sBAAsB,KAAK,UAAUC,SAAW,GAAG;CAEvE;AACF;;;;;;AAOA,SAAgB,YACd,MACA,YACmB;CACnB,OAAO,WAAW,KAAK,MAAM,SAAS,MAAM,CAAC,CAAC;AAChD;;;AC7FA,MAAM,WAA2C,CAAC;AAElD,SAAS,gBAAgB,IAAY,SAA+B;CAClE,SAAS,MAAM;AACjB;AAEA,gBAAgB,eAAe,iBAAiB;;;;;;;AAQhD,SAAgB,gBAAgB,IAAY,SAA+B;CACzE,IAAI,SAAS,KACX,MAAM,IAAI,MAAM,YAAY,GAAG,wBAAwB;CAEzD,SAAS,MAAM;AACjB;;AAGA,SAAgB,eAAyB;CACvC,OAAO,OAAO,KAAK,QAAQ;AAC7B;;AAGA,SAAgB,WAAW,IAA4B;CACrD,M
AAM,UAAU,SAAS;CACzB,IAAI,CAAC,SACH,MAAM,IAAI,MACR,oBAAoB,GAAG,gBAAgB,aAAa,CAAC,CAAC,KAAK,IAAI,GACjE;CAEF,OAAO;AACT;;AAGA,MAAa,qBAAqB;AAElC,SAAgB,oBAAoC;CAClD,OAAO,WAAW,kBAAkB;AACtC;;;;;;;;;;;ACpDA,SAAgB,mBACd,QACA,QACyB;CACzB,MAAM,SAAkC,CAAC;CACzC,KAAK,MAAM,SAAS,QAAQ;EAC1B,MAAM,EAAE,YAAY,GAAG,YAAY;EACnC,OAAO,OAAO,QAAQ,OAAO;EAC7B,IAAI,cAAc,OAAO,eAAe,UACtC,OAAO,OAAO,QAAQ,UAAU;CAEpC;CACA,OAAO,SAAS;CAChB,OAAO;AACT;;;;;AAMA,SAAgB,iBACd,WACA,QACA,QACmB;CACnB,IAAI,cAAA,iBAAoC,cAAc,eACpD,OAAO,mBAAmB,QAAQ,MAAM;CAG1C,MAAM,SAAkC,CAAC;CACzC,KAAK,MAAM,SAAS,QAClB,OAAO,OAAO,QAAQ,KAAK;CAE7B,OAAO,SAAS;CAChB,OAAO;AACT;;;;AC7BA,MAAa,sBAAsB;;AAGnC,MAAa,oBAAoB;;;;;;AAYjC,SAAgB,YACd,OACA,UACA,MAC6C;CAO7C,OAAO,iBANW,MAAM,WAAW,kBAAkB,CAAC,CAAC,IAMpB;EAJjC,MAAM,iBAAiB,CAAC;EACxB,SAAS,UAAU,CAAC;EACpB,KAAK;CAEiC,GAAG,SAAS,MAAM;AAC5D;;AAGA,SAAgB,eAAe,UAA4B;CACzD,OAAO,SAAS,eAAA;AAClB;;;;;;;AAQA,eAAsB,cACpB,UACA,OACA,QACA,iBACA,KACA,QAC2B;CAC3B,MAAM,UAAU,KAAK,IAAI;CAEzB,IAAI;EACF,MAAM,gBAAgB,MAAM,IAAI;GAC9B,GAAG;GACH,QAAQ,UAAU,OAAO;EAC3B,CAAC;EAOD,OAAO;GACL;GACA;GACA,OAAO;GACP,kBATuB,YACvB,cAAc,MACd,SAAS,WAAW,KAAK,MAAM,EAAE,SAAS,CAO3B;GACf,YAAY,KAAK,IAAI,IAAI;EAC3B;CACF,SAAS,KAAK;EACZ,OAAO;GACL;GACA,eAAe;GACf,OAAO,aAAa,GAAG;GACvB,kBAAkB,CAAC;GACnB,YAAY,KAAK,IAAI,IAAI;EAC3B;CACF;AACF;;;;;;;AAQA,SAAS,aAAa,KAA+B;CACnD,MAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;CAE/D,IAAI,cAA2C,CAAC;CAChD,IAAI,QAAQ,QAAQ,OAAO,QAAQ,YAAY,iBAAiB,KAAK;EACnE,MAAM,IAAK,IAAiC;EAC5C,IAAI,MAAM,QAAQ,OAAO,MAAM,UAC7B,cAAc;CAElB;CAEA,OAAO;EAAE;EAAS;CAAY;AAChC;;;;;;;AAQA,SAAgB,cACd,UACA,MACA,aACY;CACZ,MAAM,gBAAgB,YAAY,QAAQ,MAAM,EAAE,UAAU,IAAI,CAAC,CAAC;CAClE,MAAM,gBAAgB,YAAY,QAAQ,MAAM,EAAE,UAAU,IAAI;CAEhE,MAAM,iBAAkC,SAAS,WAAW,KACzD,aAAa,MAAM;EAClB,MAAM,YAAY,YAAY,aAAA;EAC9B,MAAM,cAAc,cAAc,QAC/B,MAAM,EAAE,iBAAiB,EAAE,EAAE,MAChC,CAAC,CAAC;EACF,MAAM,iBAAiB,cAAc;EACrC,MAAM,WAAW,mBAAmB,IAAI,IAAI,cAAc;EAM1D,OAAO;GACL,aAJA,cAAc,EAAE,EAAE,iBAAiB,EAAE,EAAE,eACvC,IAAI,YAAY,UAAU,KAAK;GAI/B;GACA;GACA;GACA;GACA,gBAAgB,iBAAiB,KAAK,YAAY;EACpD;CACF,
CACF;CAEA,MAAM,SAAS,eAAe,OAAO,MAAM,EAAE,cAAc;CAE3D,OAAO;EACL,QAAQ,SAAS;EACjB,UAAU,SAAS;EACnB,OAAO,SAAS;EAChB,QAAQ,SAAS;EACjB,cAAc,SAAS;EACvB,sBAAsB,SAAS;EAC/B,eAAe,SAAS;EACxB;EACA;EACA;EACA;EACA;CACF;AACF;;;AC9JA,SAAgB,YAAY,KAA4B;CACtD,IAAI,CAAC,OAAO,UAAU,GAAG,KAAK,MAAM,GAClC,MAAM,IAAI,MAAM,oDAAoD,KAAK;CAG3E,IAAI,UAAU;;;;;CAKd,MAAM,UAA0B,CAAC;CAEjC,OAAO,OAAU,OAAqC;EAIpD,OAAO,WAAW,KAChB,MAAM,IAAI,SAAe,YAAY,QAAQ,KAAK,OAAO,CAAC;EAE5D;EAEA,IAAI;GACF,OAAO,MAAM,GAAG;EAClB,UAAU;GACR;GAGA,MAAM,OAAO,QAAQ,MAAM;GAC3B,IAAI,MAAM,KAAK;EACjB;CACF;AACF;;;;;;;;;;ACtBA,MAAM,yBAAyB;;;;;;AAc/B,eAAsB,SACpB,OACA,UAA2B,CAAC,GACN;CACtB,IAAI,MAAM,OAAO,WAAW,GAC1B,MAAM,IAAI,MAAM,uDAAuD;CAEzE,IAAI,MAAM,MAAM,WAAW,GACzB,MAAM,IAAI,MAAM,sDAAsD;CAGxE,MAAM,UACJ,QAAQ,WAAW,WAAW,MAAM,WAAW,kBAAkB,CAAC,CAAC,EAAE;CAEvE,MAAM,OAAqB,WAAW,QAAQ,IAAI,MAAM;CAGxD,MAAM,QAAQ,YADQ,QAAQ,iBAAiB,sBACR;CACvC,MAAM,aAAa,QAAQ;CAE3B,MAAM,UAAU,KAAK,IAAI;CACzB,MAAM,YAAY,IAAI,KAAK,OAAO,CAAC,CAAC,YAAY;CAEhD,MAAM,QAAgB,CAAC;CACvB,KAAK,MAAM,YAAY,MAAM,OAAO;EAClC,MAAM,OAAO,eAAe,QAAQ;EACpC,KAAK,MAAM,QAAQ,MAAM,QACvB,KAAK,IAAI,IAAI,GAAG,IAAI,MAAM,KACxB,MAAM,KAAK;GAAE;GAAU;GAAM,iBAAiB;EAAE,CAAC;CAGvD;CAEA,aAAa;EAAE,MAAM;EAAe,WAAW,MAAM;CAAO,CAAC;CAE7D,MAAM,0BAAU,IAAI,IAAgC;CAEpD,MAAM,aAAa,QAAgB,cACjC,GAAG,OAAO,IAAI;CAEhB,KAAK,MAAM,YAAY,MAAM,OAC3B,KAAK,MAAM,QAAQ,MAAM,QACvB,QAAQ,IAAI,UAAU,SAAS,IAAI,KAAK,KAAK,GAAG,CAAC,CAAC;CAItD,MAAM,QAAQ,IACZ,MAAM,KAAK,SACT,MAAM,YAAY;EAChB,IAAI,QAAQ,QAAQ,SAAS;EAE7B,aAAa;GACX,MAAM;GACN,QAAQ,KAAK,SAAS;GACtB,WAAW,KAAK,KAAK;GACrB,UAAU,KAAK;EACjB,CAAC;EAED,MAAM,SAAS,YAAY,OAAO,KAAK,UAAU,KAAK,IAAI;EAC1D,MAAM,SAAS,MAAM,cACnB,KAAK,UACL,KAAK,MACL,QACA,KAAK,iBACL,KACA,QAAQ,MACV;EAEA,QAAQ,IAAI,UAAU,KAAK,SAAS,IAAI,KAAK,KAAK,KAAK,CAAC,CAAC,CAAE,KAAK,MAAM;EAEtE,aAAa;GACX,MAAM;GACN,QAAQ,KAAK,SAAS;GACtB,WAAW,KAAK,KAAK;GACrB,UAAU,KAAK;GACf,IAAI,OAAO,UAAU;GACrB,YAAY,OAAO;GACnB,eAAe,OAAO,eAAe,KAAK,UAAU;GACpD,kBAAkB,OAAO;GACzB,cAAc,OAAO,OAAO;EAC9B,CAAC;CACH,CAAC,CACH,CACF;CAEA,MAAM,QAAsB,CAAC;CAC7B,KAAK,MAAM,YAAY,MAAM,OAC3B,
KAAK,MAAM,QAAQ,MAAM,QAAQ;EAC/B,MAAM,OAAO,QAAQ,IAAI,UAAU,SAAS,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC;EACjE,KAAK,MAAM,GAAG,MAAM,EAAE,kBAAkB,EAAE,eAAe;EAEzD,MAAM,aAAa,cAAc,UAAU,MAAM,IAAI;EACrD,MAAM,KAAK,UAAU;EAErB,aAAa;GAAE,MAAM;GAAiB,QAAQ;EAAW,CAAC;CAC5D;CAGF,MAAM,SAAsB;EAC1B;EACA,YAAY,KAAK,IAAI,IAAI;EACzB;CACF;CAEA,aAAa;EAAE,MAAM;EAAkB;CAAO,CAAC;CAE/C,OAAO;AACT"}
1	+ {"version":3,"file":"suite-BcP64nlb.js","names":["predicateMatches","_exhaustive"],"sources":["../src/assertions/patterns.ts","../src/assertions/predicates.ts","../src/assertions/tool-calls.ts","../src/assertions/behavior.ts","../src/assertions/compound.ts","../src/assertions/evaluator.ts","../src/adapters/registry.ts","../src/config/resolve-config.ts","../src/runner/case.ts","../src/runner/limit.ts","../src/runner/suite.ts"],"sourcesContent":["/*\n Tool name pattern matching.\n \n Tool names follow conventions:\n * - Built-in tools: `Bash`, `Read`, `Edit`, `WebSearch`, etc.\n * - MCP tools: `mcp__<server>__<tool>`, e.g. `mcp__api__search_skills`.\n \n Patterns support `` as a glob wildcard. The most useful patterns for\n the skills-loading problem are namespace globs like `mcp__api__` —\n \"did any tool from the alis MCP server get called.\"\n /\n\nimport type { ToolPattern } from \"../types/assertions\";\n\n/\n Test whether a fully-qualified tool name matches a pattern.\n \n Literal patterns (no ``) match by string equality. Glob patterns are\n compiled to a regex on each call — fine for our scale (dozens of patterns,\n * thousands of calls per run). If this becomes a hot path, memoize.\n /\nexport function toolMatches(toolName: string, pattern: ToolPattern): boolean {\n const p = patternString(pattern);\n if (!p.includes(\"\")) return toolName === p;\n return globToRegex(p).test(toolName);\n}\n\n/** Extract the underlying string from either pattern form. /\nexport function patternString(pattern: ToolPattern): string {\n return typeof pattern === \"string\" ? pattern : pattern.pattern;\n}\n\n/* Human-readable representation for diagnostic messages. 
/\nexport function describePattern(pattern: ToolPattern): string {\n return patternString(pattern);\n}\n\n/\n Convert a glob (with `` wildcards only) to an anchored regex.\n Other regex metacharacters in the input are escaped.\n /\nfunction globToRegex(glob: string): RegExp {\n const escaped = glob\n .replace(/[.+?^${}()\|[\\]\\\\]/g, \"\\\\$&\") // escape regex specials\n .replace(/\\/g, \".\"); // → .\n return new RegExp(`^${escaped}$`);\n}\n","/\n Predicate engine for matching tool call arguments.\n \n Conceptually similar to MongoDB query selectors: a predicate is a tree\n * of conditions, applied recursively to a value. Examples:\n \n matches(\"hello world\", { contains: \"world\" }) // true\n * matches({ a: 1 }, { a: { gte: 0 } }) // true\n * matches({ a: { b: \"x\" } }, { a: { b: \"x\" } }) // true (scalar shortcut)\n * matches({ q: \"ab\" }, { any_of: [{equals:\"x\"}, {contains:\"a\"}] }) // ???\n \n Last example: the `any_of` applies to the value (`{q:\"ab\"}`), not to a\n * field. `equals:\"x\"` and `contains:\"a\"` are both leaf predicates that\n * apply to the whole value. `contains` requires a string, so it returns\n * false for the object. The whole thing returns false. That's deliberate.\n \n Disambiguation rule (single-key objects): a single-key object is interpreted as a leaf or compound predicate IF\n * the key matches a known operator name. Otherwise it falls through to\n * being treated as an object predicate (field name = key).\n \n This means a tool argument schema cannot have a top-level field named\n * `equals`, `contains`, `regex`, `any_of`, `all_of`, `not`, etc. — those\n * fields would be shadowed by predicate operators. 
For MCP tools, this\n * has never been a problem in practice; document it and move on.\n /\n\nimport type { Predicate } from \"../types/assertions\";\n\nconst LEAF_OPS = new Set([\n \"equals\",\n \"contains\",\n \"not_contains\",\n \"regex\",\n \"gte\",\n \"lte\",\n \"gt\",\n \"lt\",\n \"one_of\",\n]);\nconst COMPOUND_OPS = new Set([\"any_of\", \"all_of\", \"not\"]);\n\n/\n Apply a predicate to a value. Returns true if the value satisfies the\n * predicate, false otherwise.\n \n The `predicate` parameter is typed as `unknown` because YAML deserialization\n * produces unconstrained shapes; runtime dispatch is the validation.\n /\nexport function matches(value: unknown, predicate: unknown): boolean {\n // Scalar shortcut: anything that isn't a plain object (or is an array) is\n // treated as an equality target.\n if (!isPlainObject(predicate)) {\n return deepEquals(value, predicate);\n }\n\n const obj = predicate as Record<string, unknown>;\n const keys = Object.keys(obj);\n\n // Single-key object: check if it's a known operator.\n if (keys.length === 1) {\n const key = keys[0];\n\n if (COMPOUND_OPS.has(key)) {\n switch (key) {\n case \"any_of\":\n return (obj.any_of as Predicate[]).some((sub) => matches(value, sub));\n case \"all_of\":\n return (obj.all_of as Predicate[]).every((sub) =>\n matches(value, sub),\n );\n case \"not\":\n return !matches(value, obj.not);\n }\n }\n\n if (LEAF_OPS.has(key)) {\n return matchesLeaf(value, key, obj[key]);\n }\n\n // Single key but not a known operator → object predicate (field match).\n }\n\n // Object predicate: every key is a field on `value`, every key's value is\n // a sub-predicate that must hold for the corresponding field.\n if (!isPlainObject(value)) return false;\n const valueObj = value as Record<string, unknown>;\n\n for (const [field, subPred] of Object.entries(obj)) {\n if (!matches(valueObj[field], subPred)) return false;\n }\n return true;\n}\n\n/* Apply a single leaf operator to a value. 
Caller guarantees `op` is in LEAF_OPS. /\nfunction matchesLeaf(value: unknown, op: string, target: unknown): boolean {\n switch (op) {\n case \"equals\":\n return deepEquals(value, target);\n case \"contains\":\n return typeof value === \"string\" && value.includes(target as string);\n case \"not_contains\":\n return typeof value === \"string\" && !value.includes(target as string);\n case \"regex\":\n if (typeof value !== \"string\" \|\| typeof target !== \"string\") {\n return false;\n }\n try {\n return new RegExp(target).test(value);\n } catch {\n return false;\n }\n case \"gte\":\n return typeof value === \"number\" && value >= (target as number);\n case \"lte\":\n return typeof value === \"number\" && value <= (target as number);\n case \"gt\":\n return typeof value === \"number\" && value > (target as number);\n case \"lt\":\n return typeof value === \"number\" && value < (target as number);\n case \"one_of\":\n return (target as unknown[]).some((t) => deepEquals(value, t));\n default:\n throw new Error(`unknown leaf operator: ${op}`);\n }\n}\n\n/* True for non-null, non-array objects. /\nfunction isPlainObject(x: unknown): x is Record<string, unknown> {\n return typeof x === \"object\" && x !== null && !Array.isArray(x);\n}\n\n/\n Structural equality for unknown values. 
Used by `equals` and `one_of`.\n * Strict — no coercions, no NaN-equals-NaN special case (matches `===`).\n /\nfunction deepEquals(a: unknown, b: unknown): boolean {\n if (a === b) return true;\n if (typeof a !== typeof b) return false;\n if (a === null \|\| b === null) return false;\n if (typeof a !== \"object\") return false;\n\n if (Array.isArray(a) !== Array.isArray(b)) return false;\n if (Array.isArray(a) && Array.isArray(b)) {\n if (a.length !== b.length) return false;\n return a.every((v, i) => deepEquals(v, b[i]));\n }\n\n const aObj = a as Record<string, unknown>;\n const bObj = b as Record<string, unknown>;\n const aKeys = Object.keys(aObj);\n const bKeys = Object.keys(bObj);\n if (aKeys.length !== bKeys.length) return false;\n return aKeys.every((k) => deepEquals(aObj[k], bObj[k]));\n}\n","/\n Tool-call assertion evaluators.\n \n These assertions query the `toolCalls` array on the trajectory view:\n * presence, cardinality, ordering, and argument matching.\n \n Ordering is done on `turnIndex`, not wall-clock time. Parallel tool calls\n * within a single assistant turn share a turnIndex, which means \"A came\n * before B\" requires A's turn to strictly precede B's turn — calls within\n * the same turn are considered unordered. This is the right default\n * because Claude Code dispatches parallel calls concurrently and the\n * wall-clock ordering is non-deterministic.\n /\n\nimport type { Assertion, AssertionResult } from \"../types/assertions\";\nimport type { ToolCall, TrajectoryView } from \"../types/trajectory\";\nimport { describeCardinality, parseCardinality } from \"./cardinality\";\nimport { describePattern, toolMatches } from \"./patterns\";\nimport { matches as predicateMatches } from \"./predicates\";\n\n// presence\n\n/* Assert a tool was called with optional cardinality (`times`). 
/\nexport function evaluateCalled(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"called\" }>,\n): AssertionResult {\n const matching = view.toolCalls.filter((c) =>\n toolMatches(c.name, assertion.tool),\n );\n const check = parseCardinality(assertion.times);\n const passed = check(matching.length);\n\n return {\n passed,\n description: `called(${describePattern(assertion.tool)}, ${describeCardinality(assertion.times)})`,\n details: passed\n ? `found ${matching.length} matching call(s)`\n : `found ${matching.length} call(s), expected ${describeCardinality(assertion.times)}`,\n matches: matching,\n };\n}\n\n/* Assert a tool was never called. /\nexport function evaluateNotCalled(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"not_called\" }>,\n): AssertionResult {\n const matching = view.toolCalls.filter((c) =>\n toolMatches(c.name, assertion.tool),\n );\n const passed = matching.length === 0;\n\n return {\n passed,\n description: `not_called(${describePattern(assertion.tool)})`,\n details: passed\n ? \"no matching calls\"\n : `found ${matching.length} forbidden call(s)`,\n matches: matching,\n };\n}\n\n/* Assert at least one of the listed tools was called. /\nexport function evaluateCalledAnyOf(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"called_any_of\" }>,\n): AssertionResult {\n const allMatches: ToolCall[] = [];\n for (const pattern of assertion.tools) {\n allMatches.push(\n ...view.toolCalls.filter((c) => toolMatches(c.name, pattern)),\n );\n }\n const passed = allMatches.length > 0;\n return {\n passed,\n description: `called_any_of(${assertion.tools.map(describePattern).join(\", \")})`,\n details: passed\n ? `${allMatches.length} matching call(s)`\n : \"no calls matched any pattern\",\n matches: allMatches,\n };\n}\n\n/* Assert every listed tool was called at least once. 
/\nexport function evaluateCalledAllOf(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"called_all_of\" }>,\n): AssertionResult {\n const perPattern = assertion.tools.map((p) => ({\n pattern: p,\n matches: view.toolCalls.filter((c) => toolMatches(c.name, p)),\n }));\n const missing = perPattern.filter((p) => p.matches.length === 0);\n const passed = missing.length === 0;\n\n return {\n passed,\n description: `called_all_of(${assertion.tools.map(describePattern).join(\", \")})`,\n details: passed\n ? \"all patterns matched\"\n : `missing: ${missing.map((m) => describePattern(m.pattern)).join(\", \")}`,\n matches: perPattern.flatMap((p) => p.matches),\n };\n}\n\n// ordering\n\n/* Assert `first` tool's earliest turn strictly precedes `then` tool's earliest turn. /\nexport function evaluateCalledBefore(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"called_before\" }>,\n): AssertionResult {\n const firsts = view.toolCalls.filter((c) =>\n toolMatches(c.name, assertion.first),\n );\n const thens = view.toolCalls.filter((c) =>\n toolMatches(c.name, assertion.then),\n );\n const desc = `called_before(${describePattern(assertion.first)} → ${describePattern(assertion.then)})`;\n\n if (firsts.length === 0) {\n return {\n passed: false,\n description: desc,\n details: `no calls matching first`,\n };\n }\n if (thens.length === 0) {\n return {\n passed: false,\n description: desc,\n details: `no calls matching then`,\n };\n }\n\n // Earliest occurrence of each side, by turn. Strictly less than = \"before\".\n const earliestFirst = Math.min(...firsts.map((c) => c.turnIndex));\n const earliestThen = Math.min(...thens.map((c) => c.turnIndex));\n const passed = earliestFirst < earliestThen;\n\n return {\n passed,\n description: desc,\n details: passed\n ? 
`first @ turn ${earliestFirst}, then @ turn ${earliestThen}`\n : `first @ turn ${earliestFirst}, then @ turn ${earliestThen} (not before)`,\n matches: [...firsts, ...thens],\n };\n}\n\n/\n Assert tools appear in order.\n \n Non-strict mode allows interleaved calls; strict mode requires a contiguous subsequence.\n /\nexport function evaluateSequence(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"sequence\" }>,\n): AssertionResult {\n const { tools, strict = false } = assertion;\n const desc = `sequence([${tools.map(describePattern).join(\" → \")}]${strict ? \", strict\" : \"\"})`;\n\n if (tools.length === 0) {\n return {\n passed: true,\n description: desc,\n details: \"empty sequence trivially matches\",\n };\n }\n\n if (strict) {\n // Strict: the tools must appear in exact order with no other tool calls\n // interleaved. We look for a contiguous subsequence of the right shape.\n if (view.toolCalls.length < tools.length) {\n return {\n passed: false,\n description: desc,\n details: \"not enough tool calls\",\n };\n }\n for (\n let start = 0;\n start <= view.toolCalls.length - tools.length;\n start++\n ) {\n let ok = true;\n for (let i = 0; i < tools.length; i++) {\n if (!toolMatches(view.toolCalls[start + i].name, tools[i])) {\n ok = false;\n break;\n }\n }\n if (ok) {\n return {\n passed: true,\n description: desc,\n details: `matched at positions ${start}..${start + tools.length - 1}`,\n matches: view.toolCalls.slice(start, start + tools.length),\n };\n }\n }\n return { passed: false, description: desc, details: \"no contiguous match\" };\n }\n\n // Non-strict: tools must appear in order, interleaved calls allowed.\n // Walk the tool call list once, advancing the sequence pointer on each match.\n let idx = 0;\n const matched: ToolCall[] = [];\n for (const call of view.toolCalls) {\n if (idx < tools.length && toolMatches(call.name, tools[idx])) {\n matched.push(call);\n idx++;\n }\n }\n const passed = idx === tools.length;\n return {\n 
passed,\n description: desc,\n details: passed ? \"matched in order\" : `matched ${idx}/${tools.length}`,\n matches: matched,\n };\n}\n\n// arguments\n\n/* Assert at least one call to `tool` had arguments matching the predicate. /\nexport function evaluateCalledWith(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"called_with\" }>,\n): AssertionResult {\n const candidates = view.toolCalls.filter((c) =>\n toolMatches(c.name, assertion.tool),\n );\n const matching = candidates.filter((c) =>\n predicateMatches(c.args, assertion.args),\n );\n const passed = matching.length > 0;\n\n let details: string;\n if (passed) {\n details = `${matching.length} call(s) with matching args`;\n } else if (candidates.length === 0) {\n details = `no calls to ${describePattern(assertion.tool)} at all`;\n } else {\n details = `${candidates.length} call(s) but none with matching args`;\n }\n\n return {\n passed,\n description: `called_with(${describePattern(assertion.tool)}, args matching predicate)`,\n details,\n matches: matching,\n };\n}\n","/\n Behavior and response-text assertions.\n \n Cover everything that isn't a tool-call query:\n * - Did the agent answer without using any tool? (the \"blind answer\" case)\n * - Did it stay within iteration / cost / time budget?\n * - What did it say its stop reason was?\n * - Does the response text contain expected substrings or match a regex?\n * - Arbitrary user-supplied predicate (escape hatch).\n /\n\nimport type { Assertion, AssertionResult } from \"../types/assertions\";\nimport type { TrajectoryView } from \"../types/trajectory\";\n\n// behavior\n\n/\n Was the response delivered without using any tool? 
This is the primary\n * failure mode detector for the skills-loading problem: when the harness\n * ignores the MCP, the trace shows zero tool calls and one terminal\n * assistant turn with finish reason `end_turn`.\n \n \"Without tool calls\" is defined as `toolCalls.length === 0` AND the\n * response text is non-empty (so we don't confuse \"answered blind\" with\n * \"session died before producing anything\").\n /\nexport function evaluateRespondedWithoutToolCalls(\n view: TrajectoryView,\n _assertion: Extract<Assertion, { type: \"responded_without_tool_calls\" }>,\n): AssertionResult {\n const passed = view.toolCalls.length === 0 && view.finalResponse.length > 0;\n return {\n passed,\n description: \"responded_without_tool_calls\",\n details: passed\n ? \"no tools called, response non-empty\"\n : view.toolCalls.length > 0\n ? `${view.toolCalls.length} tool call(s) made`\n : \"response was empty (session probably aborted)\",\n };\n}\n\n/* Assert the session stayed within the reported turn count. /\nexport function evaluateIterationsWithin(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"iterations_within\" }>,\n): AssertionResult {\n const n = view.usage.numTurns;\n const passed = n <= assertion.max;\n return {\n passed,\n description: `iterations_within(${assertion.max})`,\n details: `used ${n} turn(s)`,\n };\n}\n\n/* Assert total session cost in USD is within budget. /\nexport function evaluateCostWithinUsd(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"cost_within_usd\" }>,\n): AssertionResult {\n const cost = view.usage.totalCostUsd;\n const passed = cost <= assertion.max;\n return {\n passed,\n description: `cost_within_usd(${assertion.max.toFixed(4)})`,\n details: `used $${cost.toFixed(4)}`,\n };\n}\n\n/* Assert wall-clock session duration is within budget. 
/\nexport function evaluateDurationWithinMs(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"duration_within_ms\" }>,\n): AssertionResult {\n const ms = view.usage.durationMs;\n const passed = ms <= assertion.max;\n return {\n passed,\n description: `duration_within_ms(${assertion.max})`,\n details: `took ${ms}ms`,\n };\n}\n\n/* Assert the final stop reason matches one of the allowed values. /\nexport function evaluateFinishedWith(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"finished_with\" }>,\n): AssertionResult {\n const allowed = Array.isArray(assertion.reasons)\n ? assertion.reasons\n : [assertion.reasons];\n const actual = view.finalStopReason;\n const passed = actual !== null && allowed.includes(actual);\n return {\n passed,\n description: `finished_with(${allowed.join(\"\|\")})`,\n details: `actual: ${actual ?? \"(none)\"}`,\n };\n}\n\n// response text\n\n/* Assert `finalResponse` contains the given substring. /\nexport function evaluateResponseContains(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"response_contains\" }>,\n): AssertionResult {\n const passed = view.finalResponse.includes(assertion.text);\n return {\n passed,\n description: `response_contains(${JSON.stringify(assertion.text)})`,\n details: passed ? \"text found\" : \"text not in response\",\n };\n}\n\n/* Assert `finalResponse` does not contain the given substring. /\nexport function evaluateResponseNotContains(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"response_not_contains\" }>,\n): AssertionResult {\n const passed = !view.finalResponse.includes(assertion.text);\n return {\n passed,\n description: `response_not_contains(${JSON.stringify(assertion.text)})`,\n details: passed ? \"text absent\" : \"forbidden text found\",\n };\n}\n\n/* Assert `finalResponse` matches a regular expression. 
/\nexport function evaluateResponseMatches(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"response_matches\" }>,\n): AssertionResult {\n // Construction may throw on a malformed regex; surface that as a failure\n // rather than crashing the whole eval run.\n let passed: boolean;\n let details: string;\n try {\n const re = new RegExp(assertion.pattern, assertion.flags);\n passed = re.test(view.finalResponse);\n details = passed ? \"pattern matched\" : \"pattern did not match\";\n } catch (err) {\n passed = false;\n details = `invalid regex: ${err instanceof Error ? err.message : String(err)}`;\n }\n return {\n passed,\n description: `response_matches(/${assertion.pattern}/${assertion.flags ?? \"\"})`,\n details,\n };\n}\n\n// escape hatch\n\n/\n Run an arbitrary user-supplied predicate against the view.\n \n Only available from programmatic test definition (the YAML loader cannot\n * produce functions). Catches thrown errors and reports them as failures so\n * one bad predicate doesn't take down a whole eval run.\n /\nexport function evaluatePredicate(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"predicate\" }>,\n): AssertionResult {\n let passed = false;\n let details: string;\n try {\n passed = assertion.fn(view);\n details = passed ? \"predicate returned true\" : \"predicate returned false\";\n } catch (err) {\n details = `predicate threw: ${err instanceof Error ? err.message : String(err)}`;\n }\n return {\n passed,\n description: assertion.description ?? \"predicate(...)\",\n details,\n };\n}\n","/\n Compound assertion evaluators: `any_of`, `all_of`, `not`.\n \n These recurse into the main evaluator. To avoid a circular import between\n * this file and `evaluator.ts`, the dispatcher is passed in as a function\n * parameter rather than imported directly. 
The evaluator binds itself when\n * dispatching to these.\n /\n\nimport type { Assertion, AssertionResult } from \"../types/assertions\";\nimport type { TrajectoryView } from \"../types/trajectory\";\n\n/\n Signature of the top-level dispatcher. Passed into compound evaluators so\n * they can recursively evaluate child assertions without a circular import.\n /\nexport type Evaluator = (\n view: TrajectoryView,\n assertion: Assertion,\n) => AssertionResult;\n\n/* Evaluate `all_of`: every child assertion must pass. /\nexport function evaluateAllOf(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"all_of\" }>,\n evaluate: Evaluator,\n): AssertionResult {\n const children = assertion.assertions.map((a) => evaluate(view, a));\n const passed = children.every((c) => c.passed);\n const failedCount = children.filter((c) => !c.passed).length;\n\n return {\n passed,\n description: `all_of (${children.length} child${children.length === 1 ? \"\" : \"ren\"})`,\n details: passed\n ? \"all passed\"\n : `${failedCount} of ${children.length} failed`,\n children,\n };\n}\n\n/* Evaluate `any_of`: at least one child assertion must pass. /\nexport function evaluateAnyOf(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"any_of\" }>,\n evaluate: Evaluator,\n): AssertionResult {\n const children = assertion.assertions.map((a) => evaluate(view, a));\n const passedCount = children.filter((c) => c.passed).length;\n const passed = passedCount > 0;\n\n return {\n passed,\n description: `any_of (${children.length} child${children.length === 1 ? \"\" : \"ren\"})`,\n details: passed ? `${passedCount} passed` : \"all failed\",\n children,\n };\n}\n\n/* Evaluate `not`: invert the inner assertion result. 
/\nexport function evaluateNot(\n view: TrajectoryView,\n assertion: Extract<Assertion, { type: \"not\" }>,\n evaluate: Evaluator,\n): AssertionResult {\n const child = evaluate(view, assertion.assertion);\n return {\n passed: !child.passed,\n description: `not(${child.description})`,\n details: child.passed\n ? \"inner passed (so outer fails)\"\n : \"inner failed (so outer passes)\",\n children: [child],\n };\n}\n","/\n Top-level assertion evaluator.\n \n Dispatches on the discriminant of the `Assertion` tagged union, delegating\n * to the per-kind evaluators in the sibling modules. This file deliberately\n * contains no logic of its own — keep it boring so adding a new assertion\n * type is just (a) extend the union in `types/assertions.ts`, (b) add an\n * evaluator function in the appropriate sibling, (c) add one case here.\n /\n\nimport type { Assertion, AssertionResult } from \"../types/assertions\";\nimport type { TrajectoryView } from \"../types/trajectory\";\n\nimport {\n evaluateCalled,\n evaluateCalledAllOf,\n evaluateCalledAnyOf,\n evaluateCalledBefore,\n evaluateCalledWith,\n evaluateNotCalled,\n evaluateSequence,\n} from \"./tool-calls\";\n\nimport {\n evaluateCostWithinUsd,\n evaluateDurationWithinMs,\n evaluateFinishedWith,\n evaluateIterationsWithin,\n evaluatePredicate,\n evaluateRespondedWithoutToolCalls,\n evaluateResponseContains,\n evaluateResponseMatches,\n evaluateResponseNotContains,\n} from \"./behavior\";\n\nimport { evaluateAllOf, evaluateAnyOf, evaluateNot } from \"./compound\";\n\n/\n Evaluate one assertion against a trajectory view.\n \n The switch is exhaustive — TypeScript's `never` check at the end will\n * flag any new variant added to the `Assertion` union that hasn't been\n * wired up here.\n /\nexport function evaluate(\n view: TrajectoryView,\n assertion: Assertion,\n): AssertionResult {\n switch (assertion.type) {\n // tool-call presence and ordering\n case \"called\":\n return evaluateCalled(view, assertion);\n case 
\"not_called\":\n return evaluateNotCalled(view, assertion);\n case \"called_any_of\":\n return evaluateCalledAnyOf(view, assertion);\n case \"called_all_of\":\n return evaluateCalledAllOf(view, assertion);\n case \"called_before\":\n return evaluateCalledBefore(view, assertion);\n case \"sequence\":\n return evaluateSequence(view, assertion);\n\n // tool-call arguments\n case \"called_with\":\n return evaluateCalledWith(view, assertion);\n\n // behavior\n case \"responded_without_tool_calls\":\n return evaluateRespondedWithoutToolCalls(view, assertion);\n case \"iterations_within\":\n return evaluateIterationsWithin(view, assertion);\n case \"cost_within_usd\":\n return evaluateCostWithinUsd(view, assertion);\n case \"duration_within_ms\":\n return evaluateDurationWithinMs(view, assertion);\n case \"finished_with\":\n return evaluateFinishedWith(view, assertion);\n\n // response text\n case \"response_contains\":\n return evaluateResponseContains(view, assertion);\n case \"response_not_contains\":\n return evaluateResponseNotContains(view, assertion);\n case \"response_matches\":\n return evaluateResponseMatches(view, assertion);\n\n // compound — pass the dispatcher in so they can recurse without\n // creating a circular import\n case \"all_of\":\n return evaluateAllOf(view, assertion, evaluate);\n case \"any_of\":\n return evaluateAnyOf(view, assertion, evaluate);\n case \"not\":\n return evaluateNot(view, assertion, evaluate);\n\n // escape hatch\n case \"predicate\":\n return evaluatePredicate(view, assertion);\n\n default: {\n // Exhaustiveness guard. If a new assertion variant is added to the\n // union and not wired into the switch above, TypeScript will fail\n // here at compile time. Don't remove this case.\n const _exhaustive: never = assertion;\n throw new Error(`unknown assertion: ${JSON.stringify(_exhaustive)}`);\n }\n }\n}\n\n/\n Evaluate a list of assertions independently. 
Used at the test-case level\n * where each top-level assertion is reported separately (and thresholded\n * separately, in the runner layer).\n /\nexport function evaluateAll(\n view: TrajectoryView,\n assertions: Assertion[],\n): AssertionResult[] {\n return assertions.map((a) => evaluate(view, a));\n}\n","/\n Default harness adapter registry.\n \n New adapters register here so the CLI and runner can resolve `adapter`\n * names from YAML without hard-coding imports at every call site.\n \n ## Adding a new harness adapter\n \n 1. Create an adapter module under `src/adapters/<id>/` implementing\n * {@link HarnessAdapter} from `./types`. Set `id` to match the YAML\n * `adapter` field (e.g. `\"codex\"`).\n * 2. Nest suite config under a camelCase key in {@link SuiteConfig}\n * (e.g. `codex: { ... }`) so each harness keeps its own options.\n * 3. Register at startup via {@link registerAdapter} — either in this\n * module for built-in adapters or from plugin/bootstrap code for\n * runtime extensions.\n * 4. Reference in suite YAML with `adapter: <id>` and the nested config\n * block; the runner calls `getAdapter(id).run(resolvedConfig)`.\n \n Built-in adapters are registered when this module loads. 
`claude-code` and\n * `codex` ship today; future harnesses (Gemini CLI, Antigravity CLI) follow\n * the same pattern in separate tracks.\n /\n\nimport type { HarnessAdapter } from \"./types\";\nimport { claudeCodeAdapter } from \"./claude-code/index\";\nimport { codexAdapter } from \"./codex/index\";\n\nconst ADAPTERS: Record<string, HarnessAdapter> = {};\n\nfunction registerBuiltIn(id: string, adapter: HarnessAdapter): void {\n ADAPTERS[id] = adapter;\n}\n\nregisterBuiltIn(\"claude-code\", claudeCodeAdapter);\nregisterBuiltIn(\"codex\", codexAdapter);\n\n/\n Register a harness adapter by id.\n \n Duplicate ids throw — registration is explicit so accidental overrides\n * surface immediately during startup or test setup.\n /\nexport function registerAdapter(id: string, adapter: HarnessAdapter): void {\n if (ADAPTERS[id]) {\n throw new Error(`adapter \"${id}\" is already registered`);\n }\n ADAPTERS[id] = adapter;\n}\n\n/* Return all registered adapter ids (built-in and runtime). /\nexport function listAdapters(): string[] {\n return Object.keys(ADAPTERS);\n}\n\n/* Resolve an adapter by id. Throws if unknown. /\nexport function getAdapter(id: string): HarnessAdapter {\n const adapter = ADAPTERS[id];\n if (!adapter) {\n throw new Error(\n `unknown adapter \"${id}\". Available: ${listAdapters().join(\", \")}`,\n );\n }\n return adapter;\n}\n\n/* Default adapter when YAML omits `adapter`. /\nexport const DEFAULT_ADAPTER_ID = \"claude-code\";\n\nexport function getDefaultAdapter(): HarnessAdapter {\n return getAdapter(DEFAULT_ADAPTER_ID);\n}\n","/\n Flatten nested suite config into harness-specific adapter config.\n \n Suite YAML nests adapter options under keys like `claudeCode`; adapters\n * expect a flat config object. 
This module merges layers and flattens per\n * adapter id.\n /\n\nimport { DEFAULT_ADAPTER_ID } from \"../adapters/registry\";\nimport type { BaseAdapterConfig } from \"../adapters/types\";\nimport type { ClaudeCodeAdapterConfig } from \"../adapters/claude-code/types\";\nimport type { CodexAdapterConfig } from \"../adapters/codex/types\";\nimport type { SuiteConfig } from \"../adapters/types\";\n\n/* Merged config passed to {@link HarnessAdapter.run}. /\nexport type ResolvedRunConfig = BaseAdapterConfig & Record<string, unknown>;\n\n/* Merge generic suite config layers into a flat {@link ClaudeCodeAdapterConfig}. /\nexport function toClaudeCodeConfig(\n layers: SuiteConfig[],\n prompt: string,\n): ClaudeCodeAdapterConfig {\n const merged: Record<string, unknown> = {};\n for (const layer of layers) {\n const { claudeCode, ...generic } = layer;\n Object.assign(merged, generic);\n if (claudeCode && typeof claudeCode === \"object\") {\n Object.assign(merged, claudeCode);\n }\n }\n merged.prompt = prompt;\n return merged as unknown as ClaudeCodeAdapterConfig;\n}\n\n/* Merge generic suite config layers into a flat {@link CodexAdapterConfig}. 
/\nexport function toCodexConfig(\n layers: SuiteConfig[],\n prompt: string,\n): CodexAdapterConfig {\n const merged: Record<string, unknown> = {};\n for (const layer of layers) {\n const { codex, ...generic } = layer;\n Object.assign(merged, generic);\n if (codex && typeof codex === \"object\") {\n Object.assign(merged, codex);\n }\n }\n merged.prompt = prompt;\n return merged as unknown as CodexAdapterConfig;\n}\n\n/\n Resolve merged suite layers into the flat config shape expected by the\n * selected harness adapter.\n /\nexport function resolveRunConfig(\n adapterId: string,\n layers: SuiteConfig[],\n prompt: string,\n): ResolvedRunConfig {\n if (adapterId === DEFAULT_ADAPTER_ID \|\| adapterId === \"claude-code\") {\n return toClaudeCodeConfig(layers, prompt) as ResolvedRunConfig;\n }\n\n if (adapterId === \"codex\") {\n return toCodexConfig(layers, prompt) as ResolvedRunConfig;\n }\n\n // Unknown adapters receive a shallow merge of all config layers.\n const merged: Record<string, unknown> = {};\n for (const layer of layers) {\n Object.assign(merged, layer);\n }\n merged.prompt = prompt;\n return merged as ResolvedRunConfig;\n}\n","/\n Case-level runner — config merge, single-repetition execution, and cell aggregation.\n \n The suite runner (`suite.ts`) fans out work; this module owns the per-rep\n * lifecycle: merge config layers, invoke the adapter, evaluate assertions, and\n * compute thresholded pass rates for one matrix cell.\n /\n\nimport type { AdapterDiagnostics, AdapterResult, BaseAdapterConfig } from \"../adapters/types\";\nimport { getDefaultAdapter } from \"../adapters/registry\";\nimport { resolveRunConfig } from \"../config/resolve-config\";\nimport { evaluateAll } from \"../assertions/evaluator\";\nimport type {\n AssertionStat,\n CellReport,\n MatrixCell,\n RepetitionError,\n RepetitionResult,\n TestCase,\n TestSuite,\n} from \"./types\";\n\n/* Default repetition count when `case.repetitions` is omitted. 
/\nexport const DEFAULT_REPETITIONS = 5;\n\n/* Default assertion pass-rate threshold when `threshold` is omitted. /\nexport const DEFAULT_THRESHOLD = 1.0;\n\n/* Injectable adapter run function (used by tests to stub harness I/O). /\nexport type AdapterRunFn = (\n config: BaseAdapterConfig & Record<string, unknown>,\n) => Promise<AdapterResult>;\n\n/\n Build the effective adapter config for one (suite, case, cell).\n \n Merge order (later wins): defaultConfig < case.config < cell.config.\n /\nexport function mergeConfig(\n suite: TestSuite,\n testCase: TestCase,\n cell: MatrixCell,\n): BaseAdapterConfig & Record<string, unknown> {\n const adapterId = suite.adapter ?? getDefaultAdapter().id;\n const layers = [\n suite.defaultConfig ?? {},\n testCase.config ?? {},\n cell.config,\n ];\n return resolveRunConfig(adapterId, layers, testCase.prompt);\n}\n\n/* Effective repetition count for a case (`case.repetitions` or default). /\nexport function getRepetitions(testCase: TestCase): number {\n return testCase.repetitions ?? DEFAULT_REPETITIONS;\n}\n\n/\n Run one repetition: invoke the adapter, evaluate assertions, capture errors.\n \n Adapter failures are returned as {@link RepetitionResult.error} rather than\n * thrown so the suite runner can continue other reps and report adapter error counts.\n /\nexport async function runRepetition(\n testCase: TestCase,\n _cell: MatrixCell,\n config: BaseAdapterConfig & Record<string, unknown>,\n repetitionIndex: number,\n run: AdapterRunFn,\n signal?: AbortSignal,\n): Promise<RepetitionResult> {\n const startTs = Date.now();\n\n try {\n const adapterResult = await run({\n ...config,\n signal: signal ?? 
config.signal,\n });\n\n const assertionResults = evaluateAll(\n adapterResult.view,\n testCase.assertions.map((t) => t.assertion),\n );\n\n return {\n repetitionIndex,\n adapterResult,\n error: null,\n assertionResults,\n durationMs: Date.now() - startTs,\n };\n } catch (err) {\n return {\n repetitionIndex,\n adapterResult: null,\n error: extractError(err),\n assertionResults: [],\n durationMs: Date.now() - startTs,\n };\n }\n}\n\n/\n Normalize thrown values into a {@link RepetitionError}.\n \n Preserves {@link AdapterDiagnostics} when the thrown value is an\n * {@link AdapterError} or carries a `diagnostics` property.\n /\nfunction extractError(err: unknown): RepetitionError {\n const message = err instanceof Error ? err.message : String(err);\n\n let diagnostics: Partial<AdapterDiagnostics> = {};\n if (err !== null && typeof err === \"object\" && \"diagnostics\" in err) {\n const d = (err as { diagnostics: unknown }).diagnostics;\n if (d !== null && typeof d === \"object\") {\n diagnostics = d as Partial<AdapterDiagnostics>;\n }\n }\n\n return { message, diagnostics };\n}\n\n/\n Roll up repetition results into a {@link CellReport}.\n \n Adapter errors reduce `evaluatedCount` but do not fail the cell by\n * themselves — only assertion threshold misses mark a cell as failed.\n /\nexport function aggregateCell(\n testCase: TestCase,\n cell: MatrixCell,\n repetitions: RepetitionResult[],\n): CellReport {\n const adapterErrors = repetitions.filter((r) => r.error !== null).length;\n const evaluatedReps = repetitions.filter((r) => r.error === null);\n\n const assertionStats: AssertionStat[] = testCase.assertions.map(\n (thresholded, i) => {\n const threshold = thresholded.threshold ?? DEFAULT_THRESHOLD;\n const passedCount = evaluatedReps.filter(\n (r) => r.assertionResults[i]?.passed,\n ).length;\n const evaluatedCount = evaluatedReps.length;\n const passRate = evaluatedCount === 0 ? 
0 : passedCount / evaluatedCount;\n\n const description =\n evaluatedReps[0]?.assertionResults[i]?.description ??\n `(${thresholded.assertion.type})`;\n\n return {\n description,\n threshold,\n passedCount,\n evaluatedCount,\n passRate,\n meetsThreshold: evaluatedCount > 0 && passRate >= threshold,\n };\n },\n );\n\n const passed = assertionStats.every((s) => s.meetsThreshold);\n\n return {\n caseId: testCase.id,\n category: testCase.category,\n notes: testCase.notes,\n prompt: testCase.prompt,\n expectations: testCase.expectations,\n reference_trajectory: testCase.reference_trajectory,\n human_ratings: testCase.human_ratings,\n cell,\n repetitions,\n assertionStats,\n adapterErrors,\n passed,\n };\n}\n","/\n Promise-based concurrency limiter.\n \n Functionally equivalent to the `p-limit` package, inlined to avoid an\n * external dependency for ~20 lines of code.\n \n Usage:\n \n const limit = createLimit(4);\n * const results = await Promise.all(tasks.map(t => limit(() => run(t))));\n \n The limiter is unbounded in queue depth — it doesn't push back on the\n * caller. If you need bounded enqueue, wrap it.\n /\n\n/* A function that runs an async task under the concurrency limit. /\nexport type LimitedRunner = <T>(fn: () => Promise<T>) => Promise<T>;\n\nexport function createLimit(max: number): LimitedRunner {\n if (!Number.isInteger(max) \|\| max < 1) {\n throw new Error(`createLimit: max must be a positive integer, got ${max}`);\n }\n\n let running = 0;\n /\n FIFO list of resolvers belonging to tasks waiting for a slot. When a\n * running task finishes, the next resolver is invoked to wake one waiter.\n /\n const waiters: (() => void)[] = [];\n\n return async <T>(fn: () => Promise<T>): Promise<T> => {\n // Wait for a slot. 
The loop guards a race where another waiter could\n // grab the slot between our `await` resolving and our increment — in\n // single-threaded JS this is theoretical, but `while` is the right shape.\n while (running >= max) {\n await new Promise<void>((resolve) => waiters.push(resolve));\n }\n running++;\n\n try {\n return await fn();\n } finally {\n running--;\n // Wake exactly one waiter per finished task. Shifting from the front\n // gives FIFO behaviour — earlier callers get slots first.\n const next = waiters.shift();\n if (next) next();\n }\n };\n}\n","/\n Suite-level runner — fans out (case × cell × repetition) tasks with concurrency control.\n \n Tasks run under a {@link createLimit} pool; results are bucketed by case and\n * cell label, sorted by repetition index, then aggregated into a\n * {@link SuiteReport}.\n /\n\nimport { getAdapter, getDefaultAdapter } from \"../adapters/registry\";\nimport {\n aggregateCell,\n getRepetitions,\n mergeConfig,\n runRepetition,\n type AdapterRunFn,\n} from \"./case\";\nimport { createLimit } from \"./limit\";\nimport type {\n CellReport,\n MatrixCell,\n RepetitionResult,\n RunSuiteOptions,\n SuiteReport,\n TestCase,\n TestSuite,\n} from \"./types\";\n\nconst DEFAULT_MAX_CONCURRENT = 4;\n\n/* One unit of concurrent work: a single repetition for a (case, cell) pair. /\ninterface Task {\n testCase: TestCase;\n cell: MatrixCell;\n repetitionIndex: number;\n}\n\n/\n Execute an entire test suite and return an aggregated report.\n \n @throws When `suite.matrix` or `suite.cases` is empty.\n */\nexport async function runSuite(\n suite: TestSuite,\n options: RunSuiteOptions = {},\n): Promise<SuiteReport> {\n if (suite.matrix.length === 0) {\n throw new Error(\"runSuite: suite.matrix must contain at least one cell\");\n }\n if (suite.cases.length === 0) {\n throw new Error(\"runSuite: suite.cases must contain at least one case\");\n }\n\n const adapter =\n options.adapter ?? getAdapter(suite.adapter ?? 
getDefaultAdapter().id);\n\n const run: AdapterRunFn = (config) => adapter.run(config);\n\n const maxConcurrent = options.maxConcurrent ?? DEFAULT_MAX_CONCURRENT;\n const limit = createLimit(maxConcurrent);\n const onProgress = options.onProgress;\n\n const startTs = Date.now();\n const startedAt = new Date(startTs).toISOString();\n\n const tasks: Task[] = [];\n for (const testCase of suite.cases) {\n const reps = getRepetitions(testCase);\n for (const cell of suite.matrix) {\n for (let i = 0; i < reps; i++) {\n tasks.push({ testCase, cell, repetitionIndex: i });\n }\n }\n }\n\n onProgress?.({ kind: \"suite-start\", totalReps: tasks.length });\n\n const buckets = new Map<string, RepetitionResult[]>();\n // Stable key for grouping reps belonging to the same (case, cell).\n const bucketKey = (caseId: string, cellLabel: string) =>\n `${caseId}::${cellLabel}`;\n\n for (const testCase of suite.cases) {\n for (const cell of suite.matrix) {\n buckets.set(bucketKey(testCase.id, cell.label), []);\n }\n }\n\n await Promise.all(\n tasks.map((task) =>\n limit(async () => {\n if (options.signal?.aborted) return;\n\n onProgress?.({\n kind: \"rep-start\",\n caseId: task.testCase.id,\n cellLabel: task.cell.label,\n repIndex: task.repetitionIndex,\n });\n\n const config = mergeConfig(suite, task.testCase, task.cell);\n const result = await runRepetition(\n task.testCase,\n task.cell,\n config,\n task.repetitionIndex,\n run,\n options.signal,\n );\n\n buckets.get(bucketKey(task.testCase.id, task.cell.label))!.push(result);\n\n onProgress?.({\n kind: \"rep-complete\",\n caseId: task.testCase.id,\n cellLabel: task.cell.label,\n repIndex: task.repetitionIndex,\n ok: result.error === null,\n durationMs: result.durationMs,\n toolCallCount: result.adapterResult?.view.toolCalls.length,\n assertionResults: result.assertionResults,\n errorMessage: result.error?.message,\n });\n }),\n ),\n );\n\n const cells: CellReport[] = [];\n for (const testCase of suite.cases) {\n for (const cell of 
suite.matrix) {\n const reps = buckets.get(bucketKey(testCase.id, cell.label)) ?? [];\n reps.sort((a, b) => a.repetitionIndex - b.repetitionIndex);\n\n const cellReport = aggregateCell(testCase, cell, reps);\n cells.push(cellReport);\n\n onProgress?.({ kind: \"cell-complete\", report: cellReport });\n }\n }\n\n const report: SuiteReport = {\n startedAt,\n durationMs: Date.now() - startTs,\n cells,\n };\n\n onProgress?.({ kind: \"suite-complete\", report });\n\n return report;\n}\n"],"mappings":";;;;;;;;;;;AAqBA,SAAgB,YAAY,UAAkB,SAA+B;CAC3E,MAAM,IAAI,cAAc,OAAO;CAC/B,IAAI,CAAC,EAAE,SAAS,GAAG,GAAG,OAAO,aAAa;CAC1C,OAAO,YAAY,CAAC,CAAC,CAAC,KAAK,QAAQ;AACrC;;AAGA,SAAgB,cAAc,SAA8B;CAC1D,OAAO,OAAO,YAAY,WAAW,UAAU,QAAQ;AACzD;;AAGA,SAAgB,gBAAgB,SAA8B;CAC5D,OAAO,cAAc,OAAO;AAC9B;;;;;AAMA,SAAS,YAAY,MAAsB;CACzC,MAAM,UAAU,KACb,QAAQ,sBAAsB,MAAM,CAAC,CACrC,QAAQ,OAAO,IAAI;CACtB,OAAO,IAAI,OAAO,IAAI,QAAQ,EAAE;AAClC;;;AClBA,MAAM,2BAAW,IAAI,IAAI;CACvB;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;AACF,CAAC;AACD,MAAM,+BAAe,IAAI,IAAI;CAAC;CAAU;CAAU;AAAK,CAAC;;;;;;;;AASxD,SAAgB,QAAQ,OAAgB,WAA6B;CAGnE,IAAI,CAAC,cAAc,SAAS,GAC1B,OAAO,WAAW,OAAO,SAAS;CAGpC,MAAM,MAAM;CACZ,MAAM,OAAO,OAAO,KAAK,GAAG;CAG5B,IAAI,KAAK,WAAW,GAAG;EACrB,MAAM,MAAM,KAAK;EAEjB,IAAI,aAAa,IAAI,GAAG,GACtB,QAAQ,KAAR;GACE,KAAK,UACH,OAAQ,IAAI,OAAuB,MAAM,QAAQ,QAAQ,OAAO,GAAG,CAAC;GACtE,KAAK,UACH,OAAQ,IAAI,OAAuB,OAAO,QACxC,QAAQ,OAAO,GAAG,CACpB;GACF,KAAK,OACH,OAAO,CAAC,QAAQ,OAAO,IAAI,GAAG;EAClC;EAGF,IAAI,SAAS,IAAI,GAAG,GAClB,OAAO,YAAY,OAAO,KAAK,IAAI,IAAI;CAI3C;CAIA,IAAI,CAAC,cAAc,KAAK,GAAG,OAAO;CAClC,MAAM,WAAW;CAEjB,KAAK,MAAM,CAAC,OAAO,YAAY,OAAO,QAAQ,GAAG,GAC/C,IAAI,CAAC,QAAQ,SAAS,QAAQ,OAAO,GAAG,OAAO;CAEjD,OAAO;AACT;;AAGA,SAAS,YAAY,OAAgB,IAAY,QAA0B;CACzE,QAAQ,IAAR;EACE,KAAK,UACH,OAAO,WAAW,OAAO,MAAM;EACjC,KAAK,YACH,OAAO,OAAO,UAAU,YAAY,MAAM,SAAS,MAAgB;EACrE,KAAK,gBACH,OAAO,OAAO,UAAU,YAAY,CAAC,MAAM,SAAS,MAAgB;EACtE,KAAK;GACH,IAAI,OAAO,UAAU,YAAY,OAAO,WAAW,UACjD,OAAO;GAET,IAAI;IACF,OAAO,IAAI,OAAO,MAAM,CAAC,CAAC,KAAK,KAAK;GACtC,QAAQ;IACN,OAAO;GACT;
EACF,KAAK,OACH,OAAO,OAAO,UAAU,YAAY,SAAU;EAChD,KAAK,OACH,OAAO,OAAO,UAAU,YAAY,SAAU;EAChD,KAAK,MACH,OAAO,OAAO,UAAU,YAAY,QAAS;EAC/C,KAAK,MACH,OAAO,OAAO,UAAU,YAAY,QAAS;EAC/C,KAAK,UACH,OAAQ,OAAqB,MAAM,MAAM,WAAW,OAAO,CAAC,CAAC;EAC/D,SACE,MAAM,IAAI,MAAM,0BAA0B,IAAI;CAClD;AACF;;AAGA,SAAS,cAAc,GAA0C;CAC/D,OAAO,OAAO,MAAM,YAAY,MAAM,QAAQ,CAAC,MAAM,QAAQ,CAAC;AAChE;;;;;AAMA,SAAS,WAAW,GAAY,GAAqB;CACnD,IAAI,MAAM,GAAG,OAAO;CACpB,IAAI,OAAO,MAAM,OAAO,GAAG,OAAO;CAClC,IAAI,MAAM,QAAQ,MAAM,MAAM,OAAO;CACrC,IAAI,OAAO,MAAM,UAAU,OAAO;CAElC,IAAI,MAAM,QAAQ,CAAC,MAAM,MAAM,QAAQ,CAAC,GAAG,OAAO;CAClD,IAAI,MAAM,QAAQ,CAAC,KAAK,MAAM,QAAQ,CAAC,GAAG;EACxC,IAAI,EAAE,WAAW,EAAE,QAAQ,OAAO;EAClC,OAAO,EAAE,OAAO,GAAG,MAAM,WAAW,GAAG,EAAE,EAAE,CAAC;CAC9C;CAEA,MAAM,OAAO;CACb,MAAM,OAAO;CACb,MAAM,QAAQ,OAAO,KAAK,IAAI;CAC9B,MAAM,QAAQ,OAAO,KAAK,IAAI;CAC9B,IAAI,MAAM,WAAW,MAAM,QAAQ,OAAO;CAC1C,OAAO,MAAM,OAAO,MAAM,WAAW,KAAK,IAAI,KAAK,EAAE,CAAC;AACxD;;;;AClIA,SAAgB,eACd,MACA,WACiB;CACjB,MAAM,WAAW,KAAK,UAAU,QAAQ,MACtC,YAAY,EAAE,MAAM,UAAU,IAAI,CACpC;CAEA,MAAM,SADQ,iBAAiB,UAAU,KACtB,CAAC,CAAC,SAAS,MAAM;CAEpC,OAAO;EACL;EACA,aAAa,UAAU,gBAAgB,UAAU,IAAI,EAAE,IAAI,oBAAoB,UAAU,KAAK,EAAE;EAChG,SAAS,SACL,SAAS,SAAS,OAAO,qBACzB,SAAS,SAAS,OAAO,qBAAqB,oBAAoB,UAAU,KAAK;EACrF,SAAS;CACX;AACF;;AAGA,SAAgB,kBACd,MACA,WACiB;CACjB,MAAM,WAAW,KAAK,UAAU,QAAQ,MACtC,YAAY,EAAE,MAAM,UAAU,IAAI,CACpC;CACA,MAAM,SAAS,SAAS,WAAW;CAEnC,OAAO;EACL;EACA,aAAa,cAAc,gBAAgB,UAAU,IAAI,EAAE;EAC3D,SAAS,SACL,sBACA,SAAS,SAAS,OAAO;EAC7B,SAAS;CACX;AACF;;AAGA,SAAgB,oBACd,MACA,WACiB;CACjB,MAAM,aAAyB,CAAC;CAChC,KAAK,MAAM,WAAW,UAAU,OAC9B,WAAW,KACT,GAAG,KAAK,UAAU,QAAQ,MAAM,YAAY,EAAE,MAAM,OAAO,CAAC,CAC9D;CAEF,MAAM,SAAS,WAAW,SAAS;CACnC,OAAO;EACL;EACA,aAAa,iBAAiB,UAAU,MAAM,IAAI,eAAe,CAAC,CAAC,KAAK,IAAI,EAAE;EAC9E,SAAS,SACL,GAAG,WAAW,OAAO,qBACrB;EACJ,SAAS;CACX;AACF;;AAGA,SAAgB,oBACd,MACA,WACiB;CACjB,MAAM,aAAa,UAAU,MAAM,KAAK,OAAO;EAC7C,SAAS;EACT,SAAS,KAAK,UAAU,QAAQ,MAAM,YAAY,EAAE,MAAM,CAAC,CAAC;CAC9D,EAAE;CACF,MAAM,UAAU,WAAW,QAAQ,MAAM,EAAE,QAAQ,WAAW,CAAC;CAC/D,MAAM,SAAS,QAAQ,WAAW;CAE
lC,OAAO;EACL;EACA,aAAa,iBAAiB,UAAU,MAAM,IAAI,eAAe,CAAC,CAAC,KAAK,IAAI,EAAE;EAC9E,SAAS,SACL,yBACA,YAAY,QAAQ,KAAK,MAAM,gBAAgB,EAAE,OAAO,CAAC,CAAC,CAAC,KAAK,IAAI;EACxE,SAAS,WAAW,SAAS,MAAM,EAAE,OAAO;CAC9C;AACF;;AAKA,SAAgB,qBACd,MACA,WACiB;CACjB,MAAM,SAAS,KAAK,UAAU,QAAQ,MACpC,YAAY,EAAE,MAAM,UAAU,KAAK,CACrC;CACA,MAAM,QAAQ,KAAK,UAAU,QAAQ,MACnC,YAAY,EAAE,MAAM,UAAU,IAAI,CACpC;CACA,MAAM,OAAO,iBAAiB,gBAAgB,UAAU,KAAK,EAAE,KAAK,gBAAgB,UAAU,IAAI,EAAE;CAEpG,IAAI,OAAO,WAAW,GACpB,OAAO;EACL,QAAQ;EACR,aAAa;EACb,SAAS;CACX;CAEF,IAAI,MAAM,WAAW,GACnB,OAAO;EACL,QAAQ;EACR,aAAa;EACb,SAAS;CACX;CAIF,MAAM,gBAAgB,KAAK,IAAI,GAAG,OAAO,KAAK,MAAM,EAAE,SAAS,CAAC;CAChE,MAAM,eAAe,KAAK,IAAI,GAAG,MAAM,KAAK,MAAM,EAAE,SAAS,CAAC;CAC9D,MAAM,SAAS,gBAAgB;CAE/B,OAAO;EACL;EACA,aAAa;EACb,SAAS,SACL,gBAAgB,cAAc,gBAAgB,iBAC9C,gBAAgB,cAAc,gBAAgB,aAAa;EAC/D,SAAS,CAAC,GAAG,QAAQ,GAAG,KAAK;CAC/B;AACF;;;;;;AAOA,SAAgB,iBACd,MACA,WACiB;CACjB,MAAM,EAAE,OAAO,SAAS,UAAU;CAClC,MAAM,OAAO,aAAa,MAAM,IAAI,eAAe,CAAC,CAAC,KAAK,KAAK,EAAE,GAAG,SAAS,aAAa,GAAG;CAE7F,IAAI,MAAM,WAAW,GACnB,OAAO;EACL,QAAQ;EACR,aAAa;EACb,SAAS;CACX;CAGF,IAAI,QAAQ;EAGV,IAAI,KAAK,UAAU,SAAS,MAAM,QAChC,OAAO;GACL,QAAQ;GACR,aAAa;GACb,SAAS;EACX;EAEF,KACE,IAAI,QAAQ,GACZ,SAAS,KAAK,UAAU,SAAS,MAAM,QACvC,SACA;GACA,IAAI,KAAK;GACT,KAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAChC,IAAI,CAAC,YAAY,KAAK,UAAU,QAAQ,EAAE,CAAC,MAAM,MAAM,EAAE,GAAG;IAC1D,KAAK;IACL;GACF;GAEF,IAAI,IACF,OAAO;IACL,QAAQ;IACR,aAAa;IACb,SAAS,wBAAwB,MAAM,IAAI,QAAQ,MAAM,SAAS;IAClE,SAAS,KAAK,UAAU,MAAM,OAAO,QAAQ,MAAM,MAAM;GAC3D;EAEJ;EACA,OAAO;GAAE,QAAQ;GAAO,aAAa;GAAM,SAAS;EAAsB;CAC5E;CAIA,IAAI,MAAM;CACV,MAAM,UAAsB,CAAC;CAC7B,KAAK,MAAM,QAAQ,KAAK,WACtB,IAAI,MAAM,MAAM,UAAU,YAAY,KAAK,MAAM,MAAM,IAAI,GAAG;EAC5D,QAAQ,KAAK,IAAI;EACjB;CACF;CAEF,MAAM,SAAS,QAAQ,MAAM;CAC7B,OAAO;EACL;EACA,aAAa;EACb,SAAS,SAAS,qBAAqB,WAAW,IAAI,GAAG,MAAM;EAC/D,SAAS;CACX;AACF;;AAKA,SAAgB,mBACd,MACA,WACiB;CACjB,MAAM,aAAa,KAAK,UAAU,QAAQ,MACxC,YAAY,EAAE,MAAM,UAAU,IAAI,CACpC;CACA,MAAM,WAAW,WAAW,QAAQ,MAClCA,QAAiB,EAAE,MAAM,UAAU,IAAI,CACzC;CACA,MAAM,SAAS,SAAS,
SAAS;CAEjC,IAAI;CACJ,IAAI,QACF,UAAU,GAAG,SAAS,OAAO;MACxB,IAAI,WAAW,WAAW,GAC/B,UAAU,eAAe,gBAAgB,UAAU,IAAI,EAAE;MAEzD,UAAU,GAAG,WAAW,OAAO;CAGjC,OAAO;EACL;EACA,aAAa,eAAe,gBAAgB,UAAU,IAAI,EAAE;EAC5D;EACA,SAAS;CACX;AACF;;;;;;;;;;;;;ACrOA,SAAgB,kCACd,MACA,YACiB;CACjB,MAAM,SAAS,KAAK,UAAU,WAAW,KAAK,KAAK,cAAc,SAAS;CAC1E,OAAO;EACL;EACA,aAAa;EACb,SAAS,SACL,wCACA,KAAK,UAAU,SAAS,IACtB,GAAG,KAAK,UAAU,OAAO,sBACzB;CACR;AACF;;AAGA,SAAgB,yBACd,MACA,WACiB;CACjB,MAAM,IAAI,KAAK,MAAM;CAErB,OAAO;EACL,QAFa,KAAK,UAAU;EAG5B,aAAa,qBAAqB,UAAU,IAAI;EAChD,SAAS,QAAQ,EAAE;CACrB;AACF;;AAGA,SAAgB,sBACd,MACA,WACiB;CACjB,MAAM,OAAO,KAAK,MAAM;CAExB,OAAO;EACL,QAFa,QAAQ,UAAU;EAG/B,aAAa,mBAAmB,UAAU,IAAI,QAAQ,CAAC,EAAE;EACzD,SAAS,SAAS,KAAK,QAAQ,CAAC;CAClC;AACF;;AAGA,SAAgB,yBACd,MACA,WACiB;CACjB,MAAM,KAAK,KAAK,MAAM;CAEtB,OAAO;EACL,QAFa,MAAM,UAAU;EAG7B,aAAa,sBAAsB,UAAU,IAAI;EACjD,SAAS,QAAQ,GAAG;CACtB;AACF;;AAGA,SAAgB,qBACd,MACA,WACiB;CACjB,MAAM,UAAU,MAAM,QAAQ,UAAU,OAAO,IAC3C,UAAU,UACV,CAAC,UAAU,OAAO;CACtB,MAAM,SAAS,KAAK;CAEpB,OAAO;EACL,QAFa,WAAW,QAAQ,QAAQ,SAAS,MAAM;EAGvD,aAAa,iBAAiB,QAAQ,KAAK,GAAG,EAAE;EAChD,SAAS,WAAW,UAAU;CAChC;AACF;;AAKA,SAAgB,yBACd,MACA,WACiB;CACjB,MAAM,SAAS,KAAK,cAAc,SAAS,UAAU,IAAI;CACzD,OAAO;EACL;EACA,aAAa,qBAAqB,KAAK,UAAU,UAAU,IAAI,EAAE;EACjE,SAAS,SAAS,eAAe;CACnC;AACF;;AAGA,SAAgB,4BACd,MACA,WACiB;CACjB,MAAM,SAAS,CAAC,KAAK,cAAc,SAAS,UAAU,IAAI;CAC1D,OAAO;EACL;EACA,aAAa,yBAAyB,KAAK,UAAU,UAAU,IAAI,EAAE;EACrE,SAAS,SAAS,gBAAgB;CACpC;AACF;;AAGA,SAAgB,wBACd,MACA,WACiB;CAGjB,IAAI;CACJ,IAAI;CACJ,IAAI;EAEF,SAAS,IADM,OAAO,UAAU,SAAS,UAAU,KACzC,CAAC,CAAC,KAAK,KAAK,aAAa;EACnC,UAAU,SAAS,oBAAoB;CACzC,SAAS,KAAK;EACZ,SAAS;EACT,UAAU,kBAAkB,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;CAC7E;CACA,OAAO;EACL;EACA,aAAa,qBAAqB,UAAU,QAAQ,GAAG,UAAU,SAAS,GAAG;EAC7E;CACF;AACF;;;;;;;;AAWA,SAAgB,kBACd,MACA,WACiB;CACjB,IAAI,SAAS;CACb,IAAI;CACJ,IAAI;EACF,SAAS,UAAU,GAAG,IAAI;EAC1B,UAAU,SAAS,4BAA4B;CACjD,SAAS,KAAK;EACZ,UAAU,oBAAoB,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;CAC/E;CACA,OAAO;EACL;EACA,aAAa,UAAU,eAAe;EACtC;CACF;AACF;;;;AC7JA,SAAgB,cACd,
MACA,WACA,UACiB;CACjB,MAAM,WAAW,UAAU,WAAW,KAAK,MAAM,SAAS,MAAM,CAAC,CAAC;CAClE,MAAM,SAAS,SAAS,OAAO,MAAM,EAAE,MAAM;CAC7C,MAAM,cAAc,SAAS,QAAQ,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC;CAEtD,OAAO;EACL;EACA,aAAa,WAAW,SAAS,OAAO,QAAQ,SAAS,WAAW,IAAI,KAAK,MAAM;EACnF,SAAS,SACL,eACA,GAAG,YAAY,MAAM,SAAS,OAAO;EACzC;CACF;AACF;;AAGA,SAAgB,cACd,MACA,WACA,UACiB;CACjB,MAAM,WAAW,UAAU,WAAW,KAAK,MAAM,SAAS,MAAM,CAAC,CAAC;CAClE,MAAM,cAAc,SAAS,QAAQ,MAAM,EAAE,MAAM,CAAC,CAAC;CACrD,MAAM,SAAS,cAAc;CAE7B,OAAO;EACL;EACA,aAAa,WAAW,SAAS,OAAO,QAAQ,SAAS,WAAW,IAAI,KAAK,MAAM;EACnF,SAAS,SAAS,GAAG,YAAY,WAAW;EAC5C;CACF;AACF;;AAGA,SAAgB,YACd,MACA,WACA,UACiB;CACjB,MAAM,QAAQ,SAAS,MAAM,UAAU,SAAS;CAChD,OAAO;EACL,QAAQ,CAAC,MAAM;EACf,aAAa,OAAO,MAAM,YAAY;EACtC,SAAS,MAAM,SACX,kCACA;EACJ,UAAU,CAAC,KAAK;CAClB;AACF;;;;;;;;;;AC9BA,SAAgB,SACd,MACA,WACiB;CACjB,QAAQ,UAAU,MAAlB;EAEE,KAAK,UACH,OAAO,eAAe,MAAM,SAAS;EACvC,KAAK,cACH,OAAO,kBAAkB,MAAM,SAAS;EAC1C,KAAK,iBACH,OAAO,oBAAoB,MAAM,SAAS;EAC5C,KAAK,iBACH,OAAO,oBAAoB,MAAM,SAAS;EAC5C,KAAK,iBACH,OAAO,qBAAqB,MAAM,SAAS;EAC7C,KAAK,YACH,OAAO,iBAAiB,MAAM,SAAS;EAGzC,KAAK,eACH,OAAO,mBAAmB,MAAM,SAAS;EAG3C,KAAK,gCACH,OAAO,kCAAkC,MAAM,SAAS;EAC1D,KAAK,qBACH,OAAO,yBAAyB,MAAM,SAAS;EACjD,KAAK,mBACH,OAAO,sBAAsB,MAAM,SAAS;EAC9C,KAAK,sBACH,OAAO,yBAAyB,MAAM,SAAS;EACjD,KAAK,iBACH,OAAO,qBAAqB,MAAM,SAAS;EAG7C,KAAK,qBACH,OAAO,yBAAyB,MAAM,SAAS;EACjD,KAAK,yBACH,OAAO,4BAA4B,MAAM,SAAS;EACpD,KAAK,oBACH,OAAO,wBAAwB,MAAM,SAAS;EAIhD,KAAK,UACH,OAAO,cAAc,MAAM,WAAW,QAAQ;EAChD,KAAK,UACH,OAAO,cAAc,MAAM,WAAW,QAAQ;EAChD,KAAK,OACH,OAAO,YAAY,MAAM,WAAW,QAAQ;EAG9C,KAAK,aACH,OAAO,kBAAkB,MAAM,SAAS;EAE1C,SAKE,MAAM,IAAI,MAAM,sBAAsB,KAAK,UAAUC,SAAW,GAAG;CAEvE;AACF;;;;;;AAOA,SAAgB,YACd,MACA,YACmB;CACnB,OAAO,WAAW,KAAK,MAAM,SAAS,MAAM,CAAC,CAAC;AAChD;;;AC5FA,MAAM,WAA2C,CAAC;AAElD,SAAS,gBAAgB,IAAY,SAA+B;CAClE,SAAS,MAAM;AACjB;AAEA,gBAAgB,eAAe,iBAAiB;AAChD,gBAAgB,SAAS,YAAY;;;;;;;AAQrC,SAAgB,gBAAgB,IAAY,SAA+B;CACzE,IAAI,SAAS,KACX,MAAM,IAAI,MAAM,YAAY,GAAG,wBAAwB;CAEzD,SAAS,MAAM;AACjB;;AAGA,SAAgB,eAAyB;CACvC,OAAO,OAAO,KAAK,QAAQ;AAC7B;;AAGA,
SAAgB,WAAW,IAA4B;CACrD,MAAM,UAAU,SAAS;CACzB,IAAI,CAAC,SACH,MAAM,IAAI,MACR,oBAAoB,GAAG,gBAAgB,aAAa,CAAC,CAAC,KAAK,IAAI,GACjE;CAEF,OAAO;AACT;;AAGA,MAAa,qBAAqB;AAElC,SAAgB,oBAAoC;CAClD,OAAO,WAAW,kBAAkB;AACtC;;;;;;;;;;;ACrDA,SAAgB,mBACd,QACA,QACyB;CACzB,MAAM,SAAkC,CAAC;CACzC,KAAK,MAAM,SAAS,QAAQ;EAC1B,MAAM,EAAE,YAAY,GAAG,YAAY;EACnC,OAAO,OAAO,QAAQ,OAAO;EAC7B,IAAI,cAAc,OAAO,eAAe,UACtC,OAAO,OAAO,QAAQ,UAAU;CAEpC;CACA,OAAO,SAAS;CAChB,OAAO;AACT;;AAGA,SAAgB,cACd,QACA,QACoB;CACpB,MAAM,SAAkC,CAAC;CACzC,KAAK,MAAM,SAAS,QAAQ;EAC1B,MAAM,EAAE,OAAO,GAAG,YAAY;EAC9B,OAAO,OAAO,QAAQ,OAAO;EAC7B,IAAI,SAAS,OAAO,UAAU,UAC5B,OAAO,OAAO,QAAQ,KAAK;CAE/B;CACA,OAAO,SAAS;CAChB,OAAO;AACT;;;;;AAMA,SAAgB,iBACd,WACA,QACA,QACmB;CACnB,IAAI,cAAA,iBAAoC,cAAc,eACpD,OAAO,mBAAmB,QAAQ,MAAM;CAG1C,IAAI,cAAc,SAChB,OAAO,cAAc,QAAQ,MAAM;CAIrC,MAAM,SAAkC,CAAC;CACzC,KAAK,MAAM,SAAS,QAClB,OAAO,OAAO,QAAQ,KAAK;CAE7B,OAAO,SAAS;CAChB,OAAO;AACT;;;;ACpDA,MAAa,sBAAsB;;AAGnC,MAAa,oBAAoB;;;;;;AAYjC,SAAgB,YACd,OACA,UACA,MAC6C;CAO7C,OAAO,iBANW,MAAM,WAAW,kBAAkB,CAAC,CAAC,IAMpB;EAJjC,MAAM,iBAAiB,CAAC;EACxB,SAAS,UAAU,CAAC;EACpB,KAAK;CAEiC,GAAG,SAAS,MAAM;AAC5D;;AAGA,SAAgB,eAAe,UAA4B;CACzD,OAAO,SAAS,eAAA;AAClB;;;;;;;AAQA,eAAsB,cACpB,UACA,OACA,QACA,iBACA,KACA,QAC2B;CAC3B,MAAM,UAAU,KAAK,IAAI;CAEzB,IAAI;EACF,MAAM,gBAAgB,MAAM,IAAI;GAC9B,GAAG;GACH,QAAQ,UAAU,OAAO;EAC3B,CAAC;EAOD,OAAO;GACL;GACA;GACA,OAAO;GACP,kBATuB,YACvB,cAAc,MACd,SAAS,WAAW,KAAK,MAAM,EAAE,SAAS,CAO3B;GACf,YAAY,KAAK,IAAI,IAAI;EAC3B;CACF,SAAS,KAAK;EACZ,OAAO;GACL;GACA,eAAe;GACf,OAAO,aAAa,GAAG;GACvB,kBAAkB,CAAC;GACnB,YAAY,KAAK,IAAI,IAAI;EAC3B;CACF;AACF;;;;;;;AAQA,SAAS,aAAa,KAA+B;CACnD,MAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;CAE/D,IAAI,cAA2C,CAAC;CAChD,IAAI,QAAQ,QAAQ,OAAO,QAAQ,YAAY,iBAAiB,KAAK;EACnE,MAAM,IAAK,IAAiC;EAC5C,IAAI,MAAM,QAAQ,OAAO,MAAM,UAC7B,cAAc;CAElB;CAEA,OAAO;EAAE;EAAS;CAAY;AAChC;;;;;;;AAQA,SAAgB,cACd,UACA,MACA,aACY;CACZ,MAAM,gBAAgB,YAAY,QAAQ,MAAM,EAAE,UAAU,IAAI,CAAC,CAAC;CAClE,MAAM,gBAAgB,YAAY,QAAQ,MAAM,EAAE,UAAU,IAAI;CAEhE,MAAM,iBAAkC,SAAS,WAAW,KACzD,aAAa,MAAM;EAClB,MA
AM,YAAY,YAAY,aAAA;EAC9B,MAAM,cAAc,cAAc,QAC/B,MAAM,EAAE,iBAAiB,EAAE,EAAE,MAChC,CAAC,CAAC;EACF,MAAM,iBAAiB,cAAc;EACrC,MAAM,WAAW,mBAAmB,IAAI,IAAI,cAAc;EAM1D,OAAO;GACL,aAJA,cAAc,EAAE,EAAE,iBAAiB,EAAE,EAAE,eACvC,IAAI,YAAY,UAAU,KAAK;GAI/B;GACA;GACA;GACA;GACA,gBAAgB,iBAAiB,KAAK,YAAY;EACpD;CACF,CACF;CAEA,MAAM,SAAS,eAAe,OAAO,MAAM,EAAE,cAAc;CAE3D,OAAO;EACL,QAAQ,SAAS;EACjB,UAAU,SAAS;EACnB,OAAO,SAAS;EAChB,QAAQ,SAAS;EACjB,cAAc,SAAS;EACvB,sBAAsB,SAAS;EAC/B,eAAe,SAAS;EACxB;EACA;EACA;EACA;EACA;CACF;AACF;;;AC9JA,SAAgB,YAAY,KAA4B;CACtD,IAAI,CAAC,OAAO,UAAU,GAAG,KAAK,MAAM,GAClC,MAAM,IAAI,MAAM,oDAAoD,KAAK;CAG3E,IAAI,UAAU;;;;;CAKd,MAAM,UAA0B,CAAC;CAEjC,OAAO,OAAU,OAAqC;EAIpD,OAAO,WAAW,KAChB,MAAM,IAAI,SAAe,YAAY,QAAQ,KAAK,OAAO,CAAC;EAE5D;EAEA,IAAI;GACF,OAAO,MAAM,GAAG;EAClB,UAAU;GACR;GAGA,MAAM,OAAO,QAAQ,MAAM;GAC3B,IAAI,MAAM,KAAK;EACjB;CACF;AACF;;;;;;;;;;ACtBA,MAAM,yBAAyB;;;;;;AAc/B,eAAsB,SACpB,OACA,UAA2B,CAAC,GACN;CACtB,IAAI,MAAM,OAAO,WAAW,GAC1B,MAAM,IAAI,MAAM,uDAAuD;CAEzE,IAAI,MAAM,MAAM,WAAW,GACzB,MAAM,IAAI,MAAM,sDAAsD;CAGxE,MAAM,UACJ,QAAQ,WAAW,WAAW,MAAM,WAAW,kBAAkB,CAAC,CAAC,EAAE;CAEvE,MAAM,OAAqB,WAAW,QAAQ,IAAI,MAAM;CAGxD,MAAM,QAAQ,YADQ,QAAQ,iBAAiB,sBACR;CACvC,MAAM,aAAa,QAAQ;CAE3B,MAAM,UAAU,KAAK,IAAI;CACzB,MAAM,YAAY,IAAI,KAAK,OAAO,CAAC,CAAC,YAAY;CAEhD,MAAM,QAAgB,CAAC;CACvB,KAAK,MAAM,YAAY,MAAM,OAAO;EAClC,MAAM,OAAO,eAAe,QAAQ;EACpC,KAAK,MAAM,QAAQ,MAAM,QACvB,KAAK,IAAI,IAAI,GAAG,IAAI,MAAM,KACxB,MAAM,KAAK;GAAE;GAAU;GAAM,iBAAiB;EAAE,CAAC;CAGvD;CAEA,aAAa;EAAE,MAAM;EAAe,WAAW,MAAM;CAAO,CAAC;CAE7D,MAAM,0BAAU,IAAI,IAAgC;CAEpD,MAAM,aAAa,QAAgB,cACjC,GAAG,OAAO,IAAI;CAEhB,KAAK,MAAM,YAAY,MAAM,OAC3B,KAAK,MAAM,QAAQ,MAAM,QACvB,QAAQ,IAAI,UAAU,SAAS,IAAI,KAAK,KAAK,GAAG,CAAC,CAAC;CAItD,MAAM,QAAQ,IACZ,MAAM,KAAK,SACT,MAAM,YAAY;EAChB,IAAI,QAAQ,QAAQ,SAAS;EAE7B,aAAa;GACX,MAAM;GACN,QAAQ,KAAK,SAAS;GACtB,WAAW,KAAK,KAAK;GACrB,UAAU,KAAK;EACjB,CAAC;EAED,MAAM,SAAS,YAAY,OAAO,KAAK,UAAU,KAAK,IAAI;EAC1D,MAAM,SAAS,MAAM,cACnB,KAAK,UACL,KAAK,MACL,QACA,KAAK,iBACL,KACA,QAAQ,MACV;EAEA,QAAQ,IAAI,UAAU,KAAK,SAAS,IAAI,KAAK,KAAK,KAAK,CAAC,
CAAC,CAAE,KAAK,MAAM;EAEtE,aAAa;GACX,MAAM;GACN,QAAQ,KAAK,SAAS;GACtB,WAAW,KAAK,KAAK;GACrB,UAAU,KAAK;GACf,IAAI,OAAO,UAAU;GACrB,YAAY,OAAO;GACnB,eAAe,OAAO,eAAe,KAAK,UAAU;GACpD,kBAAkB,OAAO;GACzB,cAAc,OAAO,OAAO;EAC9B,CAAC;CACH,CAAC,CACH,CACF;CAEA,MAAM,QAAsB,CAAC;CAC7B,KAAK,MAAM,YAAY,MAAM,OAC3B,KAAK,MAAM,QAAQ,MAAM,QAAQ;EAC/B,MAAM,OAAO,QAAQ,IAAI,UAAU,SAAS,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC;EACjE,KAAK,MAAM,GAAG,MAAM,EAAE,kBAAkB,EAAE,eAAe;EAEzD,MAAM,aAAa,cAAc,UAAU,MAAM,IAAI;EACrD,MAAM,KAAK,UAAU;EAErB,aAAa;GAAE,MAAM;GAAiB,QAAQ;EAAW,CAAC;CAC5D;CAGF,MAAM,SAAsB;EAC1B;EACA,YAAY,KAAK,IAAI,IAAI;EACzB;CACF;CAEA,aAAa;EAAE,MAAM;EAAkB;CAAO,CAAC;CAE/C,OAAO;AACT"}

package/dist/{types-CD3TwOtZ.d.ts → types-0QkNVyp9.d.ts} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { a as HarnessAdapter, d as ToolCall, f as TrajectoryView, r as AdapterResult, s as SuiteConfig, t as AdapterDiagnostics } from "./types-B9H4IZtA.js";
+import { a as HarnessAdapter, d as ToolCall, f as TrajectoryView, r as AdapterResult, s as SuiteConfig, t as AdapterDiagnostics } from "./types-C0gBkl0-.js";
 //#region src/types/assertions.d.ts
 /**
@@ -430,4 +430,4 @@ interface SuiteReport {
 }
 //#endregion
 export { ObjectPredicate as A, TrajectoryPairInstanceJson as C, Cardinality as D, AssertionResult as E, ThresholdedAssertion as M, ToolPattern as N, CompoundPredicate as O, TrajectoryInstancesJson as S, Assertion as T, ProtojsonToolCall as _, ProgressEvent as a, ReferenceTrajectoryConfig as b, RunSuiteOptions as c, TestSuite as d, EvalDatasetRow as f, InstancesJsonlRow as g, InstanceData as h, ProgressCallback as i, Predicate as j, LeafPredicate as k, SuiteReport as l, HarnessMetrics as m, CellReport as n, RepetitionError as o, EvaluationInstanceJson as p, MatrixCell as r, RepetitionResult as s, AssertionStat as t, TestCase as u, ProtojsonTrajectory as v, TrajectorySingleToolUseInstanceJson as w, TrajectoryInstanceMetricKey as x, ReferenceToolNameMode as y };
-//# sourceMappingURL=types-CD3TwOtZ.d.ts.map
+//# sourceMappingURL=types-0QkNVyp9.d.ts.map

package/dist/types-Bac8_Ixb.js ADDED Viewed

@@ -0,0 +1,246 @@
+//#region src/types/stream.ts
+/** Type guards. Prefer these over manual `e.type === "..."` checks at call sites. */
+/** True when `e` is a `system` event whose subtype is `init`. */
+function isSystemInit(e) {
+	if (e.type !== "system") return false;
+	return e.subtype === "init";
+}
+/** True when `e` is a `system` event whose subtype is `api_retry`. */
+function isSystemRetry(e) {
+	if (e.type !== "system") return false;
+	return e.subtype === "api_retry";
+}
+/** True when `e` is an `assistant` event. */
+function isAssistantMessage(e) {
+	const { type } = e;
+	return type === "assistant";
+}
+/** True when `e` is a `user` event. */
+function isUserMessage(e) {
+	const { type } = e;
+	return type === "user";
+}
+/** True when `e` is a `result` event. */
+function isResult(e) {
+	const { type } = e;
+	return type === "result";
+}
+/** True when content block `b` has type `text`. */
+function isTextBlock(b) {
+	const { type } = b;
+	return type === "text";
+}
+/** True when content block `b` has type `tool_use`. */
+function isToolUseBlock(b) {
+	const { type } = b;
+	return type === "tool_use";
+}
+/** True when content block `b` has type `tool_result`. */
+function isToolResultBlock(b) {
+	const { type } = b;
+	return type === "tool_result";
+}
+//#endregion
+//#region src/types/trajectory.ts
+/**
+* Extract the MCP namespace prefix from a tool name.
+*
+* Claude Code formats MCP tool names as `mcp__<server>__<tool>`. The namespace
+* is the first two segments joined: `mcp__<server>`. Returns null for non-MCP
+* tool names (built-ins like `Bash`, `Read`, `Edit`) and for malformed names
+* that have no tool segment or an empty server segment.
+*
+* @param toolName Tool name as reported by the harness.
+* @returns `mcp__<server>`, or null when the name is not a well-formed MCP name.
+*
+* @example
+*   namespaceOf("mcp__api__search_skills") // "mcp__api"
+*   namespaceOf("Bash")                     // null
+*   namespaceOf("mcp____search_skills")     // null (empty server segment)
+*/
+function namespaceOf(toolName) {
+	if (!toolName.startsWith("mcp__")) return null;
+	const [prefix, server, ...toolSegments] = toolName.split("__");
+	// A namespace needs a non-empty server and at least one segment after it;
+	// otherwise we would hand back a meaningless "mcp__" prefix.
+	if (toolSegments.length === 0 || server === "") return null;
+	return `${prefix}__${server}`;
+}
+//#endregion
+//#region src/trajectory/builder.ts
+/**
+* TrajectoryBuilder — consumes a stream of {@link StreamEvent} values and
+* produces a {@link TrajectoryView}.
+*
+* State machine: the builder is a small, tolerant state machine. Invariants:
+*
+*   - Exactly one `system/init` event opens the session. The builder requires
+*     it to be present before `build()`.
+*   - Each `assistant` event begins a new turn. Text blocks accumulate into
+*     the turn's text; `tool_use` blocks become `ToolCall` records.
+*   - `user` events with `tool_result` blocks deliver tool results back. We
+*     match them to pending calls by `tool_use_id`.
+*   - One `result` event closes the session and carries aggregate usage.
+*
+* The builder is *tolerant of partial streams*: a process killed mid-run
+* produces a coherent (but flagged) view. Tool calls without matching results
+* keep `result: null`. The `success` flag reflects whether a successful result
+* event was actually observed. Conversational events whose `message` or
+* `message.content` is missing or not an array are treated as carrying no
+* content blocks instead of throwing.
+*
+* Why a class (not a reducer)?
+*   The internal `pendingCalls` map is mutable by design — we modify ToolCall
+*   objects in place when results arrive, so other parts of the view (which
+*   hold references to the same objects) see the update for free. A reducer
+*   would force a deep copy per result event, which is wasteful and would
+*   complicate identity-based queries.
+*/
+var TrajectoryBuilder = class {
+	/** Session metadata captured from `system/init`; null until that event is consumed. */
+	meta = null;
+	/** `Date.now()` at the moment `system/init` was consumed; null before that. */
+	sessionStartTs = null;
+	/** One entry per consumed `assistant` event, in stream order. */
+	turns = [];
+	/** Every tool call across all turns, in stream order. */
+	allToolCalls = [];
+	/**
+	* tool_use_id → ToolCall, for matching results back to calls.
+	* Entries are removed once a result is observed.
+	*/
+	pendingCalls = /* @__PURE__ */ new Map();
+	/** Recorded `system/api_retry` events with their offset from session start. */
+	retries = [];
+	/** Fields below mirror the `result` event; they keep these defaults if none is seen. */
+	finalUsage = null;
+	finalCostUsd = 0;
+	finalDurationMs = 0;
+	finalNumTurns = 0;
+	finalResultText = "";
+	sawResultEvent = false;
+	resultIsError = false;
+	/**
+	* Consume one event. Safe to call with events in stream order.
+	*
+	* Unknown event types are silently ignored — the schema evolves and we
+	* don't want CI to break on a new event type we haven't modelled.
+	*/
+	consume(event) {
+		if (isSystemInit(event)) {
+			this.meta = {
+				sessionId: event.session_id,
+				model: event.model,
+				cwd: event.cwd,
+				permissionMode: event.permissionMode,
+				availableTools: event.tools ?? [],
+				mcpServers: (event.mcp_servers ?? []).map((s) => ({
+					name: s.name,
+					status: s.status
+				}))
+			};
+			this.sessionStartTs = Date.now();
+			return;
+		}
+		if (isSystemRetry(event)) {
+			this.retries.push({
+				// Explicit null check: a start timestamp of 0 is still a valid start.
+				// A retry seen before init has no reference point and gets offset 0.
+				offsetMs: this.sessionStartTs !== null ? Date.now() - this.sessionStartTs : 0,
+				raw: event
+			});
+			return;
+		}
+		if (isAssistantMessage(event)) {
+			this.handleAssistantMessage(event);
+			return;
+		}
+		if (isUserMessage(event)) {
+			this.handleUserMessage(event);
+			return;
+		}
+		if (isResult(event)) {
+			this.sawResultEvent = true;
+			// Normalize to a boolean so a missing `is_error` reads as "no error".
+			this.resultIsError = Boolean(event.is_error);
+			this.finalUsage = event.usage ?? null;
+			this.finalCostUsd = event.total_cost_usd ?? 0;
+			this.finalDurationMs = event.duration_ms ?? 0;
+			this.finalNumTurns = event.num_turns ?? 0;
+			this.finalResultText = event.result ?? "";
+			return;
+		}
+	}
+	/**
+	* Finalize the view. Call after consuming the last event from the stream.
+	*
+	* Throws if no `system/init` was observed — at that point we have no model,
+	* no session id, and no available-tools list, which means assertions like
+	* "called any mcp__api__* tool" can't even be evaluated meaningfully.
+	*
+	* The returned view shares the builder's `turns` and tool-call arrays by
+	* reference; they are not copied.
+	*/
+	build() {
+		if (this.meta === null) throw new Error("TrajectoryBuilder.build() called before any system/init event was observed. The harness may have failed to start, or the stream was truncated before init.");
+		const lastTurn = this.turns[this.turns.length - 1];
+		const accumulatedText = this.turns.map((t) => t.text).filter((t) => t.length > 0).join("\n\n").trim();
+		return {
+			meta: this.meta,
+			toolCalls: this.allToolCalls,
+			turns: this.turns,
+			// Prefer the text gathered from assistant turns; fall back to the result event's text.
+			finalResponse: accumulatedText || this.finalResultText,
+			finalStopReason: lastTurn?.stopReason ?? null,
+			usage: {
+				inputTokens: this.finalUsage?.input_tokens ?? 0,
+				outputTokens: this.finalUsage?.output_tokens ?? 0,
+				totalCostUsd: this.finalCostUsd,
+				durationMs: this.finalDurationMs,
+				// Without a reported turn count (e.g. no result event), count observed turns.
+				numTurns: this.finalNumTurns || this.turns.length
+			},
+			retries: this.retries,
+			success: this.sawResultEvent && !this.resultIsError
+		};
+	}
+	/**
+	* Record one assistant turn: concatenate its text blocks and register each
+	* `tool_use` block as a pending ToolCall. Other block types are ignored.
+	*/
+	handleAssistantMessage(event) {
+		const turnIndex = this.turns.length;
+		const textChunks = [];
+		const toolCallsThisTurn = [];
+		// A missing or non-array content payload is treated as an empty turn
+		// rather than crashing the whole run.
+		const content = event.message?.content;
+		const blocks = Array.isArray(content) ? content : [];
+		for (const block of blocks) {
+			if (isTextBlock(block)) {
+				textChunks.push(block.text);
+				continue;
+			}
+			if (isToolUseBlock(block)) {
+				const call = {
+					name: block.name,
+					namespace: namespaceOf(block.name),
+					callId: block.id,
+					args: block.input,
+					result: null,
+					isError: false,
+					turnIndex,
+					callIndex: this.allToolCalls.length
+				};
+				this.allToolCalls.push(call);
+				this.pendingCalls.set(block.id, call);
+				toolCallsThisTurn.push(call);
+				continue;
+			}
+		}
+		this.turns.push({
+			turnIndex,
+			text: textChunks.join("").trim(),
+			toolCalls: toolCallsThisTurn,
+			stopReason: event.message?.stop_reason ?? null
+		});
+	}
+	/**
+	* Deliver `tool_result` blocks back to their pending calls, matched by
+	* `tool_use_id`. Results for unknown ids are ignored.
+	*/
+	handleUserMessage(event) {
+		const content = event.message?.content;
+		// Plain-string content (or a missing payload) carries no tool results.
+		if (!Array.isArray(content)) return;
+		for (const block of content) {
+			if (!isToolResultBlock(block)) continue;
+			const call = this.pendingCalls.get(block.tool_use_id);
+			if (!call) continue;
+			// Mutate in place so every holder of this ToolCall sees the result.
+			call.result = block.content;
+			call.isError = block.is_error ?? false;
+			this.pendingCalls.delete(block.tool_use_id);
+		}
+	}
+};
+/**
+* Convenience wrapper: feed every event of an async iterable into a fresh
+* {@link TrajectoryBuilder} and return the finished view.
+*
+* Use this when the whole event stream is available and only the final view
+* matters. For interactive/incremental scenarios (e.g. surfacing partial
+* state in a UI) create a {@link TrajectoryBuilder} yourself and drive
+* `consume()` / `build()` directly.
+*
+* @param events Async iterable of stream events, in stream order.
+* @returns The view produced by `build()` once the iterable is exhausted.
+*/
+async function buildTrajectory(events) {
+	const trajectory = new TrajectoryBuilder();
+	for await (const streamEvent of events) {
+		trajectory.consume(streamEvent);
+	}
+	return trajectory.build();
+}
+//#endregion
+//#region src/adapters/types.ts
+/**
+* Error raised when the harness does not yield a usable trajectory.
+*
+* The usual cause is a process that died before emitting a usable session
+* init event. Look at `diagnostics.stderr` to find out why.
+*/
+var AdapterError = class extends Error {
+	/** Diagnostics handed to the constructor, stored as given. */
+	diagnostics;
+	constructor(message, diagnostics) {
+		super(message);
+		this.name = "AdapterError";
+		this.diagnostics = diagnostics;
+	}
+};
+//#endregion
+export { isAssistantMessage as a, isSystemRetry as c, isToolUseBlock as d, isUserMessage as f, namespaceOf as i, isTextBlock as l, TrajectoryBuilder as n, isResult as o, buildTrajectory as r, isSystemInit as s, AdapterError as t, isToolResultBlock as u };
+//# sourceMappingURL=types-Bac8_Ixb.js.map

package/dist/types-Bac8_Ixb.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"types-Bac8_Ixb.js","names":[],"sources":["../src/types/stream.ts","../src/types/trajectory.ts","../src/trajectory/builder.ts","../src/adapters/types.ts"],"sourcesContent":["/**\n * Discriminated union of events emitted by Claude Code's\n * `--output-format stream-json` mode.\n *\n * The format is NDJSON (one JSON object per line on stdout). Each line has\n * a required `type` field and often a `subtype` for further disambiguation.\n *\n * Source notes: the stream-json schema is not formally documented as of mid-2026.\n * These types are derived from:\n * - https://code.claude.com/docs/en/headless\n * - https://github.com/anthropics/claude-code/issues/24612 (event-types tracking issue)\n * - https://takopi.dev/reference/runners/claude/stream-json-cheatsheet/\n * - The `@anthropic-ai/claude-agent-sdk` TypeScript declaration files,\n * which are the de-facto source of truth.\n *\n * When adding new event types, prefer extending the union here rather than\n * branching on `any` in callers. Unknown events should be tolerated silently\n * by the builder (the schema evolves and we don't want CI to break on a new\n * event type we haven't modelled yet).\n */\n\n/** Top-level discriminated union of stream-json events. */\nexport type StreamEvent =\n | SystemInitEvent\n | SystemRetryEvent\n | SystemPluginInstallEvent\n | SystemCompactBoundaryEvent\n | SystemUnknownEvent\n | AssistantMessageEvent\n | UserMessageEvent\n | ResultEvent;\n\n// system events\n\n/** Emitted once at session start. Carries the session-level metadata. */\nexport interface SystemInitEvent {\n type: \"system\";\n subtype: \"init\";\n session_id: string;\n cwd: string;\n model: string;\n permissionMode?: string;\n apiKeySource?: string;\n /** Names of tools available in the session (built-in + MCP). */\n tools: string[];\n /** MCP servers configured for this session, with connection status. 
*/\n mcp_servers: McpServerStatus[];\n}\n\nexport interface McpServerStatus {\n name: string;\n status: \"connected\" | \"disconnected\" | \"error\" | string;\n}\n\n/** Emitted when the API rate-limits us or otherwise asks for a retry. */\nexport interface SystemRetryEvent {\n type: \"system\";\n subtype: \"api_retry\";\n session_id: string;\n /** Implementation-defined retry payload (delay, reason, etc). */\n [key: string]: unknown;\n}\n\n/** Emitted while marketplace plugins are installing pre-session. */\nexport interface SystemPluginInstallEvent {\n type: \"system\";\n subtype: \"plugin_install\";\n session_id: string;\n [key: string]: unknown;\n}\n\n/** Emitted when Claude Code compacts the context window mid-session. */\nexport interface SystemCompactBoundaryEvent {\n type: \"system\";\n subtype: \"compact_boundary\";\n session_id: string;\n [key: string]: unknown;\n}\n\n/**\n * Catch-all for `type: \"system\"` events we haven't modelled.\n *\n * Keeps the union exhaustive while tolerating schema evolution. Callers should\n * either explicitly handle a known subtype or fall through to ignore.\n */\nexport interface SystemUnknownEvent {\n type: \"system\";\n subtype: string;\n session_id?: string;\n [key: string]: unknown;\n}\n\n// conversational events\n\n/** One assistant turn. The `message` field mirrors the Anthropic Messages API shape. */\nexport interface AssistantMessageEvent {\n type: \"assistant\";\n session_id: string;\n message: AssistantMessage;\n}\n\nexport interface AssistantMessage {\n id: string;\n type: \"message\";\n role: \"assistant\";\n content: ContentBlock[];\n model?: string;\n stop_reason?: StopReason | null;\n usage?: Usage;\n}\n\n/**\n * A user-role message in the stream.\n *\n * In stream-json these are usually *synthetic* — the harness injects them to\n * feed tool results back into the conversation after dispatching a tool. 
The\n * very first user message (the prompt) is also emitted here for completeness.\n */\nexport interface UserMessageEvent {\n type: \"user\";\n session_id: string;\n message: UserMessage;\n}\n\nexport interface UserMessage {\n role: \"user\";\n /** String for the initial prompt, array of blocks when carrying tool results. */\n content: ContentBlock[] | string;\n}\n\n// content blocks\n\nexport type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock;\n\nexport interface TextBlock {\n type: \"text\";\n text: string;\n}\n\nexport interface ToolUseBlock {\n type: \"tool_use\";\n /** Unique id assigned by the model; used to match tool_result back to this call. */\n id: string;\n /** Tool name. MCP tools follow the convention `mcp__<server>__<tool>`. */\n name: string;\n /** Arguments the model passed. Schema is per-tool. */\n input: unknown;\n}\n\nexport interface ToolResultBlock {\n type: \"tool_result\";\n /** The id of the corresponding tool_use block. */\n tool_use_id: string;\n /** Tool output. May be plain text or further content blocks for richer tools. */\n content: string | ContentBlock[];\n is_error?: boolean;\n}\n\n// result envelope\n\n/** Emitted once at session end. Carries aggregate usage and cost. */\nexport interface ResultEvent {\n type: \"result\";\n subtype: \"success\" | \"error\";\n session_id: string;\n total_cost_usd: number;\n is_error: boolean;\n duration_ms: number;\n duration_api_ms?: number;\n num_turns: number;\n /** The final text the harness returned, if any. */\n result?: string;\n usage?: Usage;\n}\n\n// shared scalars\n\n/**\n * Reasons the model can stop a turn. 
Open-ended string union because new\n * stop reasons appear over time.\n */\nexport type StopReason =\n | \"end_turn\"\n | \"tool_use\"\n | \"max_tokens\"\n | \"stop_sequence\"\n | (string & {});\n\nexport interface Usage {\n input_tokens: number;\n output_tokens: number;\n cache_creation_input_tokens?: number;\n cache_read_input_tokens?: number;\n}\n\n// type guards\n\n/** Type guards. Prefer these over manual `e.type === \"...\"` checks at call sites. */\n\nexport function isSystemInit(e: StreamEvent): e is SystemInitEvent {\n return e.type === \"system\" && (e as SystemInitEvent).subtype === \"init\";\n}\n\nexport function isSystemRetry(e: StreamEvent): e is SystemRetryEvent {\n return e.type === \"system\" && (e as SystemRetryEvent).subtype === \"api_retry\";\n}\n\nexport function isAssistantMessage(e: StreamEvent): e is AssistantMessageEvent {\n return e.type === \"assistant\";\n}\n\nexport function isUserMessage(e: StreamEvent): e is UserMessageEvent {\n return e.type === \"user\";\n}\n\nexport function isResult(e: StreamEvent): e is ResultEvent {\n return e.type === \"result\";\n}\n\nexport function isTextBlock(b: ContentBlock): b is TextBlock {\n return b.type === \"text\";\n}\n\nexport function isToolUseBlock(b: ContentBlock): b is ToolUseBlock {\n return b.type === \"tool_use\";\n}\n\nexport function isToolResultBlock(b: ContentBlock): b is ToolResultBlock {\n return b.type === \"tool_result\";\n}\n","/**\n * TrajectoryView — the assertion-friendly projection of a Claude Code session.\n *\n * The view is derived from the stream of {@link StreamEvent} values produced by\n * the harness, but is optimized for the queries that the assertion DSL needs to\n * express:\n *\n * - did tool X get called? (look at `toolCalls`)\n * - did tool A come before tool B? (compare `turnIndex` / `callIndex`)\n * - was a tool called with arguments matching predicate P? (`toolCalls[i].args`)\n * - did the agent answer without using any tool? 
(`toolCalls.length === 0`)\n *\n * The view is reconstructable from the raw events (lossless w.r.t. assertions),\n * but operating on it directly is dramatically simpler than walking event\n * streams or OTel span trees.\n *\n * Design notes:\n * - `turnIndex` and `callIndex` are the right primitives for ordering.\n * Wall-clock timestamps from the stream are unreliable for sub-second\n * ordering and parallel tool dispatch.\n * - Parallel tool calls (multiple `tool_use` blocks in one assistant message)\n * share a `turnIndex` but have distinct `callIndex` values in emission order.\n * - `namespace` is precomputed so assertions like `called(pattern: \"mcp__api__*\")`\n * can do a cheap string check.\n */\n\nimport type { StopReason } from \"./stream\";\n\nexport interface TrajectoryView {\n /** Session metadata, captured from the `system/init` event. */\n meta: SessionMeta;\n\n /** Every tool call, in global emission order. */\n toolCalls: ToolCall[];\n\n /** Each assistant turn: text content + any tool calls emitted in that turn. */\n turns: AssistantTurn[];\n\n /** All assistant text concatenated across turns. Useful for `response_contains`. */\n finalResponse: string;\n\n /** Stop reason of the *last* assistant turn. */\n finalStopReason: StopReason | null;\n\n /** Aggregate usage and cost from the result event. */\n usage: UsageSummary;\n\n /** Retry events observed during the run (rate limits, transient errors). */\n retries: RetryRecord[];\n\n /** Whether the result envelope indicated success. */\n success: boolean;\n}\n\nexport interface SessionMeta {\n sessionId: string;\n model: string;\n cwd: string;\n permissionMode?: string;\n /** Tool names the harness reported as available at session start. */\n availableTools: string[];\n /** MCP servers configured for the session, with connection status. */\n mcpServers: { name: string; status: string }[];\n}\n\nexport interface ToolCall {\n /** Fully-qualified tool name, e.g. 
`\"mcp__api__search_skills\"` or `\"Bash\"`. */\n name: string;\n\n /**\n * Namespace prefix for MCP-style names (`\"mcp__api\"`), or null for built-ins.\n * Precomputed via {@link namespaceOf} for cheap pattern matching.\n */\n namespace: string | null;\n\n /** The `tool_use` block's `id`; matches a later `tool_result.tool_use_id`. */\n callId: string;\n\n /** Args the model emitted on this call. Tool-specific schema. */\n args: unknown;\n\n /** Tool result, or null if no result was observed (e.g. process killed). */\n result: unknown | null;\n\n /** Whether the tool reported an error in its result. */\n isError: boolean;\n\n /**\n * Which assistant turn produced this call. Parallel calls within a single\n * assistant message share a `turnIndex`.\n */\n turnIndex: number;\n\n /** Index in the global ordered tool-call sequence. */\n callIndex: number;\n}\n\nexport interface AssistantTurn {\n turnIndex: number;\n /** Text emitted in this turn (may be empty if turn was tool-only). */\n text: string;\n /** Tool calls emitted in this turn, in their block order. */\n toolCalls: ToolCall[];\n /** Stop reason reported by the model for this turn. */\n stopReason: StopReason | null;\n}\n\nexport interface UsageSummary {\n inputTokens: number;\n outputTokens: number;\n totalCostUsd: number;\n durationMs: number;\n numTurns: number;\n}\n\nexport interface RetryRecord {\n /** ms since session start (approximate; the stream doesn't include precise ts). */\n offsetMs: number;\n /** Raw payload from the `system/api_retry` event for diagnostics. */\n raw: unknown;\n}\n\n// helpers\n\n/**\n * Extract the MCP namespace prefix from a tool name.\n *\n * Claude Code formats MCP tool names as `mcp__<server>__<tool>`. The namespace\n * is the first two segments joined: `mcp__<server>`. 
Returns null for non-MCP\n * tool names (built-ins like `Bash`, `Read`, `Edit`).\n *\n * @example\n * namespaceOf(\"mcp__api__search_skills\") // \"mcp__api\"\n * namespaceOf(\"Bash\") // null\n */\nexport function namespaceOf(toolName: string): string | null {\n if (!toolName.startsWith(\"mcp__\")) return null;\n const parts = toolName.split(\"__\");\n if (parts.length < 3) return null;\n return `${parts[0]}__${parts[1]}`;\n}\n","/**\n * TrajectoryBuilder — consumes a stream of {@link StreamEvent} values and\n * produces a {@link TrajectoryView}.\n *\n * State machine: the builder is a small, tolerant state machine. Invariants:\n *\n * - Exactly one `system/init` event opens the session. The builder requires\n * it to be present before `build()`.\n * - Each `assistant` event begins a new turn. Text blocks accumulate into\n * the turn's text; `tool_use` blocks become `ToolCall` records.\n * - `user` events with `tool_result` blocks deliver tool results back. We\n * match them to pending calls by `tool_use_id`.\n * - One `result` event closes the session and carries aggregate usage.\n *\n * The builder is *tolerant of partial streams*: a process killed mid-run\n * produces a coherent (but flagged) view. Tool calls without matching results\n * keep `result: null`. The `success` flag reflects whether a successful result\n * event was actually observed.\n *\n * Why a class (not a reducer)?\n * The internal `pendingCalls` map is mutable by design — we modify ToolCall\n * objects in place when results arrive, so other parts of the view (which\n * hold references to the same objects) see the update for free. 
A reducer\n * would force a deep copy per result event, which is wasteful and would\n * complicate identity-based queries.\n */\n\nimport {\n isAssistantMessage,\n isResult,\n isSystemInit,\n isTextBlock,\n isToolResultBlock,\n isToolUseBlock,\n isUserMessage,\n type StreamEvent,\n type Usage,\n} from \"../types/stream\";\nimport {\n namespaceOf,\n type AssistantTurn,\n type RetryRecord,\n type SessionMeta,\n type ToolCall,\n type TrajectoryView,\n} from \"../types/trajectory\";\n\nexport class TrajectoryBuilder {\n private meta: SessionMeta | null = null;\n private sessionStartTs: number | null = null;\n\n private turns: AssistantTurn[] = [];\n private allToolCalls: ToolCall[] = [];\n\n /**\n * tool_use_id → ToolCall, for matching results back to calls.\n * Entries are removed once a result is observed.\n */\n private pendingCalls: Map<string, ToolCall> = new Map();\n\n private retries: RetryRecord[] = [];\n\n private finalUsage: Usage | null = null;\n private finalCostUsd = 0;\n private finalDurationMs = 0;\n private finalNumTurns = 0;\n private finalResultText = \"\";\n private sawResultEvent = false;\n private resultIsError = false;\n\n /**\n * Consume one event. Safe to call with events in stream order.\n *\n * Unknown event types are silently ignored — the schema evolves and we\n * don't want CI to break on a new event type we haven't modelled.\n */\n consume(event: StreamEvent): void {\n if (isSystemInit(event)) {\n this.meta = {\n sessionId: event.session_id,\n model: event.model,\n cwd: event.cwd,\n permissionMode: event.permissionMode,\n availableTools: event.tools ?? [],\n mcpServers: (event.mcp_servers ?? []).map((s) => ({\n name: s.name,\n status: s.status,\n })),\n };\n this.sessionStartTs = Date.now();\n return;\n }\n\n if (event.type === \"system\" && event.subtype === \"api_retry\") {\n this.retries.push({\n offsetMs: this.sessionStartTs ? 
Date.now() - this.sessionStartTs : 0,\n raw: event,\n });\n return;\n }\n\n if (isAssistantMessage(event)) {\n this.handleAssistantMessage(event);\n return;\n }\n\n if (isUserMessage(event)) {\n this.handleUserMessage(event);\n return;\n }\n\n if (isResult(event)) {\n this.sawResultEvent = true;\n this.resultIsError = event.is_error;\n this.finalUsage = event.usage ?? null;\n this.finalCostUsd = event.total_cost_usd ?? 0;\n this.finalDurationMs = event.duration_ms ?? 0;\n this.finalNumTurns = event.num_turns ?? 0;\n this.finalResultText = event.result ?? \"\";\n return;\n }\n\n // Unknown event: ignored. See class doc.\n }\n\n /**\n * Finalize the view. Call after consuming the last event from the stream.\n *\n * Throws if no `system/init` was observed — at that point we have no model,\n * no session id, and no available-tools list, which means assertions like\n * \"called any mcp__api__* tool\" can't even be evaluated meaningfully.\n */\n build(): TrajectoryView {\n if (this.meta === null) {\n throw new Error(\n \"TrajectoryBuilder.build() called before any system/init event was observed. \" +\n \"The harness may have failed to start, or the stream was truncated before init.\",\n );\n }\n\n const lastTurn = this.turns[this.turns.length - 1];\n\n // Prefer the assistant text we accumulated turn-by-turn over the\n // `result.result` field, because the latter is sometimes a summary\n // and the former is exactly what the model said.\n const accumulatedText = this.turns\n .map((t) => t.text)\n .filter((t) => t.length > 0)\n .join(\"\\n\\n\")\n .trim();\n\n return {\n meta: this.meta,\n toolCalls: this.allToolCalls,\n turns: this.turns,\n finalResponse: accumulatedText || this.finalResultText,\n finalStopReason: lastTurn?.stopReason ?? null,\n usage: {\n inputTokens: this.finalUsage?.input_tokens ?? 0,\n outputTokens: this.finalUsage?.output_tokens ?? 
0,\n totalCostUsd: this.finalCostUsd,\n durationMs: this.finalDurationMs,\n // Fall back to observed turn count if the result event was missing.\n numTurns: this.finalNumTurns || this.turns.length,\n },\n retries: this.retries,\n // Successful = saw a non-error result envelope. Streams that ended without\n // a result event are reported as unsuccessful regardless of tool outcomes.\n success: this.sawResultEvent && !this.resultIsError,\n };\n }\n\n // private handlers\n\n private handleAssistantMessage(\n event: Extract<StreamEvent, { type: \"assistant\" }>,\n ): void {\n const turnIndex = this.turns.length;\n const textChunks: string[] = [];\n const toolCallsThisTurn: ToolCall[] = [];\n\n for (const block of event.message.content) {\n if (isTextBlock(block)) {\n textChunks.push(block.text);\n continue;\n }\n if (isToolUseBlock(block)) {\n const call: ToolCall = {\n name: block.name,\n namespace: namespaceOf(block.name),\n callId: block.id,\n args: block.input,\n result: null,\n isError: false,\n turnIndex,\n callIndex: this.allToolCalls.length,\n };\n this.allToolCalls.push(call);\n this.pendingCalls.set(block.id, call);\n toolCallsThisTurn.push(call);\n continue;\n }\n // tool_result blocks don't appear in assistant messages — those arrive\n // via user messages. If one does appear, ignore it; we'd rather drop\n // an unexpected block than crash the eval.\n }\n\n this.turns.push({\n turnIndex,\n text: textChunks.join(\"\").trim(),\n toolCalls: toolCallsThisTurn,\n stopReason: event.message.stop_reason ?? null,\n });\n }\n\n private handleUserMessage(\n event: Extract<StreamEvent, { type: \"user\" }>,\n ): void {\n const content = event.message.content;\n\n // The very first user message carries the prompt as a plain string. 
We\n // already know the prompt (the caller passed it to the adapter), so we\n // ignore this case — there's nothing assertion-relevant in it.\n if (typeof content === \"string\") return;\n\n for (const block of content) {\n if (!isToolResultBlock(block)) continue;\n\n const call = this.pendingCalls.get(block.tool_use_id);\n if (!call) {\n // Unmatched result: ignore. Can happen if events arrive out of order\n // or the corresponding tool_use was emitted in an earlier run that\n // we're resuming. Either way, dropping is safer than throwing.\n continue;\n }\n\n call.result = block.content;\n call.isError = block.is_error ?? false;\n this.pendingCalls.delete(block.tool_use_id);\n }\n }\n}\n\n/**\n * Convenience: drain an async iterable of events through a fresh builder.\n *\n * Suitable when you have the full event stream and just want the view.\n * For interactive/incremental scenarios (e.g. surfacing partial state in a UI)\n * instantiate {@link TrajectoryBuilder} directly and call `consume()` /\n * `build()` yourself.\n */\nexport async function buildTrajectory(\n events: AsyncIterable<StreamEvent>,\n): Promise<TrajectoryView> {\n const builder = new TrajectoryBuilder();\n for await (const event of events) {\n builder.consume(event);\n }\n return builder.build();\n}\n","/**\n * Generic harness adapter contract.\n *\n * Every harness adapter produces a {@link TrajectoryView} plus process\n * diagnostics. The runner and assertion engine depend only on these types —\n * not on any specific harness implementation.\n */\n\nimport type { TrajectoryView } from \"../types/trajectory\";\n\n/** Base config every adapter must accept. */\nexport interface BaseAdapterConfig {\n prompt: string;\n model?: string;\n timeoutMs?: number;\n signal?: AbortSignal;\n env?: Record<string, string>;\n cwd?: string;\n}\n\n/** Suite-level config: generic fields plus adapter-specific nested blocks. 
*/\nexport type SuiteConfig = Partial<BaseAdapterConfig> & {\n /** Claude Code adapter options (when `adapter` is `claude-code`). */\n claudeCode?: Record<string, unknown>;\n /** Codex CLI adapter options (when `adapter` is `codex`). */\n codex?: Record<string, unknown>;\n};\n\n/** Generic harness adapter interface. */\nexport interface HarnessAdapter<\n TConfig extends BaseAdapterConfig = BaseAdapterConfig,\n> {\n readonly id: string;\n run(config: TConfig): Promise<AdapterResult>;\n}\n\n/** Successful adapter run. */\nexport interface AdapterResult {\n view: TrajectoryView;\n diagnostics: AdapterDiagnostics;\n}\n\n/** Process-level diagnostics from any adapter. */\nexport interface AdapterDiagnostics {\n exitCode: number | null;\n signal: NodeJS.Signals | null;\n stderr: string;\n parseErrors: ParseErrorRecord[];\n timedOut: boolean;\n durationMs: number;\n}\n\nexport interface ParseErrorRecord {\n line: string;\n error: string;\n}\n\n/**\n * Thrown when the harness fails to produce a usable trajectory.\n *\n * Most commonly this means the process failed before emitting a usable\n * session init event. 
Inspect `diagnostics.stderr` for the cause.\n */\nexport class AdapterError extends Error {\n constructor(\n message: string,\n public readonly diagnostics: Partial<AdapterDiagnostics>,\n ) {\n super(message);\n this.name = \"AdapterError\";\n }\n}\n"],"mappings":";;AAuMA,SAAgB,aAAa,GAAsC;CACjE,OAAO,EAAE,SAAS,YAAa,EAAsB,YAAY;AACnE;AAEA,SAAgB,cAAc,GAAuC;CACnE,OAAO,EAAE,SAAS,YAAa,EAAuB,YAAY;AACpE;AAEA,SAAgB,mBAAmB,GAA4C;CAC7E,OAAO,EAAE,SAAS;AACpB;AAEA,SAAgB,cAAc,GAAuC;CACnE,OAAO,EAAE,SAAS;AACpB;AAEA,SAAgB,SAAS,GAAkC;CACzD,OAAO,EAAE,SAAS;AACpB;AAEA,SAAgB,YAAY,GAAiC;CAC3D,OAAO,EAAE,SAAS;AACpB;AAEA,SAAgB,eAAe,GAAoC;CACjE,OAAO,EAAE,SAAS;AACpB;AAEA,SAAgB,kBAAkB,GAAuC;CACvE,OAAO,EAAE,SAAS;AACpB;;;;;;;;;;;;;;AC9FA,SAAgB,YAAY,UAAiC;CAC3D,IAAI,CAAC,SAAS,WAAW,OAAO,GAAG,OAAO;CAC1C,MAAM,QAAQ,SAAS,MAAM,IAAI;CACjC,IAAI,MAAM,SAAS,GAAG,OAAO;CAC7B,OAAO,GAAG,MAAM,GAAG,IAAI,MAAM;AAC/B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AC7FA,IAAa,oBAAb,MAA+B;CAC7B,OAAmC;CACnC,iBAAwC;CAExC,QAAiC,CAAC;CAClC,eAAmC,CAAC;;;;;CAMpC,+BAA8C,IAAI,IAAI;CAEtD,UAAiC,CAAC;CAElC,aAAmC;CACnC,eAAuB;CACvB,kBAA0B;CAC1B,gBAAwB;CACxB,kBAA0B;CAC1B,iBAAyB;CACzB,gBAAwB;;;;;;;CAQxB,QAAQ,OAA0B;EAChC,IAAI,aAAa,KAAK,GAAG;GACvB,KAAK,OAAO;IACV,WAAW,MAAM;IACjB,OAAO,MAAM;IACb,KAAK,MAAM;IACX,gBAAgB,MAAM;IACtB,gBAAgB,MAAM,SAAS,CAAC;IAChC,aAAa,MAAM,eAAe,CAAC,EAAA,CAAG,KAAK,OAAO;KAChD,MAAM,EAAE;KACR,QAAQ,EAAE;IACZ,EAAE;GACJ;GACA,KAAK,iBAAiB,KAAK,IAAI;GAC/B;EACF;EAEA,IAAI,MAAM,SAAS,YAAY,MAAM,YAAY,aAAa;GAC5D,KAAK,QAAQ,KAAK;IAChB,UAAU,KAAK,iBAAiB,KAAK,IAAI,IAAI,KAAK,iBAAiB;IACnE,KAAK;GACP,CAAC;GACD;EACF;EAEA,IAAI,mBAAmB,KAAK,GAAG;GAC7B,KAAK,uBAAuB,KAAK;GACjC;EACF;EAEA,IAAI,cAAc,KAAK,GAAG;GACxB,KAAK,kBAAkB,KAAK;GAC5B;EACF;EAEA,IAAI,SAAS,KAAK,GAAG;GACnB,KAAK,iBAAiB;GACtB,KAAK,gBAAgB,MAAM;GAC3B,KAAK,aAAa,MAAM,SAAS;GACjC,KAAK,eAAe,MAAM,kBAAkB;GAC5C,KAAK,kBAAkB,MAAM,eAAe;GAC5C,KAAK,gBAAgB,MAAM,aAAa;GACxC,KAAK,kBAAkB,MAAM,UAAU;GACvC;EACF;CAGF;;;;;;;;CASA,QAAwB;EACtB,IAAI,KAAK,SAAS,MAChB,MAAM,IAAI,MACR,4JAEF;EAGF,MAAM,WAAW,KAAK,MAAM,KAAK,MAAM,SAAS;EAKhD,
MAAM,kBAAkB,KAAK,MAC1B,KAAK,MAAM,EAAE,IAAI,CAAC,CAClB,QAAQ,MAAM,EAAE,SAAS,CAAC,CAAC,CAC3B,KAAK,MAAM,CAAC,CACZ,KAAK;EAER,OAAO;GACL,MAAM,KAAK;GACX,WAAW,KAAK;GAChB,OAAO,KAAK;GACZ,eAAe,mBAAmB,KAAK;GACvC,iBAAiB,UAAU,cAAc;GACzC,OAAO;IACL,aAAa,KAAK,YAAY,gBAAgB;IAC9C,cAAc,KAAK,YAAY,iBAAiB;IAChD,cAAc,KAAK;IACnB,YAAY,KAAK;IAEjB,UAAU,KAAK,iBAAiB,KAAK,MAAM;GAC7C;GACA,SAAS,KAAK;GAGd,SAAS,KAAK,kBAAkB,CAAC,KAAK;EACxC;CACF;CAIA,uBACE,OACM;EACN,MAAM,YAAY,KAAK,MAAM;EAC7B,MAAM,aAAuB,CAAC;EAC9B,MAAM,oBAAgC,CAAC;EAEvC,KAAK,MAAM,SAAS,MAAM,QAAQ,SAAS;GACzC,IAAI,YAAY,KAAK,GAAG;IACtB,WAAW,KAAK,MAAM,IAAI;IAC1B;GACF;GACA,IAAI,eAAe,KAAK,GAAG;IACzB,MAAM,OAAiB;KACrB,MAAM,MAAM;KACZ,WAAW,YAAY,MAAM,IAAI;KACjC,QAAQ,MAAM;KACd,MAAM,MAAM;KACZ,QAAQ;KACR,SAAS;KACT;KACA,WAAW,KAAK,aAAa;IAC/B;IACA,KAAK,aAAa,KAAK,IAAI;IAC3B,KAAK,aAAa,IAAI,MAAM,IAAI,IAAI;IACpC,kBAAkB,KAAK,IAAI;IAC3B;GACF;EAIF;EAEA,KAAK,MAAM,KAAK;GACd;GACA,MAAM,WAAW,KAAK,EAAE,CAAC,CAAC,KAAK;GAC/B,WAAW;GACX,YAAY,MAAM,QAAQ,eAAe;EAC3C,CAAC;CACH;CAEA,kBACE,OACM;EACN,MAAM,UAAU,MAAM,QAAQ;EAK9B,IAAI,OAAO,YAAY,UAAU;EAEjC,KAAK,MAAM,SAAS,SAAS;GAC3B,IAAI,CAAC,kBAAkB,KAAK,GAAG;GAE/B,MAAM,OAAO,KAAK,aAAa,IAAI,MAAM,WAAW;GACpD,IAAI,CAAC,MAIH;GAGF,KAAK,SAAS,MAAM;GACpB,KAAK,UAAU,MAAM,YAAY;GACjC,KAAK,aAAa,OAAO,MAAM,WAAW;EAC5C;CACF;AACF;;;;;;;;;AAUA,eAAsB,gBACpB,QACyB;CACzB,MAAM,UAAU,IAAI,kBAAkB;CACtC,WAAW,MAAM,SAAS,QACxB,QAAQ,QAAQ,KAAK;CAEvB,OAAO,QAAQ,MAAM;AACvB;;;;;;;;;ACpMA,IAAa,eAAb,cAAkC,MAAM;CAGpB;CAFlB,YACE,SACA,aACA;EACA,MAAM,OAAO;EAFG,KAAA,cAAA;EAGhB,KAAK,OAAO;CACd;AACF"}