npm - @tangle-network/agent-runtime - Versions diffs - 0.45.0 → 0.46.0 - Mend

@tangle-network/agent-runtime 0.45.0 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

package/README.md +3 -3
package/dist/agent.d.ts +5 -5
package/dist/agent.js +2 -2
package/dist/agent.js.map +1 -1
package/dist/analyst-loop.d.ts +5 -40
package/dist/analyst-loop.js +2 -4
package/dist/{chunk-KEWO4KI6.js → chunk-65FQLI4V.js} +628 -138
package/dist/chunk-65FQLI4V.js.map +1 -0
package/dist/{chunk-NYN5RTLP.js → chunk-GN75RGM6.js} +7 -7
package/dist/chunk-GN75RGM6.js.map +1 -0
package/dist/{chunk-PRX45WE2.js → chunk-GSUO5QS6.js} +1 -119
package/dist/chunk-GSUO5QS6.js.map +1 -0
package/dist/{chunk-FK53TXOP.js → chunk-HNUXAZIJ.js} +4 -27
package/dist/chunk-HNUXAZIJ.js.map +1 -0
package/dist/{chunk-IJ6FGOPO.js → chunk-I42NHLKX.js} +3 -3
package/dist/chunk-I42NHLKX.js.map +1 -0
package/dist/{chunk-IJGS6J7X.js → chunk-JNPK46YH.js} +2 -2
package/dist/{chunk-QR4UUC5P.js → chunk-KADIJAD4.js} +33 -19
package/dist/chunk-KADIJAD4.js.map +1 -0
package/dist/{chunk-Z2QXVBA6.js → chunk-KPN7OQ64.js} +4 -4
package/dist/chunk-KPN7OQ64.js.map +1 -0
package/dist/{chunk-KSMX62JF.js → chunk-VR4JIC5H.js} +2 -2
package/dist/{coder-CczgMqFx.d.ts → coder-DCWFQpmJ.d.ts} +1 -1
package/dist/{dynamic-BvllHV6M.d.ts → driver-C-mtBo7h.d.ts} +6 -6
package/dist/{improvement-adapter-CWegd3vw.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
package/dist/improvement.d.ts +2 -2
package/dist/index.d.ts +8 -8
package/dist/index.js +8 -8
package/dist/{kb-gate-D9GBocLN.d.ts → kb-gate-2Gwpz_27.d.ts} +7 -7
package/dist/{loop-runner-bin-CPrCoKqC.d.ts → loop-runner-bin-D-K6bRp3.d.ts} +11 -11
package/dist/loop-runner-bin.d.ts +6 -6
package/dist/loop-runner-bin.js +6 -6
package/dist/loops.d.ts +5 -5
package/dist/loops.js +10 -10
package/dist/mcp/bin.js +6 -6
package/dist/mcp/bin.js.map +1 -1
package/dist/mcp/index.d.ts +11 -11
package/dist/mcp/index.js +7 -7
package/dist/{otel-export-Dy2DyUCU.d.ts → otel-export-nurzFwuJ.d.ts} +1 -1
package/dist/profiles.d.ts +8 -8
package/dist/profiles.js +1 -1
package/dist/profiles.js.map +1 -1
package/dist/{run-loop--hSoIknW.d.ts → run-loop-CU2Y00Si.d.ts} +2 -2
package/dist/runtime.d.ts +156 -52
package/dist/runtime.js +10 -10
package/dist/{types-1HbsFa7H.d.ts → types-BfoeiQRZ.d.ts} +20 -20
package/dist/{types-DdzkffAm.d.ts → types-DnYoHvvZ.d.ts} +17 -5
package/dist/{types-BtRLF2U3.d.ts → types-p8dWBIXL.d.ts} +1 -1
package/dist/workflow.d.ts +3 -3
package/dist/workflow.js +2 -2
package/dist/workflow.js.map +1 -1
package/package.json +13 -24
package/skills/agent-runtime-adoption/SKILL.md +3 -3
package/skills/generate-eval/SKILL.md +60 -0
package/dist/chunk-FK53TXOP.js.map +0 -1
package/dist/chunk-IJ6FGOPO.js.map +0 -1
package/dist/chunk-KEWO4KI6.js.map +0 -1
package/dist/chunk-NYN5RTLP.js.map +0 -1
package/dist/chunk-PRX45WE2.js.map +0 -1
package/dist/chunk-QR4UUC5P.js.map +0 -1
package/dist/chunk-Z2QXVBA6.js.map +0 -1
/package/dist/{chunk-IJGS6J7X.js.map → chunk-JNPK46YH.js.map} +0 -0
/package/dist/{chunk-KSMX62JF.js.map → chunk-VR4JIC5H.js.map} +0 -0

package/dist/profiles.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"sources":["../src/profiles/ui-auditor/prompt.ts","../src/profiles/ui-auditor/in-process-client.ts","../src/profiles/ui-auditor/lens-prompts.ts","../src/profiles/ui-auditor/output-adapter.ts","../src/profiles/ui-auditor/validator.ts","../src/profiles/ui-auditor/profile.ts"],"sourcesContent":["/*\n @experimental\n \n Prompt formatter for the auditor profile. `formatAuditorPrompt` produces\n * the user message handed to the iteration — describes the captures to be\n * taken and the lens to apply. The system prompt comes from\n * `buildAuditorSystemPrompt(lens)` (lens-prompts.ts).\n \n The formatter prepends a machine-readable envelope (`<<UI_AUDIT_TASK>>`\n * … `<<UI_AUDIT_TASK_END>>`) carrying a JSON-serialised task. The\n * in-process auditor client recovers the task from this envelope so the\n * iteration is self-describing — robust to concurrent fanout, where any\n * per-client side state (e.g. a \"current task\" register) would race.\n \n The formatter is pure and deterministic — re-run on the same task\n * produces the same prompt. Tests and trace replays rely on this.\n /\n\nimport type { UiAuditTask } from './task'\n\nconst ENVELOPE_BEGIN = '<<UI_AUDIT_TASK>>'\nconst ENVELOPE_END = '<<UI_AUDIT_TASK_END>>'\n\n/* @experimental /\nexport function encodeAuditTaskEnvelope(task: UiAuditTask): string {\n return `${ENVELOPE_BEGIN}${JSON.stringify(task)}${ENVELOPE_END}`\n}\n\n/\n Parse a task envelope back out of a prompt string. 
Returns undefined if\n * the prompt does not contain a complete envelope OR if the payload is\n * not valid JSON.\n \n @experimental\n /\nexport function decodeAuditTaskEnvelope(prompt: string): UiAuditTask \| undefined {\n const start = prompt.indexOf(ENVELOPE_BEGIN)\n if (start === -1) return undefined\n const payloadStart = start + ENVELOPE_BEGIN.length\n const end = prompt.indexOf(ENVELOPE_END, payloadStart)\n if (end === -1) return undefined\n const payload = prompt.slice(payloadStart, end)\n try {\n const parsed = JSON.parse(payload) as unknown\n if (!parsed \|\| typeof parsed !== 'object') return undefined\n const t = parsed as Partial<UiAuditTask>\n if (typeof t.lens !== 'string' \|\| !Array.isArray(t.captures)) return undefined\n return t as UiAuditTask\n } catch {\n return undefined\n }\n}\n\n/* @experimental /\nexport function formatAuditorPrompt(task: UiAuditTask): string {\n const lines: string[] = []\n lines.push(`# UI audit iteration — lens: ${task.lens}`)\n lines.push('')\n if (task.productContext && task.productContext.trim().length > 0) {\n lines.push('## Product context')\n lines.push(task.productContext.trim())\n lines.push('')\n }\n lines.push('## Captures to take')\n task.captures.forEach((cap, i) => {\n const vp = cap.viewport ? `${cap.viewport.width}x${cap.viewport.height}` : '1280x800 (default)'\n const detail = [\n `viewport=${vp}`,\n cap.fullPage ? 'fullPage=true' : null,\n cap.elementSelector ? `selector=\\`${cap.elementSelector}\\`` : null,\n cap.waitFor ? `waitFor=\\`${cap.waitFor}\\`` : null,\n cap.waitMs !== undefined ? `waitMs=${cap.waitMs}` : null,\n cap.label ? `label=${cap.label}` : null,\n ]\n .filter((s): s is string => s !== null)\n .join(' · ')\n lines.push(`${i + 1}. route=\\`${cap.route}\\` url=${cap.url} ${detail ? 
`(${detail})` : ''}`)\n })\n lines.push('')\n if (task.knownFindingIds && task.knownFindingIds.length > 0) {\n lines.push('## Known findings (link via similarTo, do not refile)')\n lines.push(task.knownFindingIds.map((n) => `#${String(n).padStart(3, '0')}`).join(', '))\n lines.push('')\n }\n lines.push('## Output format')\n lines.push(\n 'Emit a single JSON object with the shape `{ findings: UiFinding[], notes?: string }` where every finding has the fields enumerated in your system prompt. The screenshots field on each finding must reference the captures above by path. Do not emit findings outside the lens.',\n )\n return lines.join('\\n')\n}\n","/\n @experimental\n \n `createInProcessUiAuditClient` — a `LoopSandboxClient` that drives a\n * Playwright browser in-process and delegates finding identification to a\n * consumer-supplied {@link UiJudge}.\n \n Why this exists: `runLoop` is built around a sandbox-SDK seam — each\n * iteration is `client.create() → box.streamPrompt() → box.delete()`.\n * For UI audit, spinning up a real container running a coding harness\n * per iteration is overkill: the work is one browser capture + one\n * vision LLM call. This client satisfies the kernel contract while\n * doing the audit in-process; no container, no sandbox-SDK backend.\n \n The client owns ONE browser for its lifetime and creates a fresh\n * context per iteration (isolated cookies/storage). Playwright is\n * dynamically imported so consumers who use a different `LoopSandboxClient`\n * — e.g. 
a fleet executor that drives Playwright remotely — do not pay\n * the peer dep cost.\n \n Concurrency: each iteration's prompt carries a self-describing task\n * envelope (see `prompt.ts`), so concurrent fanout iterations do not race\n * over per-client side state.\n /\n\nimport type { CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox'\nimport type { LoopSandboxClient } from '../../runtime/types'\nimport type { UiJudge } from './judge'\nimport { decodeAuditTaskEnvelope } from './prompt'\nimport { slugify } from './slugify'\nimport type { UiAuditCapture, UiAuditCaptureRequest } from './task'\n\n// All synthetic events the auditor emits flow through this helper. Reason:\n// `SandboxEvent.data` is a sandbox-SDK shape (effectively `Record<string,\n// unknown>`) that our typed payloads (`UiAuditCapture`, `UiFinding`, …) do not\n// satisfy structurally. The cast moves the type-system smell into a single,\n// named, documented call site so the call sites in `runIteration` stay clean.\n// The runtime contract — `{ type, data }` — is what the output adapter reads;\n// the static type is what the kernel collects into `SandboxEvent[]`.\nfunction asSandboxEvent<T>(type: string, data: T): SandboxEvent {\n return { type, data } as unknown as SandboxEvent\n}\n\n/* @experimental /\nexport interface InProcessUiAuditClientOptions {\n /\n Absolute path under which screenshots are written. Each capture lands\n * at `<workspaceDir>/screenshots/<filename>`; finding screenshot paths\n * are workspace-relative (`screenshots/<filename>`).\n /\n workspaceDir: string\n /* The vision judge that turns captures into findings. /\n judge: UiJudge\n /\n Navigation policy.\n \n `'strict'` (default) waits for `networkidle` and fails the iteration\n * if the page does not settle. 
`'spa'` waits for `domcontentloaded` —\n * use for single-page apps that hold open long-poll/websocket\n * connections and never settle.\n /\n navPolicy?: 'strict' \| 'spa'\n /\n Browser launch override. Default: chromium headless via Playwright.\n * Consumers pass a custom factory to target a remote browser, a\n * different channel, or a fleet adapter.\n /\n launchBrowser?: () => Promise<BrowserHandle>\n}\n\n/* @experimental /\nexport interface BrowserHandle {\n newContext(options?: {\n viewport?: { width: number; height: number }\n }): Promise<BrowserContextHandle>\n close(): Promise<void>\n}\n\n/* @experimental /\nexport interface BrowserContextHandle {\n newPage(): Promise<PageHandle>\n close(): Promise<void>\n}\n\n/* @experimental /\nexport interface PageHandle {\n setViewportSize(size: { width: number; height: number }): Promise<void>\n goto(url: string, options?: { waitUntil?: string; timeout?: number }): Promise<unknown>\n waitForSelector(selector: string, options?: { timeout?: number }): Promise<unknown>\n waitForTimeout(ms: number): Promise<void>\n screenshot(options: { path: string; fullPage?: boolean }): Promise<void>\n locator(selector: string): {\n first(): { screenshot(options: { path: string }): Promise<void> }\n }\n}\n\nconst DEFAULT_VIEWPORT = { width: 1280, height: 800 } as const\nconst NAV_TIMEOUT_MS = 30_000\n\nasync function defaultLaunch(): Promise<BrowserHandle> {\n const mod = (await import('playwright')) as unknown as {\n chromium?: { launch(options?: { headless?: boolean }): Promise<BrowserHandle> }\n }\n if (!mod?.chromium \|\| typeof mod.chromium.launch !== 'function') {\n throw new Error(\n 'ui-auditor: playwright is not installed. 
Install `playwright` (and run `playwright install chromium`) or pass a custom `launchBrowser` to createInProcessUiAuditClient.',\n )\n }\n return mod.chromium.launch({ headless: true })\n}\n\nfunction nowStamp(): string {\n const d = new Date()\n const pad = (n: number) => String(n).padStart(2, '0')\n // Millisecond resolution so parallel fanout iterations capturing the same\n // route/viewport/label within the same second don't collide on filename and\n // silently overwrite each other.\n return (\n `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}-` +\n `${pad(d.getUTCHours())}${pad(d.getUTCMinutes())}${pad(d.getUTCSeconds())}` +\n `-${String(d.getUTCMilliseconds()).padStart(3, '0')}`\n )\n}\n\nfunction viewportOf(req: UiAuditCaptureRequest): { width: number; height: number } {\n return req.viewport ?? DEFAULT_VIEWPORT\n}\n\nfunction captureFilename(req: UiAuditCaptureRequest): string {\n const vp = viewportOf(req)\n const labelPart = req.label ? `--${slugify(req.label, 'label')}` : ''\n return `${slugify(req.route, 'route')}--${vp.width}x${vp.height}${labelPart}--${nowStamp()}.png`\n}\n\nfunction assertHttpUrl(url: string): void {\n let parsed: URL\n try {\n parsed = new URL(url)\n } catch {\n throw new Error(`ui-auditor: capture url is not parseable (got ${JSON.stringify(url)})`)\n }\n // SSRF defense at the client boundary. The MCP tool already restricts to\n // http(s), but `createInProcessUiAuditClient` is exported and can be wired\n // up directly by consumers (the example does this). 
A crafted task envelope\n // could otherwise navigate Playwright to `file://`, `data:`, `javascript:`\n // and read local files or execute inline content.\n if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {\n throw new Error(\n `ui-auditor: capture url must use http or https (got ${parsed.protocol} in ${JSON.stringify(url)})`,\n )\n }\n}\n\nasync function captureOne(\n page: PageHandle,\n req: UiAuditCaptureRequest,\n outAbsPath: string,\n signal: AbortSignal,\n navPolicy: 'strict' \| 'spa',\n): Promise<void> {\n signal.throwIfAborted()\n assertHttpUrl(req.url)\n // Apply the per-capture viewport before navigation. The capture metadata\n // and filename both encode this viewport; the rendered page must match.\n await page.setViewportSize(viewportOf(req))\n const waitUntil = navPolicy === 'spa' ? 'domcontentloaded' : 'networkidle'\n await page.goto(req.url, { waitUntil, timeout: NAV_TIMEOUT_MS })\n if (req.waitFor) {\n await page.waitForSelector(req.waitFor, { timeout: 15_000 })\n }\n const extra = req.waitMs ?? 500\n if (extra > 0) await page.waitForTimeout(extra)\n signal.throwIfAborted()\n if (req.elementSelector) {\n await page.locator(req.elementSelector).first().screenshot({ path: outAbsPath })\n } else {\n await page.screenshot({ path: outAbsPath, fullPage: req.fullPage === true })\n }\n}\n\ninterface SyntheticSandbox extends SandboxInstance {}\n\nfunction makeSandboxId(): string {\n const rand = () => Math.random().toString(16).slice(2, 10)\n return `ui-audit-${rand()}${rand()}`\n}\n\n/* @experimental /\nexport function createInProcessUiAuditClient(\n options: InProcessUiAuditClientOptions,\n): LoopSandboxClient & {\n /\n Close the underlying browser. Idempotent.\n \n Contract: callers MUST ensure no iterations are in flight when this is\n * called. The kernel respects this — `runLoop` awaits every iteration\n * before returning, so `await runLoop(...); await client.close()` is the\n * intended pattern (see `examples/ui-audit`). 
If `close()` is invoked\n * concurrently with a running iteration, the browser teardown will race\n * against in-flight page operations; the iteration will surface an\n * AggregateError carrying both the iteration error and the close error,\n * but no work is lost silently.\n /\n close(): Promise<void>\n} {\n const launch = options.launchBrowser ?? defaultLaunch\n const navPolicy = options.navPolicy ?? 'strict'\n let browserPromise: Promise<BrowserHandle> \| undefined\n let closed = false\n\n async function getBrowser(): Promise<BrowserHandle> {\n if (closed) {\n throw new Error('ui-auditor: client is closed; create a new client to run another iteration')\n }\n if (!browserPromise) browserPromise = launch()\n return browserPromise\n }\n\n async function runIteration(\n promptText: string,\n signal: AbortSignal,\n ): AsyncIterable<SandboxEvent> {\n const task = decodeAuditTaskEnvelope(promptText)\n if (!task) {\n throw new Error(\n 'ui-auditor: prompt is missing a UI_AUDIT_TASK envelope. Use uiAuditorProfile().taskToPrompt to format prompts, or pass an envelope-prefixed prompt manually.',\n )\n }\n if (task.captures.length === 0) {\n throw new Error('ui-auditor: task has zero captures; nothing to audit.')\n }\n\n yield asSandboxEvent('audit.lens', { lens: task.lens })\n\n const browser = await getBrowser()\n const context = await browser.newContext({ viewport: DEFAULT_VIEWPORT })\n // Track both the primary iteration error and any context-close failure so\n // the cleanup path never silently swallows a leaked-context bug AND a\n // close failure never shadows the real iteration error. 
After the\n // try/catch/finally settles, we rethrow the primary if there was one,\n // otherwise we rethrow the close error.\n let primaryError: unknown\n let closeError: unknown\n try {\n const page = await context.newPage()\n const captures: UiAuditCapture[] = []\n const fs = await import('node:fs/promises')\n const path = await import('node:path')\n const shotsDir = path.join(options.workspaceDir, 'screenshots')\n await fs.mkdir(shotsDir, { recursive: true })\n\n for (const req of task.captures) {\n signal.throwIfAborted()\n const filename = captureFilename(req)\n const absPath = path.join(shotsDir, filename)\n const relPath = `screenshots/${filename}`\n await captureOne(page, req, absPath, signal, navPolicy)\n const vp = viewportOf(req)\n const cap: UiAuditCapture = {\n path: relPath,\n viewport: `${vp.width}x${vp.height}`,\n fullPage: req.fullPage === true,\n route: req.route,\n url: req.url,\n capturedAt: new Date().toISOString(),\n }\n if (req.elementSelector) cap.elementSelector = req.elementSelector\n if (req.label) cap.label = req.label\n captures.push(cap)\n yield asSandboxEvent('audit.capture', cap)\n }\n\n const judgeOut = await options.judge({\n lens: task.lens,\n captures,\n productContext: task.productContext,\n knownFindingIds: task.knownFindingIds,\n promptText,\n signal,\n })\n\n for (const finding of judgeOut.findings) {\n yield asSandboxEvent('audit.finding', finding)\n }\n if (judgeOut.notes && judgeOut.notes.trim().length > 0) {\n yield asSandboxEvent('audit.notes', { notes: judgeOut.notes })\n }\n\n const usage = judgeOut.tokenUsage ?? { input: 0, output: 0 }\n yield asSandboxEvent('done', {\n tokenUsage: {\n inputTokens: usage.input,\n outputTokens: usage.output,\n },\n totalCostUsd: judgeOut.costUsd ?? 
0,\n })\n } catch (err) {\n primaryError = err\n } finally {\n try {\n await context.close()\n } catch (err) {\n closeError = err\n }\n }\n // When both the iteration and the cleanup fail, surface both via\n // AggregateError so a leaked context bug is not silently masked by an\n // earlier iteration failure (per the fail-loud doctrine).\n if (primaryError !== undefined && closeError !== undefined) {\n throw new AggregateError(\n [primaryError, closeError],\n 'ui-auditor: iteration failed AND context.close() failed; both errors attached.',\n )\n }\n if (primaryError !== undefined) throw primaryError\n if (closeError !== undefined) throw closeError\n }\n\n function makeSyntheticSandbox(): SyntheticSandbox {\n const id = makeSandboxId()\n const instance = {\n id,\n streamPrompt(message: string, opts?: { signal?: AbortSignal }): AsyncIterable<SandboxEvent> {\n const signal = opts?.signal ?? new AbortController().signal\n return runIteration(message, signal)\n },\n async delete(): Promise<void> {\n // No per-sandbox resources to release; the browser is shared and\n // closed by `client.close()`. Intentionally a no-op so trace-time\n // `box.delete()` succeeds without doing surprising work.\n },\n }\n return instance as unknown as SyntheticSandbox\n }\n\n return {\n async create(_options?: CreateSandboxOptions) {\n return makeSyntheticSandbox()\n },\n describePlacement(box) {\n const id = (box as unknown as { id?: string }).id\n return { kind: 'sibling', sandboxId: typeof id === 'string' ? id : undefined }\n },\n async close() {\n closed = true\n const pending = browserPromise\n browserPromise = undefined\n if (pending) {\n const browser = await pending\n await browser.close()\n }\n },\n }\n}\n","/*\n @experimental\n \n Per-lens guidance the auditor inlines into its system prompt for an\n * iteration. 
Each entry is a self-contained brief — the same content the\n * standalone ui-issue-finder skill ships, embedded as a string constant so\n * agent-runtime carries no runtime dep on that external workspace.\n \n Briefs are deliberately concrete: they enumerate the SIGNALS to look for\n * and the cross-lens distinctions to respect, so the judge files fewer\n * pile-on findings under generic labels.\n /\n\nimport type { UiLens } from './substrate'\n\n/* @experimental /\nexport const SHARED_AUDITOR_RULES = `\nYou are auditing a UI for a specific class of problems. Stay strictly in your assigned lens — do not file issues that belong to another lens (a separate iteration will catch those).\n\nA finding is only valid if a thoughtful product designer would agree the screenshot shows something that should change. Avoid:\n- Personal taste (\"I'd prefer brand blue\").\n- Hallucinated text or controls you cannot actually see in the screenshot.\n- Suggestions that depend on requirements you don't have access to.\n- Pile-on findings about the same root cause — file ONE finding and use \\`similarTo\\` to link the rest.\n\nRequired for every finding:\n- title: concrete, names the offending element AND what's wrong (NOT \"improve UX\").\n- severity: critical=blocks a core task or accessibility blocker; high=noticeable friction; med=visible polish issue; low=nitpick.\n- observation: 1–3 sentences describing exactly what you see that is wrong.\n- impact: who is affected and how (concrete).\n- suggestedFix: a specific change a developer could apply without asking you back.\n- screenshots: refer to the captures attached to this iteration by path.\n- selector: when you can pin the offending element with a CSS selector.\n\nMost findings are med or low. 
Reserve high/critical for genuine blockers.\n`.trim()\n\n/* @experimental /\nexport const LENS_BRIEFS: Record<UiLens, string> = {\n consistency: `\nLENS: consistency\nLook for inconsistencies in the design system — things that look like they came from different products glued together.\nSignals: multiple font families, inconsistent weights/sizes for the same role, two shades of \"primary\", arbitrary paddings/margins that don't snap to a scale (4/8/12/16/24), same control with different border-radius or shadow on different pages, mixed icon styles (filled vs outlined), inconsistent button heights/padding for the same variant, inconsistent capitalization (Title Case vs sentence case) for the same role.\nNOT this lens: layout misalignment (use \\`layout\\`), confusing user flow (use \\`ux-flow\\`), contrast/keyboard issues (use \\`accessibility\\`).\nTitle format: \\`Inconsistent <thing> between <A> and <B>\\`.\n`.trim(),\n hierarchy: `\nLENS: hierarchy\nLook for broken visual hierarchy — places where the eye does not land on what matters most.\nSignals: primary CTA same weight as secondary/tertiary controls, headings (H1/H2/H3) nearly the same size, important data buried (headline number smaller than its label), decoration outshining content, too many emphases competing, wrong scan order, missing or overly heavy section dividers.\nNOT this lens: same-role styled differently (\\`consistency\\`), grid/alignment (\\`layout\\`), contrast-failing text (\\`accessibility\\`).\nTitle format: \\`Weak hierarchy: <element> does not read as the <intended-role>\\`.\n`.trim(),\n layout: `\nLENS: layout\nLook for layout and organization problems — alignment, grouping, whitespace, structural choices that hurt scannability.\nSignals: misalignment within rows, inconsistent gutters in grids, orphan whitespace next to crammed regions, poor grouping (related fields separated, unrelated fields adjacent), no visual sections (long wall of content), container overflow (text/content punching 
out of card boundaries), cramped or oversized hit targets, sidebars/headers sized wrong relative to main content.\nNOT this lens: same-role styled differently (\\`consistency\\`), click-distance/friction (\\`ux-flow\\`), overflow specifically at small viewports (\\`responsive\\`).\nTitle format: \\`<Region> alignment/spacing problem\\` or \\`<Region> grouping unclear\\`.\n`.trim(),\n 'ux-flow': `\nLENS: ux-flow\nLook for interaction-flow friction — action sequences that are slower, more annoying, or more error-prone than necessary.\nSignals: sequential clicks far apart (e.g. Next top-right while user is bottom-left), destructive action adjacent to primary with same weight, confirmations that don't say what's being confirmed, primary CTA below the fold or hidden in a kebab menu, silent state changes (toggle gives no feedback), form ordering that fights real-world order, dead-end states after submit, lost inputs on back-navigation, hidden pre-selected options.\nNOT this lens: visual style only (\\`consistency\\`), component arrangement without a flow problem (\\`layout\\`), microcopy clarity (\\`content\\`).\nTitle format: \\`<Action A> → <Action B> friction: <root cause>\\`.\n`.trim(),\n duplication: `\nLENS: duplication\nLook for redundancy — the same control, link, or piece of content appearing more than once with no good reason.\nSignals: two ways to do the same action on the same screen with no difference, repeated nav (same links in sidebar AND top nav), drifted duplicates (two copies that have diverged), content repeated verbatim, icon + label saying the same thing twice in one row, per-row + bulk actions that overlap confusingly, multiple status indicators conveying the same status.\nNOT this lens: inconsistent styling of duplicates (\\`consistency\\`) — this lens is about the existence of duplicates.\nTitle format: \\`Duplicate <thing> in <location A> and <location B>\\`.\n`.trim(),\n accessibility: `\nLENS: accessibility\nLook for accessibility blockers and 
degradations. Be conservative — do not assume violations you cannot see.\nSignals: insufficient contrast on body text or controls, missing/invisible focus styles, tiny tap targets (<24px on mobile), color as sole signal (red border with no message), form labels missing or not associated (placeholders standing in for labels), broken heading order (H1 → H4), modals that don't trap focus, decorative elements that take focus, errors not announced, important text rendered inside images.\nNOT this lens: generic \"looks confusing\" (\\`hierarchy\\` or \\`content\\`), layout overflow at small viewports (\\`responsive\\`).\nTitle format: \\`Accessibility: <specific blocker> in <element>\\`.\n`.trim(),\n responsive: `\nLENS: responsive\nLook for layout breakage across viewport sizes — content that works at one width but degrades at another. This iteration's captures should include the same surface at >=2 viewports; compare across them.\nSignals: horizontal scroll where content should reflow, overlapping elements (header overlaps content, fixed footer covers inputs), desktop nav crammed into mobile without collapsing, table columns that don't truncate, tap targets too close at touch sizes, controls vanishing at certain widths, layout flips that break grouping order, modals exceeding viewport height (confirm button unreachable).\nNOT this lens: issues present at every viewport (\\`consistency\\` / \\`hierarchy\\` instead).\nTitle format: \\`<Element/Region> breaks at <viewport>\\`.\n`.trim(),\n states: `\nLENS: states\nLook for missing or broken UI states — the not-happy-paths that make a product feel finished or unfinished. 
The iteration's captures should depict at least one non-default state.\nSignals: empty lists with no guidance, skeletons that don't match final layout (CLS on settle), error states with no message or recovery action, disabled buttons with no explanation, toasts that disappear before being read, success states that don't confirm, missing hover/focus/active/disabled variants on primary controls, no long-content view, no-permission state broken.\nNOT this lens: generic polish on the happy path (other lenses), missing focus rings specifically (\\`accessibility\\`).\nTitle format: \\`Missing/broken <state> state on <surface>\\`.\n`.trim(),\n content: `\nLENS: content\nLook for microcopy and content problems — text that is unclear, inconsistent, condescending, jargon-heavy, or wrong.\nSignals: jargon/internal language leaking (\"Provisioning a Tenant\" instead of \"Setting up your account\"), inconsistent terminology (workspace vs team), verbose button labels, empty-state copy that's just \"No results\", error messages blaming the user, tone inconsistency, truncation without affordance, mixed date/number formats on one page, placeholder used as a label, \"Saved!\" toast appearing before save completes, typos and grammar errors.\nNOT this lens: visual treatment of text (\\`hierarchy\\` / \\`consistency\\`), missing labels for a11y (\\`accessibility\\`).\nTitle format: \\`Copy: \"<actual text>\" in <location>\\` or \\`Inconsistent term: \"<A>\" vs \"<B>\"\\`.\n`.trim(),\n interaction: `\nLENS: interaction\nLook for interaction quality problems — affordances, feedback, and micro-interactions.\nSignals: no affordance (clickable areas not looking clickable, non-clickable areas looking clickable), missing feedback (>100ms click with no progress), hover surprises (whole row highlights but only title clickable), cursor inconsistency, animations that block input, missing transitions where they're needed (accordion snaps open), drag-and-drop without indicators, scroll-jacking, 
click-through bugs (card click handler firing alongside button), hover-only revelations on touch.\nNOT this lens: position of controls (\\`layout\\` / \\`ux-flow\\`), missing focus styles (\\`accessibility\\`).\nTitle format: \\`<Action> on <element>: <missing/wrong> feedback\\`.\n`.trim(),\n 'performance-perceived': `\nLENS: performance-perceived\nLook for perceived-performance problems — visible jank a real user would notice, not benchmark numbers. This iteration's captures should include >=2 frames during load to show shift.\nSignals: layout shift (CLS) when late-arriving images/fonts/banners settle, FOUC (flash of unstyled content), font swap jumps, late-loading hero images that shift everything, skeletons that don't match final shape, spinners on instant local actions, loading state reappearing after content paints (refetch on focus), modal open animation longer than the operation it precedes.\nNOT this lens: slow API calls (file separately), stale data after navigation (\\`states\\`).\nTitle format: \\`Layout shift / late paint on <route>: <root cause>\\`.\n`.trim(),\n other: `\nLENS: other\nUse ONLY when a finding is clearly a UI quality issue but does not fit any other lens. Strongly prefer a specific lens — \\`other\\` should be rare. Title must still be concrete.\n`.trim(),\n}\n\n/\n Build a system prompt for a single auditor iteration.\n \n @experimental\n /\nexport function buildAuditorSystemPrompt(lens: UiLens): string {\n const brief = LENS_BRIEFS[lens]\n return `${SHARED_AUDITOR_RULES}\\n\\n${brief}`\n}\n","/\n @experimental\n \n Sandbox-event stream → UiAuditOutput decoder. The custom auditor\n * `LoopSandboxClient` emits events of the form:\n \n { type: 'audit.capture', data: UiAuditCapture }\n * { type: 'audit.finding', data: UiFinding }\n * { type: 'audit.notes', data: { notes: string } }\n * { type: 'audit.lens', data: { lens: UiLens } }\n * { type: 'done', data: { tokenUsage: { ... 
}, totalCostUsd?: number } }\n \n Other event types are tolerated and ignored. The adapter is pure: it\n * folds an already-collected event array into a UiAuditOutput.\n /\n\nimport type { SandboxEvent } from '@tangle-network/sandbox'\nimport { UI_LENSES, type UiFinding, type UiLens } from './substrate'\nimport type { UiAuditCapture, UiAuditOutput } from './task'\n\n// Build the lens-validation set from the canonical UI_LENSES tuple so adding\n// a lens to the substrate automatically extends the parser; otherwise a new\n// lens would silently fail isUiLens() and parseAuditorEvents would drop\n// every event using it.\nconst KNOWN_LENS_VALUES = new Set<UiLens>(UI_LENSES)\n\nfunction isUiLens(v: unknown): v is UiLens {\n return typeof v === 'string' && KNOWN_LENS_VALUES.has(v as UiLens)\n}\n\n/* @experimental /\nexport function parseAuditorEvents(events: SandboxEvent[]): UiAuditOutput {\n const findings: UiFinding[] = []\n const captures: UiAuditCapture[] = []\n let lens: UiLens \| undefined\n let notes: string \| undefined\n\n for (const evt of events) {\n if (!evt \|\| typeof evt !== 'object') continue\n const type = String(evt.type ?? '')\n const data =\n evt.data && typeof evt.data === 'object' ? 
(evt.data as Record<string, unknown>) : undefined\n if (!data) continue\n\n switch (type) {\n case 'audit.lens': {\n const v = data.lens\n if (isUiLens(v)) lens = v\n break\n }\n case 'audit.capture': {\n const cap = data as unknown as Partial<UiAuditCapture>\n if (\n typeof cap.path === 'string' &&\n typeof cap.viewport === 'string' &&\n typeof cap.fullPage === 'boolean' &&\n typeof cap.route === 'string' &&\n typeof cap.url === 'string' &&\n typeof cap.capturedAt === 'string'\n ) {\n const out: UiAuditCapture = {\n path: cap.path,\n viewport: cap.viewport,\n fullPage: cap.fullPage,\n route: cap.route,\n url: cap.url,\n capturedAt: cap.capturedAt,\n }\n if (cap.elementSelector) out.elementSelector = cap.elementSelector\n if (cap.label) out.label = cap.label\n captures.push(out)\n }\n break\n }\n case 'audit.finding': {\n const f = data as unknown as Partial<UiFinding>\n // Hard requirement: all the actionable fields must be present and\n // non-empty for a finding to enter the output. The validator does the\n // softer scoring; the adapter only filters structural junk.\n if (\n typeof f.title === 'string' &&\n f.title.trim().length > 0 &&\n isUiLens(f.lens) &&\n typeof f.severity === 'string' &&\n ['low', 'med', 'high', 'critical'].includes(f.severity) &&\n typeof f.route === 'string' &&\n typeof f.observation === 'string' &&\n typeof f.impact === 'string' &&\n typeof f.suggestedFix === 'string' &&\n Array.isArray(f.screenshots)\n ) {\n findings.push(f as UiFinding)\n }\n break\n }\n case 'audit.notes': {\n const n = data.notes\n if (typeof n === 'string' && n.trim().length > 0) notes = n\n break\n }\n default:\n // Tolerate cost/usage events and other backend chatter — extractLlmCallEvent\n // in run-loop.ts handles cost accounting upstream from the adapter.\n break\n }\n }\n\n const out: UiAuditOutput = { lens: lens ?? 
'other', findings, captures }\n if (notes) out.notes = notes\n return out\n}\n","/\n @experimental\n \n Auditor validator — scores a single iteration's findings for actionability\n * and gates the iteration result. The kernel uses `valid` + `score` for\n * winner selection across fanned-out iterations and to detect a degenerate\n * iteration (lens-violating findings, no screenshot evidence, no findings\n * at all on a route where we expected some).\n \n Hard fails (`valid = false`):\n * - A finding is filed under a lens that does not match the iteration's\n * lens. The whole iteration is bad — the judge isn't following the\n * lens discipline and the resulting Markdown would mislead reviewers.\n * - A finding has no screenshot reference.\n * - A finding's screenshot references a path that wasn't captured in\n * this iteration.\n \n Score (0..1, max two decimals stable):\n * - 0.4 * specificityRatio — proportion of findings with a selector\n * - 0.4 * evidenceRatio — proportion of findings whose screenshots resolve to captures\n * - 0.2 * (1 - genericTitleRatio) — proportion of findings whose titles\n * are concrete (not \"improve UX\", \"fix layout\", etc.)\n \n An iteration with zero findings scores 0.5 by convention — neither a\n * confident pass nor a hard failure (the judge might just have nothing to\n * say on this lens). 
The driver decides what to do with it.\n /\n\nimport type { DefaultVerdict } from '@tangle-network/agent-eval'\nimport type { Validator } from '../../runtime/types'\nimport type { UiAuditOutput, UiAuditTask } from './task'\n\nconst GENERIC_TITLE_PATTERNS = [\n /^improve\\s/i,\n /^fix\\s/i,\n /^update\\s/i,\n /^better\\s/i,\n /^bad\\s/i,\n /^make\\s.+\\sbetter/i,\n /\\bUX\\b\\s$/i,\n /\\bUI\\b\\s$/i,\n]\n\nfunction isGenericTitle(title: string): boolean {\n const t = title.trim()\n if (t.length < 16) return true\n return GENERIC_TITLE_PATTERNS.some((re) => re.test(t))\n}\n\n/* @experimental /\nexport function createUiAuditorValidator(task: UiAuditTask): Validator<UiAuditOutput> {\n return {\n async validate(output) {\n const findings = output.findings\n const captures = output.captures\n const capturePaths = new Set(captures.map((c) => c.path))\n\n const offLens = findings.filter((f) => f.lens !== task.lens)\n if (offLens.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${offLens.length} finding(s) filed under wrong lens (expected ${task.lens}; got ${offLens.map((f) => f.lens).join(', ')})`,\n scores: { offLens: 0 },\n }\n return verdict\n }\n\n const missingEvidence = findings.filter(\n (f) => !Array.isArray(f.screenshots) \|\| f.screenshots.length === 0,\n )\n if (missingEvidence.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${missingEvidence.length} finding(s) have no screenshot evidence`,\n scores: { evidence: 0 },\n }\n return verdict\n }\n\n const unresolvedShot = findings.filter((f) =>\n f.screenshots.some((s) => !capturePaths.has(s.path)),\n )\n if (unresolvedShot.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${unresolvedShot.length} finding(s) reference screenshot paths not captured this iteration`,\n scores: { evidence: 0 },\n }\n return verdict\n }\n\n if (findings.length === 0) {\n const verdict: DefaultVerdict = {\n valid: true,\n 
score: 0.5,\n notes: 'No findings reported. Neither a confident pass nor a failure.',\n scores: { specificity: 0, evidence: 1, titles: 1 },\n }\n return verdict\n }\n\n const withSelector = findings.filter((f) => typeof f.selector === 'string').length\n const specificity = withSelector / findings.length\n const generic = findings.filter((f) => isGenericTitle(f.title)).length\n const titles = 1 - generic / findings.length\n // Compute evidence honestly from the data: proportion of findings whose\n // screenshots are all resolvable against this iteration's captures. The\n // guards above hard-fail when this would be < 1, so today the result is\n // always 1; if a future change relaxes those guards into a soft-fail\n // mode, this still produces a truthful evidence ratio rather than a\n // stale constant inflating the score.\n const withFullEvidence = findings.filter(\n (f) =>\n Array.isArray(f.screenshots) &&\n f.screenshots.length > 0 &&\n f.screenshots.every((s) => capturePaths.has(s.path)),\n ).length\n const evidence = withFullEvidence / findings.length\n const score = Number((0.4 specificity + 0.4 * evidence + 0.2 * titles).toFixed(4))\n\n const verdict: DefaultVerdict = {\n valid: true,\n score,\n notes: `${findings.length} finding(s) — specificity=${specificity.toFixed(2)} evidence=${evidence.toFixed(2)} titles=${titles.toFixed(2)}`,\n scores: { specificity, evidence, titles },\n }\n return verdict\n },\n }\n}\n","/*\n @experimental\n \n `uiAuditorProfile` — preset for vision-driven UI audit iterations.\n \n Mirrors the shape of `coderProfile`: returns the `AgentRunSpec`, output\n * adapter, validator, and prompt formatter the loop kernel needs. Unlike\n * `coderProfile`, the agent's \"harness\" is not a sandbox-SDK code-runner\n * — it's a vision-capable judge driving a browser. 
The loop kernel still\n * iterates `client.create() → box.streamPrompt() → box.delete()`; the\n * client/box pair are provided by `createInProcessUiAuditClient` (in\n * `./in-process-client.ts`) or by a consumer-supplied `LoopSandboxClient`.\n /\n\nimport type { AgentProfile } from '@tangle-network/sandbox'\nimport type { AgentRunSpec, OutputAdapter, Validator } from '../../runtime/types'\nimport { buildAuditorSystemPrompt } from './lens-prompts'\nimport { parseAuditorEvents } from './output-adapter'\nimport { encodeAuditTaskEnvelope, formatAuditorPrompt } from './prompt'\nimport type { UiAuditOutput, UiAuditTask } from './task'\nimport { createUiAuditorValidator } from './validator'\n\n/* @experimental /\nexport interface UiAuditorProfileOptions {\n /\n Stable name surfaced in trace events. Defaults to `ui-auditor`.\n /\n name?: string\n /\n Optional model identifier passed in `AgentProfile.model.default`.\n * The consumer's `LoopSandboxClient` chooses how to interpret it.\n /\n model?: string\n /\n Task bound to the validator. Without it the validator uses the lens\n * embedded in the iteration output as its expectation — fine for one-off\n * use; less strict than passing the task explicitly.\n /\n task?: UiAuditTask\n}\n\n/* @experimental */\nexport function uiAuditorProfile(options: UiAuditorProfileOptions = {}): {\n profile: AgentProfile\n taskToPrompt: (task: UiAuditTask) => string\n output: OutputAdapter<UiAuditOutput>\n validator: Validator<UiAuditOutput>\n agentRunSpec: AgentRunSpec<UiAuditTask>\n} {\n const name = options.name ?? 'ui-auditor'\n\n // Lens is per-task; the profile's system prompt is filled in by the\n // taskToPrompt formatter at iteration time (prefixed to the user\n // message). Keeping the profile lens-agnostic lets one AgentRunSpec\n // serve every lens-iteration of the loop.\n const profile: AgentProfile = {\n name,\n description: 'Vision-driven UI auditor. 
One lens per iteration.',\n prompt: { systemPrompt: '' },\n model: options.model ? { default: options.model } : undefined,\n tools: { browser: true, vision: true },\n metadata: { role: 'ui-auditor' },\n }\n\n const output: OutputAdapter<UiAuditOutput> = { parse: parseAuditorEvents }\n const validator: Validator<UiAuditOutput> = options.task\n ? createUiAuditorValidator(options.task)\n : createUiAuditorValidator({ lens: 'other', captures: [] })\n\n // Prompt shape (consumed both by sandbox-SDK harnesses AND by the\n // in-process auditor client):\n // <<UI_AUDIT_TASK>>{json}<<UI_AUDIT_TASK_END>>\n // <system-prompt for the lens>\n // <human-readable iteration brief>\n // The envelope makes the iteration self-describing so concurrent fanout\n // does not race over per-client side state. Sandbox-SDK harnesses can\n // ignore the envelope; the in-process auditor client decodes it back\n // into a typed UiAuditTask via decodeAuditTaskEnvelope.\n const taskToPrompt = (task: UiAuditTask): string =>\n `${encodeAuditTaskEnvelope(task)}\\n${buildAuditorSystemPrompt(task.lens)}\\n\\n${formatAuditorPrompt(task)}`\n\n const agentRunSpec: AgentRunSpec<UiAuditTask> = {\n name,\n profile,\n taskToPrompt,\n }\n\n return { profile, taskToPrompt, output, validator, agentRunSpec 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;AAoBA,IAAM,iBAAiB;AACvB,IAAM,eAAe;AAGd,SAAS,wBAAwB,MAA2B;AACjE,SAAO,GAAG,cAAc,GAAG,KAAK,UAAU,IAAI,CAAC,GAAG,YAAY;AAChE;AASO,SAAS,wBAAwB,QAAyC;AAC/E,QAAM,QAAQ,OAAO,QAAQ,cAAc;AAC3C,MAAI,UAAU,GAAI,QAAO;AACzB,QAAM,eAAe,QAAQ,eAAe;AAC5C,QAAM,MAAM,OAAO,QAAQ,cAAc,YAAY;AACrD,MAAI,QAAQ,GAAI,QAAO;AACvB,QAAM,UAAU,OAAO,MAAM,cAAc,GAAG;AAC9C,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,OAAO;AACjC,QAAI,CAAC,UAAU,OAAO,WAAW,SAAU,QAAO;AAClD,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,SAAS,YAAY,CAAC,MAAM,QAAQ,EAAE,QAAQ,EAAG,QAAO;AACrE,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAGO,SAAS,oBAAoB,MAA2B;AAC7D,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,qCAAgC,KAAK,IAAI,EAAE;AACtD,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,kBAAkB,KAAK,eAAe,KAAK,EAAE,SAAS,GAAG;AAChE,UAAM,KAAK,oBAAoB;AAC/B,UAAM,KAAK,KAAK,eAAe,KAAK,CAAC;AACrC,UAAM,KAAK,EAAE;AAAA,EACf;AACA,QAAM,KAAK,qBAAqB;AAChC,OAAK,SAAS,QAAQ,CAAC,KAAK,MAAM;AAChC,UAAM,KAAK,IAAI,WAAW,GAAG,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,MAAM,KAAK;AAC3E,UAAM,SAAS;AAAA,MACb,YAAY,EAAE;AAAA,MACd,IAAI,WAAW,kBAAkB;AAAA,MACjC,IAAI,kBAAkB,cAAc,IAAI,eAAe,OAAO;AAAA,MAC9D,IAAI,UAAU,aAAa,IAAI,OAAO,OAAO;AAAA,MAC7C,IAAI,WAAW,SAAY,UAAU,IAAI,MAAM,KAAK;AAAA,MACpD,IAAI,QAAQ,SAAS,IAAI,KAAK,KAAK;AAAA,IACrC,EACG,OAAO,CAAC,MAAmB,MAAM,IAAI,EACrC,KAAK,QAAK;AACb,UAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,KAAK,UAAU,IAAI,GAAG,IAAI,SAAS,IAAI,MAAM,MAAM,EAAE,EAAE;AAAA,EAC7F,CAAC;AACD,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,mBAAmB,KAAK,gBAAgB,SAAS,GAAG;AAC3D,UAAM,KAAK,uDAAuD;AAClE,UAAM,KAAK,KAAK,gBAAgB,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,EAAE,SAAS,GAAG,GAAG,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC;AACvF,UAAM,KAAK,EAAE;AAAA,EACf;AACA,QAAM,KAAK,kBAAkB;AAC7B,QAAM;AAAA,IACJ;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;;;AClDA,SAAS,eAAkB,MAAc,MAAuB;AAC9D,SAAO,EAAE,MAAM,KAAK;AACtB;AAuDA,IAAM,mBAAmB,EAAE,OAAO,MAAM,QAAQ,IAAI;AACpD,IAAM,iBAAiB;AAEvB,eAAe,gBAAwC;AACrD,QAAM,MAAO,MAAM,OAAO,YAAY;AAGtC,MAAI,CAAC,KAAK,YAAY,OAAO,IAAI,SAAS,WAAW,YAAY;AAC/D,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO,IAAI,SAAS,OAAO,EAAE,UAAU,KAAK,CAAC;AAC/C;AAEA,SAAS,WAAmB;AAC
1B,QAAM,IAAI,oBAAI,KAAK;AACnB,QAAM,MAAM,CAAC,MAAc,OAAO,CAAC,EAAE,SAAS,GAAG,GAAG;AAIpD,SACE,GAAG,EAAE,eAAe,CAAC,GAAG,IAAI,EAAE,YAAY,IAAI,CAAC,CAAC,GAAG,IAAI,EAAE,WAAW,CAAC,CAAC,IACnE,IAAI,EAAE,YAAY,CAAC,CAAC,GAAG,IAAI,EAAE,cAAc,CAAC,CAAC,GAAG,IAAI,EAAE,cAAc,CAAC,CAAC,IACrE,OAAO,EAAE,mBAAmB,CAAC,EAAE,SAAS,GAAG,GAAG,CAAC;AAEvD;AAEA,SAAS,WAAW,KAA+D;AACjF,SAAO,IAAI,YAAY;AACzB;AAEA,SAAS,gBAAgB,KAAoC;AAC3D,QAAM,KAAK,WAAW,GAAG;AACzB,QAAM,YAAY,IAAI,QAAQ,KAAK,QAAQ,IAAI,OAAO,OAAO,CAAC,KAAK;AACnE,SAAO,GAAG,QAAQ,IAAI,OAAO,OAAO,CAAC,KAAK,GAAG,KAAK,IAAI,GAAG,MAAM,GAAG,SAAS,KAAK,SAAS,CAAC;AAC5F;AAEA,SAAS,cAAc,KAAmB;AACxC,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;AAAA,EACtB,QAAQ;AACN,UAAM,IAAI,MAAM,iDAAiD,KAAK,UAAU,GAAG,CAAC,GAAG;AAAA,EACzF;AAMA,MAAI,OAAO,aAAa,WAAW,OAAO,aAAa,UAAU;AAC/D,UAAM,IAAI;AAAA,MACR,uDAAuD,OAAO,QAAQ,OAAO,KAAK,UAAU,GAAG,CAAC;AAAA,IAClG;AAAA,EACF;AACF;AAEA,eAAe,WACb,MACA,KACA,YACA,QACA,WACe;AACf,SAAO,eAAe;AACtB,gBAAc,IAAI,GAAG;AAGrB,QAAM,KAAK,gBAAgB,WAAW,GAAG,CAAC;AAC1C,QAAM,YAAY,cAAc,QAAQ,qBAAqB;AAC7D,QAAM,KAAK,KAAK,IAAI,KAAK,EAAE,WAAW,SAAS,eAAe,CAAC;AAC/D,MAAI,IAAI,SAAS;AACf,UAAM,KAAK,gBAAgB,IAAI,SAAS,EAAE,SAAS,KAAO,CAAC;AAAA,EAC7D;AACA,QAAM,QAAQ,IAAI,UAAU;AAC5B,MAAI,QAAQ,EAAG,OAAM,KAAK,eAAe,KAAK;AAC9C,SAAO,eAAe;AACtB,MAAI,IAAI,iBAAiB;AACvB,UAAM,KAAK,QAAQ,IAAI,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAAA,EACjF,OAAO;AACL,UAAM,KAAK,WAAW,EAAE,MAAM,YAAY,UAAU,IAAI,aAAa,KAAK,CAAC;AAAA,EAC7E;AACF;AAIA,SAAS,gBAAwB;AAC/B,QAAM,OAAO,MAAM,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE;AACzD,SAAO,YAAY,KAAK,CAAC,GAAG,KAAK,CAAC;AACpC;AAGO,SAAS,6BACd,SAeA;AACA,QAAM,SAAS,QAAQ,iBAAiB;AACxC,QAAM,YAAY,QAAQ,aAAa;AACvC,MAAI;AACJ,MAAI,SAAS;AAEb,iBAAe,aAAqC;AAClD,QAAI,QAAQ;AACV,YAAM,IAAI,MAAM,4EAA4E;AAAA,IAC9F;AACA,QAAI,CAAC,eAAgB,kBAAiB,OAAO;AAC7C,WAAO;AAAA,EACT;AAEA,kBAAgB,aACd,YACA,QAC6B;AAC7B,UAAM,OAAO,wBAAwB,UAAU;AAC/C,QAAI,CAAC,MAAM;AACT,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,QAAI,KAAK,SAAS,WAAW,GAAG;AAC9B,YAAM,IAAI,MAAM,uDAAuD;AAAA,IACzE;AAEA,UAAM,eAAe,cAAc,EAAE,MAAM,KA
AK,KAAK,CAAC;AAEtD,UAAM,UAAU,MAAM,WAAW;AACjC,UAAM,UAAU,MAAM,QAAQ,WAAW,EAAE,UAAU,iBAAiB,CAAC;AAMvE,QAAI;AACJ,QAAI;AACJ,QAAI;AACF,YAAM,OAAO,MAAM,QAAQ,QAAQ;AACnC,YAAM,WAA6B,CAAC;AACpC,YAAM,KAAK,MAAM,OAAO,aAAkB;AAC1C,YAAM,OAAO,MAAM,OAAO,MAAW;AACrC,YAAM,WAAW,KAAK,KAAK,QAAQ,cAAc,aAAa;AAC9D,YAAM,GAAG,MAAM,UAAU,EAAE,WAAW,KAAK,CAAC;AAE5C,iBAAW,OAAO,KAAK,UAAU;AAC/B,eAAO,eAAe;AACtB,cAAM,WAAW,gBAAgB,GAAG;AACpC,cAAM,UAAU,KAAK,KAAK,UAAU,QAAQ;AAC5C,cAAM,UAAU,eAAe,QAAQ;AACvC,cAAM,WAAW,MAAM,KAAK,SAAS,QAAQ,SAAS;AACtD,cAAM,KAAK,WAAW,GAAG;AACzB,cAAM,MAAsB;AAAA,UAC1B,MAAM;AAAA,UACN,UAAU,GAAG,GAAG,KAAK,IAAI,GAAG,MAAM;AAAA,UAClC,UAAU,IAAI,aAAa;AAAA,UAC3B,OAAO,IAAI;AAAA,UACX,KAAK,IAAI;AAAA,UACT,aAAY,oBAAI,KAAK,GAAE,YAAY;AAAA,QACrC;AACA,YAAI,IAAI,gBAAiB,KAAI,kBAAkB,IAAI;AACnD,YAAI,IAAI,MAAO,KAAI,QAAQ,IAAI;AAC/B,iBAAS,KAAK,GAAG;AACjB,cAAM,eAAe,iBAAiB,GAAG;AAAA,MAC3C;AAEA,YAAM,WAAW,MAAM,QAAQ,MAAM;AAAA,QACnC,MAAM,KAAK;AAAA,QACX;AAAA,QACA,gBAAgB,KAAK;AAAA,QACrB,iBAAiB,KAAK;AAAA,QACtB;AAAA,QACA;AAAA,MACF,CAAC;AAED,iBAAW,WAAW,SAAS,UAAU;AACvC,cAAM,eAAe,iBAAiB,OAAO;AAAA,MAC/C;AACA,UAAI,SAAS,SAAS,SAAS,MAAM,KAAK,EAAE,SAAS,GAAG;AACtD,cAAM,eAAe,eAAe,EAAE,OAAO,SAAS,MAAM,CAAC;AAAA,MAC/D;AAEA,YAAM,QAAQ,SAAS,cAAc,EAAE,OAAO,GAAG,QAAQ,EAAE;AAC3D,YAAM,eAAe,QAAQ;AAAA,QAC3B,YAAY;AAAA,UACV,aAAa,MAAM;AAAA,UACnB,cAAc,MAAM;AAAA,QACtB;AAAA,QACA,cAAc,SAAS,WAAW;AAAA,MACpC,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,qBAAe;AAAA,IACjB,UAAE;AACA,UAAI;AACF,cAAM,QAAQ,MAAM;AAAA,MACtB,SAAS,KAAK;AACZ,qBAAa;AAAA,MACf;AAAA,IACF;AAIA,QAAI,iBAAiB,UAAa,eAAe,QAAW;AAC1D,YAAM,IAAI;AAAA,QACR,CAAC,cAAc,UAAU;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AACA,QAAI,iBAAiB,OAAW,OAAM;AACtC,QAAI,eAAe,OAAW,OAAM;AAAA,EACtC;AAEA,WAAS,uBAAyC;AAChD,UAAM,KAAK,cAAc;AACzB,UAAM,WAAW;AAAA,MACf;AAAA,MACA,aAAa,SAAiB,MAA8D;AAC1F,cAAM,SAAS,MAAM,UAAU,IAAI,gBAAgB,EAAE;AACrD,eAAO,aAAa,SAAS,MAAM;AAAA,MACrC;AAAA,MACA,MAAM,SAAwB;AAAA,MAI9B;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,MAAM,OAAO,UAAiC;AAC5C,aAAO,qBAAqB;AAAA,IAC9B;AAAA,IACA,kBAAkB,KAAK;AACrB,YAAM,KAAM,IAAmC;AAC/C,aAAO,EAA
E,MAAM,WAAW,WAAW,OAAO,OAAO,WAAW,KAAK,OAAU;AAAA,IAC/E;AAAA,IACA,MAAM,QAAQ;AACZ,eAAS;AACT,YAAM,UAAU;AAChB,uBAAiB;AACjB,UAAI,SAAS;AACX,cAAM,UAAU,MAAM;AACtB,cAAM,QAAQ,MAAM;AAAA,MACtB;AAAA,IACF;AAAA,EACF;AACF;;;ACjVO,IAAM,uBAAuB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBlC,KAAK;AAGA,IAAM,cAAsC;AAAA,EACjD,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,WAAW;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMX,KAAK;AAAA,EACL,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMR,KAAK;AAAA,EACL,WAAW;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMX,KAAK;AAAA,EACL,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,eAAe;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMf,KAAK;AAAA,EACL,YAAY;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMZ,KAAK;AAAA,EACL,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMR,KAAK;AAAA,EACL,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMT,KAAK;AAAA,EACL,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,yBAAyB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMzB,KAAK;AAAA,EACL,OAAO;AAAA;AAAA;AAAA,EAGP,KAAK;AACP;AAOO,SAAS,yBAAyB,MAAsB;AAC7D,QAAM,QAAQ,YAAY,IAAI;AAC9B,SAAO,GAAG,oBAAoB;AAAA;AAAA,EAAO,KAAK;AAC5C;;;AC1GA,IAAM,oBAAoB,IAAI,IAAY,SAAS;AAEnD,SAAS,SAAS,GAAyB;AACzC,SAAO,OAAO,MAAM,YAAY,kBAAkB,IAAI,CAAW;AACnE;AAGO,SAAS,mBAAmB,QAAuC;AACxE,QAAM,WAAwB,CAAC;AAC/B,QAAM,WAA6B,CAAC;AACpC,MAAI;AACJ,MAAI;AAEJ,aAAW,OAAO,QAAQ;AACxB,QAAI,CAAC,OAAO,OAAO,QAAQ,SAAU;AACrC,UAAM,OAAO,OAAO,IAAI,QAAQ,EAAE;AAClC,UAAM,OACJ,IAAI,QAAQ,OAAO,IAAI,SAAS,WAAY,IAAI,OAAmC;AACrF,QAAI,CAAC,KAAM;AAEX,YAAQ,MAAM;AAAA,MACZ,KAAK,cAAc;AACjB,cAAM,IAAI,KAAK;AACf,YAAI,SAAS,CAAC,EAAG,QAAO;AACxB;AAAA,MACF;AAAA,MACA,KAAK,iBAAiB;AACpB,cAAM,MAAM;AACZ,YACE,OAAO,IAAI,SAAS,YACpB,OAAO,IAAI,aAAa,YACxB,OAAO,IAAI,aAAa,aACxB,OAAO,IAAI,UAAU,YACrB,OAAO,IAAI,QAAQ,YACnB,OAAO,IAAI,eAAe,UAC1B;AACA,gBAAMA,OAAsB;AAAA,YAC1B,MAAM,IAAI;AAAA,YACV,UAAU,IAAI;AAAA,YACd,UAAU,IAAI;AAAA,YACd,OAAO,IAAI;AAAA,YACX,KAAK,IAAI;AAAA,YACT,YAAY,IAAI;AAAA,UAClB;AACA,cAAI,IAAI,gBAAiB,CAAAA,KAAI,kBAAkB,IAAI;AACnD,cAAI,IAAI,MAAO,CAAAA,KAAI,QAAQ,IAAI;AAC/B,mBAAS,KAAKA,IAAG;A
AAA,QACnB;AACA;AAAA,MACF;AAAA,MACA,KAAK,iBAAiB;AACpB,cAAM,IAAI;AAIV,YACE,OAAO,EAAE,UAAU,YACnB,EAAE,MAAM,KAAK,EAAE,SAAS,KACxB,SAAS,EAAE,IAAI,KACf,OAAO,EAAE,aAAa,YACtB,CAAC,OAAO,OAAO,QAAQ,UAAU,EAAE,SAAS,EAAE,QAAQ,KACtD,OAAO,EAAE,UAAU,YACnB,OAAO,EAAE,gBAAgB,YACzB,OAAO,EAAE,WAAW,YACpB,OAAO,EAAE,iBAAiB,YAC1B,MAAM,QAAQ,EAAE,WAAW,GAC3B;AACA,mBAAS,KAAK,CAAc;AAAA,QAC9B;AACA;AAAA,MACF;AAAA,MACA,KAAK,eAAe;AAClB,cAAM,IAAI,KAAK;AACf,YAAI,OAAO,MAAM,YAAY,EAAE,KAAK,EAAE,SAAS,EAAG,SAAQ;AAC1D;AAAA,MACF;AAAA,MACA;AAGE;AAAA,IACJ;AAAA,EACF;AAEA,QAAM,MAAqB,EAAE,MAAM,QAAQ,SAAS,UAAU,SAAS;AACvE,MAAI,MAAO,KAAI,QAAQ;AACvB,SAAO;AACT;;;AC9EA,IAAM,yBAAyB;AAAA,EAC7B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,SAAS,eAAe,OAAwB;AAC9C,QAAM,IAAI,MAAM,KAAK;AACrB,MAAI,EAAE,SAAS,GAAI,QAAO;AAC1B,SAAO,uBAAuB,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,CAAC;AACvD;AAGO,SAAS,yBAAyB,MAA6C;AACpF,SAAO;AAAA,IACL,MAAM,SAAS,QAAQ;AACrB,YAAM,WAAW,OAAO;AACxB,YAAM,WAAW,OAAO;AACxB,YAAM,eAAe,IAAI,IAAI,SAAS,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC;AAExD,YAAM,UAAU,SAAS,OAAO,CAAC,MAAM,EAAE,SAAS,KAAK,IAAI;AAC3D,UAAI,QAAQ,SAAS,GAAG;AACtB,cAAMC,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,QAAQ,MAAM,gDAAgD,KAAK,IAAI,SAAS,QAAQ,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,UAC/H,QAAQ,EAAE,SAAS,EAAE;AAAA,QACvB;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,kBAAkB,SAAS;AAAA,QAC/B,CAAC,MAAM,CAAC,MAAM,QAAQ,EAAE,WAAW,KAAK,EAAE,YAAY,WAAW;AAAA,MACnE;AACA,UAAI,gBAAgB,SAAS,GAAG;AAC9B,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,gBAAgB,MAAM;AAAA,UAChC,QAAQ,EAAE,UAAU,EAAE;AAAA,QACxB;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,iBAAiB,SAAS;AAAA,QAAO,CAAC,MACtC,EAAE,YAAY,KAAK,CAAC,MAAM,CAAC,aAAa,IAAI,EAAE,IAAI,CAAC;AAAA,MACrD;AACA,UAAI,eAAe,SAAS,GAAG;AAC7B,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,eAAe,MAAM;AAAA,UAC/B,QAAQ,EAAE,UAAU,EAAE;AAAA,QACxB;AACA,eAAOA;AAAA,MACT;AAEA,UAAI,SAAS,WAAW,GAAG;AACzB,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO;AAAA,UACP,QAAQ,EAAE,aAAa,GAAG,UAAU,GAAG,QAAQ,EAAE;A
AAA,QACnD;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,eAAe,SAAS,OAAO,CAAC,MAAM,OAAO,EAAE,aAAa,QAAQ,EAAE;AAC5E,YAAM,cAAc,eAAe,SAAS;AAC5C,YAAM,UAAU,SAAS,OAAO,CAAC,MAAM,eAAe,EAAE,KAAK,CAAC,EAAE;AAChE,YAAM,SAAS,IAAI,UAAU,SAAS;AAOtC,YAAM,mBAAmB,SAAS;AAAA,QAChC,CAAC,MACC,MAAM,QAAQ,EAAE,WAAW,KAC3B,EAAE,YAAY,SAAS,KACvB,EAAE,YAAY,MAAM,CAAC,MAAM,aAAa,IAAI,EAAE,IAAI,CAAC;AAAA,MACvD,EAAE;AACF,YAAM,WAAW,mBAAmB,SAAS;AAC7C,YAAM,QAAQ,QAAQ,MAAM,cAAc,MAAM,WAAW,MAAM,QAAQ,QAAQ,CAAC,CAAC;AAEnF,YAAM,UAA0B;AAAA,QAC9B,OAAO;AAAA,QACP;AAAA,QACA,OAAO,GAAG,SAAS,MAAM,kCAA6B,YAAY,QAAQ,CAAC,CAAC,aAAa,SAAS,QAAQ,CAAC,CAAC,WAAW,OAAO,QAAQ,CAAC,CAAC;AAAA,QACxI,QAAQ,EAAE,aAAa,UAAU,OAAO;AAAA,MAC1C;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;;;AC1FO,SAAS,iBAAiB,UAAmC,CAAC,GAMnE;AACA,QAAM,OAAO,QAAQ,QAAQ;AAM7B,QAAM,UAAwB;AAAA,IAC5B;AAAA,IACA,aAAa;AAAA,IACb,QAAQ,EAAE,cAAc,GAAG;AAAA,IAC3B,OAAO,QAAQ,QAAQ,EAAE,SAAS,QAAQ,MAAM,IAAI;AAAA,IACpD,OAAO,EAAE,SAAS,MAAM,QAAQ,KAAK;AAAA,IACrC,UAAU,EAAE,MAAM,aAAa;AAAA,EACjC;AAEA,QAAM,SAAuC,EAAE,OAAO,mBAAmB;AACzE,QAAM,YAAsC,QAAQ,OAChD,yBAAyB,QAAQ,IAAI,IACrC,yBAAyB,EAAE,MAAM,SAAS,UAAU,CAAC,EAAE,CAAC;AAW5D,QAAM,eAAe,CAAC,SACpB,GAAG,wBAAwB,IAAI,CAAC;AAAA,EAAK,yBAAyB,KAAK,IAAI,CAAC;AAAA;AAAA,EAAO,oBAAoB,IAAI,CAAC;AAE1G,QAAM,eAA0C;AAAA,IAC9C;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,cAAc,QAAQ,WAAW,aAAa;AAClE;","names":["out","verdict"]}
1	+ {"version":3,"sources":["../src/profiles/ui-auditor/prompt.ts","../src/profiles/ui-auditor/in-process-client.ts","../src/profiles/ui-auditor/lens-prompts.ts","../src/profiles/ui-auditor/output-adapter.ts","../src/profiles/ui-auditor/validator.ts","../src/profiles/ui-auditor/profile.ts"],"sourcesContent":["/*\n @experimental\n \n Prompt formatter for the auditor profile. `formatAuditorPrompt` produces\n * the user message handed to the iteration — describes the captures to be\n * taken and the lens to apply. The system prompt comes from\n * `buildAuditorSystemPrompt(lens)` (lens-prompts.ts).\n \n The formatter prepends a machine-readable envelope (`<<UI_AUDIT_TASK>>`\n * … `<<UI_AUDIT_TASK_END>>`) carrying a JSON-serialised task. The\n * in-process auditor client recovers the task from this envelope so the\n * iteration is self-describing — robust to concurrent fanout, where any\n * per-client side state (e.g. a \"current task\" register) would race.\n \n The formatter is pure and deterministic — re-run on the same task\n * produces the same prompt. Tests and trace replays rely on this.\n /\n\nimport type { UiAuditTask } from './task'\n\nconst ENVELOPE_BEGIN = '<<UI_AUDIT_TASK>>'\nconst ENVELOPE_END = '<<UI_AUDIT_TASK_END>>'\n\n/* @experimental /\nexport function encodeAuditTaskEnvelope(task: UiAuditTask): string {\n return `${ENVELOPE_BEGIN}${JSON.stringify(task)}${ENVELOPE_END}`\n}\n\n/\n Parse a task envelope back out of a prompt string. 
Returns undefined if\n * the prompt does not contain a complete envelope OR if the payload is\n * not valid JSON.\n \n @experimental\n /\nexport function decodeAuditTaskEnvelope(prompt: string): UiAuditTask \| undefined {\n const start = prompt.indexOf(ENVELOPE_BEGIN)\n if (start === -1) return undefined\n const payloadStart = start + ENVELOPE_BEGIN.length\n const end = prompt.indexOf(ENVELOPE_END, payloadStart)\n if (end === -1) return undefined\n const payload = prompt.slice(payloadStart, end)\n try {\n const parsed = JSON.parse(payload) as unknown\n if (!parsed \|\| typeof parsed !== 'object') return undefined\n const t = parsed as Partial<UiAuditTask>\n if (typeof t.lens !== 'string' \|\| !Array.isArray(t.captures)) return undefined\n return t as UiAuditTask\n } catch {\n return undefined\n }\n}\n\n/* @experimental /\nexport function formatAuditorPrompt(task: UiAuditTask): string {\n const lines: string[] = []\n lines.push(`# UI audit iteration — lens: ${task.lens}`)\n lines.push('')\n if (task.productContext && task.productContext.trim().length > 0) {\n lines.push('## Product context')\n lines.push(task.productContext.trim())\n lines.push('')\n }\n lines.push('## Captures to take')\n task.captures.forEach((cap, i) => {\n const vp = cap.viewport ? `${cap.viewport.width}x${cap.viewport.height}` : '1280x800 (default)'\n const detail = [\n `viewport=${vp}`,\n cap.fullPage ? 'fullPage=true' : null,\n cap.elementSelector ? `selector=\\`${cap.elementSelector}\\`` : null,\n cap.waitFor ? `waitFor=\\`${cap.waitFor}\\`` : null,\n cap.waitMs !== undefined ? `waitMs=${cap.waitMs}` : null,\n cap.label ? `label=${cap.label}` : null,\n ]\n .filter((s): s is string => s !== null)\n .join(' · ')\n lines.push(`${i + 1}. route=\\`${cap.route}\\` url=${cap.url} ${detail ? 
`(${detail})` : ''}`)\n })\n lines.push('')\n if (task.knownFindingIds && task.knownFindingIds.length > 0) {\n lines.push('## Known findings (link via similarTo, do not refile)')\n lines.push(task.knownFindingIds.map((n) => `#${String(n).padStart(3, '0')}`).join(', '))\n lines.push('')\n }\n lines.push('## Output format')\n lines.push(\n 'Emit a single JSON object with the shape `{ findings: UiFinding[], notes?: string }` where every finding has the fields enumerated in your system prompt. The screenshots field on each finding must reference the captures above by path. Do not emit findings outside the lens.',\n )\n return lines.join('\\n')\n}\n","/\n @experimental\n \n `createInProcessUiAuditClient` — a `SandboxClient` that drives a\n * Playwright browser in-process and delegates finding identification to a\n * consumer-supplied {@link UiJudge}.\n \n Why this exists: `runLoop` is built around a sandbox-SDK seam — each\n * iteration is `client.create() → box.streamPrompt() → box.delete()`.\n * For UI audit, spinning up a real container running a coding harness\n * per iteration is overkill: the work is one browser capture + one\n * vision LLM call. This client satisfies the kernel contract while\n * doing the audit in-process; no container, no sandbox-SDK backend.\n \n The client owns ONE browser for its lifetime and creates a fresh\n * context per iteration (isolated cookies/storage). Playwright is\n * dynamically imported so consumers who use a different `SandboxClient`\n * — e.g. 
a fleet executor that drives Playwright remotely — do not pay\n * the peer dep cost.\n \n Concurrency: each iteration's prompt carries a self-describing task\n * envelope (see `prompt.ts`), so concurrent fanout iterations do not race\n * over per-client side state.\n /\n\nimport type { CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox'\nimport type { SandboxClient } from '../../runtime/types'\nimport type { UiJudge } from './judge'\nimport { decodeAuditTaskEnvelope } from './prompt'\nimport { slugify } from './slugify'\nimport type { UiAuditCapture, UiAuditCaptureRequest } from './task'\n\n// All synthetic events the auditor emits flow through this helper. Reason:\n// `SandboxEvent.data` is a sandbox-SDK shape (effectively `Record<string,\n// unknown>`) that our typed payloads (`UiAuditCapture`, `UiFinding`, …) do not\n// satisfy structurally. The cast moves the type-system smell into a single,\n// named, documented call site so the call sites in `runIteration` stay clean.\n// The runtime contract — `{ type, data }` — is what the output adapter reads;\n// the static type is what the kernel collects into `SandboxEvent[]`.\nfunction asSandboxEvent<T>(type: string, data: T): SandboxEvent {\n return { type, data } as unknown as SandboxEvent\n}\n\n/* @experimental /\nexport interface InProcessUiAuditClientOptions {\n /\n Absolute path under which screenshots are written. Each capture lands\n * at `<workspaceDir>/screenshots/<filename>`; finding screenshot paths\n * are workspace-relative (`screenshots/<filename>`).\n /\n workspaceDir: string\n /* The vision judge that turns captures into findings. /\n judge: UiJudge\n /\n Navigation policy.\n \n `'strict'` (default) waits for `networkidle` and fails the iteration\n * if the page does not settle. 
`'spa'` waits for `domcontentloaded` —\n * use for single-page apps that hold open long-poll/websocket\n * connections and never settle.\n /\n navPolicy?: 'strict' \| 'spa'\n /\n Browser launch override. Default: chromium headless via Playwright.\n * Consumers pass a custom factory to target a remote browser, a\n * different channel, or a fleet adapter.\n /\n launchBrowser?: () => Promise<BrowserHandle>\n}\n\n/* @experimental /\nexport interface BrowserHandle {\n newContext(options?: {\n viewport?: { width: number; height: number }\n }): Promise<BrowserContextHandle>\n close(): Promise<void>\n}\n\n/* @experimental /\nexport interface BrowserContextHandle {\n newPage(): Promise<PageHandle>\n close(): Promise<void>\n}\n\n/* @experimental /\nexport interface PageHandle {\n setViewportSize(size: { width: number; height: number }): Promise<void>\n goto(url: string, options?: { waitUntil?: string; timeout?: number }): Promise<unknown>\n waitForSelector(selector: string, options?: { timeout?: number }): Promise<unknown>\n waitForTimeout(ms: number): Promise<void>\n screenshot(options: { path: string; fullPage?: boolean }): Promise<void>\n locator(selector: string): {\n first(): { screenshot(options: { path: string }): Promise<void> }\n }\n}\n\nconst DEFAULT_VIEWPORT = { width: 1280, height: 800 } as const\nconst NAV_TIMEOUT_MS = 30_000\n\nasync function defaultLaunch(): Promise<BrowserHandle> {\n const mod = (await import('playwright')) as unknown as {\n chromium?: { launch(options?: { headless?: boolean }): Promise<BrowserHandle> }\n }\n if (!mod?.chromium \|\| typeof mod.chromium.launch !== 'function') {\n throw new Error(\n 'ui-auditor: playwright is not installed. 
Install `playwright` (and run `playwright install chromium`) or pass a custom `launchBrowser` to createInProcessUiAuditClient.',\n )\n }\n return mod.chromium.launch({ headless: true })\n}\n\nfunction nowStamp(): string {\n const d = new Date()\n const pad = (n: number) => String(n).padStart(2, '0')\n // Millisecond resolution so parallel fanout iterations capturing the same\n // route/viewport/label within the same second don't collide on filename and\n // silently overwrite each other.\n return (\n `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}-` +\n `${pad(d.getUTCHours())}${pad(d.getUTCMinutes())}${pad(d.getUTCSeconds())}` +\n `-${String(d.getUTCMilliseconds()).padStart(3, '0')}`\n )\n}\n\nfunction viewportOf(req: UiAuditCaptureRequest): { width: number; height: number } {\n return req.viewport ?? DEFAULT_VIEWPORT\n}\n\nfunction captureFilename(req: UiAuditCaptureRequest): string {\n const vp = viewportOf(req)\n const labelPart = req.label ? `--${slugify(req.label, 'label')}` : ''\n return `${slugify(req.route, 'route')}--${vp.width}x${vp.height}${labelPart}--${nowStamp()}.png`\n}\n\nfunction assertHttpUrl(url: string): void {\n let parsed: URL\n try {\n parsed = new URL(url)\n } catch {\n throw new Error(`ui-auditor: capture url is not parseable (got ${JSON.stringify(url)})`)\n }\n // SSRF defense at the client boundary. The MCP tool already restricts to\n // http(s), but `createInProcessUiAuditClient` is exported and can be wired\n // up directly by consumers (the example does this). 
A crafted task envelope\n // could otherwise navigate Playwright to `file://`, `data:`, `javascript:`\n // and read local files or execute inline content.\n if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {\n throw new Error(\n `ui-auditor: capture url must use http or https (got ${parsed.protocol} in ${JSON.stringify(url)})`,\n )\n }\n}\n\nasync function captureOne(\n page: PageHandle,\n req: UiAuditCaptureRequest,\n outAbsPath: string,\n signal: AbortSignal,\n navPolicy: 'strict' \| 'spa',\n): Promise<void> {\n signal.throwIfAborted()\n assertHttpUrl(req.url)\n // Apply the per-capture viewport before navigation. The capture metadata\n // and filename both encode this viewport; the rendered page must match.\n await page.setViewportSize(viewportOf(req))\n const waitUntil = navPolicy === 'spa' ? 'domcontentloaded' : 'networkidle'\n await page.goto(req.url, { waitUntil, timeout: NAV_TIMEOUT_MS })\n if (req.waitFor) {\n await page.waitForSelector(req.waitFor, { timeout: 15_000 })\n }\n const extra = req.waitMs ?? 500\n if (extra > 0) await page.waitForTimeout(extra)\n signal.throwIfAborted()\n if (req.elementSelector) {\n await page.locator(req.elementSelector).first().screenshot({ path: outAbsPath })\n } else {\n await page.screenshot({ path: outAbsPath, fullPage: req.fullPage === true })\n }\n}\n\ninterface SyntheticSandbox extends SandboxInstance {}\n\nfunction makeSandboxId(): string {\n const rand = () => Math.random().toString(16).slice(2, 10)\n return `ui-audit-${rand()}${rand()}`\n}\n\n/* @experimental /\nexport function createInProcessUiAuditClient(\n options: InProcessUiAuditClientOptions,\n): SandboxClient & {\n /\n Close the underlying browser. Idempotent.\n \n Contract: callers MUST ensure no iterations are in flight when this is\n * called. The kernel respects this — `runLoop` awaits every iteration\n * before returning, so `await runLoop(...); await client.close()` is the\n * intended pattern (see `examples/ui-audit`). 
If `close()` is invoked\n * concurrently with a running iteration, the browser teardown will race\n * against in-flight page operations; the iteration will surface an\n * AggregateError carrying both the iteration error and the close error,\n * but no work is lost silently.\n /\n close(): Promise<void>\n} {\n const launch = options.launchBrowser ?? defaultLaunch\n const navPolicy = options.navPolicy ?? 'strict'\n let browserPromise: Promise<BrowserHandle> \| undefined\n let closed = false\n\n async function getBrowser(): Promise<BrowserHandle> {\n if (closed) {\n throw new Error('ui-auditor: client is closed; create a new client to run another iteration')\n }\n if (!browserPromise) browserPromise = launch()\n return browserPromise\n }\n\n async function runIteration(\n promptText: string,\n signal: AbortSignal,\n ): AsyncIterable<SandboxEvent> {\n const task = decodeAuditTaskEnvelope(promptText)\n if (!task) {\n throw new Error(\n 'ui-auditor: prompt is missing a UI_AUDIT_TASK envelope. Use uiAuditorProfile().taskToPrompt to format prompts, or pass an envelope-prefixed prompt manually.',\n )\n }\n if (task.captures.length === 0) {\n throw new Error('ui-auditor: task has zero captures; nothing to audit.')\n }\n\n yield asSandboxEvent('audit.lens', { lens: task.lens })\n\n const browser = await getBrowser()\n const context = await browser.newContext({ viewport: DEFAULT_VIEWPORT })\n // Track both the primary iteration error and any context-close failure so\n // the cleanup path never silently swallows a leaked-context bug AND a\n // close failure never shadows the real iteration error. 
After the\n // try/catch/finally settles, we rethrow the primary if there was one,\n // otherwise we rethrow the close error.\n let primaryError: unknown\n let closeError: unknown\n try {\n const page = await context.newPage()\n const captures: UiAuditCapture[] = []\n const fs = await import('node:fs/promises')\n const path = await import('node:path')\n const shotsDir = path.join(options.workspaceDir, 'screenshots')\n await fs.mkdir(shotsDir, { recursive: true })\n\n for (const req of task.captures) {\n signal.throwIfAborted()\n const filename = captureFilename(req)\n const absPath = path.join(shotsDir, filename)\n const relPath = `screenshots/${filename}`\n await captureOne(page, req, absPath, signal, navPolicy)\n const vp = viewportOf(req)\n const cap: UiAuditCapture = {\n path: relPath,\n viewport: `${vp.width}x${vp.height}`,\n fullPage: req.fullPage === true,\n route: req.route,\n url: req.url,\n capturedAt: new Date().toISOString(),\n }\n if (req.elementSelector) cap.elementSelector = req.elementSelector\n if (req.label) cap.label = req.label\n captures.push(cap)\n yield asSandboxEvent('audit.capture', cap)\n }\n\n const judgeOut = await options.judge({\n lens: task.lens,\n captures,\n productContext: task.productContext,\n knownFindingIds: task.knownFindingIds,\n promptText,\n signal,\n })\n\n for (const finding of judgeOut.findings) {\n yield asSandboxEvent('audit.finding', finding)\n }\n if (judgeOut.notes && judgeOut.notes.trim().length > 0) {\n yield asSandboxEvent('audit.notes', { notes: judgeOut.notes })\n }\n\n const usage = judgeOut.tokenUsage ?? { input: 0, output: 0 }\n yield asSandboxEvent('done', {\n tokenUsage: {\n inputTokens: usage.input,\n outputTokens: usage.output,\n },\n totalCostUsd: judgeOut.costUsd ?? 
0,\n })\n } catch (err) {\n primaryError = err\n } finally {\n try {\n await context.close()\n } catch (err) {\n closeError = err\n }\n }\n // When both the iteration and the cleanup fail, surface both via\n // AggregateError so a leaked context bug is not silently masked by an\n // earlier iteration failure (per the fail-loud doctrine).\n if (primaryError !== undefined && closeError !== undefined) {\n throw new AggregateError(\n [primaryError, closeError],\n 'ui-auditor: iteration failed AND context.close() failed; both errors attached.',\n )\n }\n if (primaryError !== undefined) throw primaryError\n if (closeError !== undefined) throw closeError\n }\n\n function makeSyntheticSandbox(): SyntheticSandbox {\n const id = makeSandboxId()\n const instance = {\n id,\n streamPrompt(message: string, opts?: { signal?: AbortSignal }): AsyncIterable<SandboxEvent> {\n const signal = opts?.signal ?? new AbortController().signal\n return runIteration(message, signal)\n },\n async delete(): Promise<void> {\n // No per-sandbox resources to release; the browser is shared and\n // closed by `client.close()`. Intentionally a no-op so trace-time\n // `box.delete()` succeeds without doing surprising work.\n },\n }\n return instance as unknown as SyntheticSandbox\n }\n\n return {\n async create(_options?: CreateSandboxOptions) {\n return makeSyntheticSandbox()\n },\n describePlacement(box) {\n const id = (box as unknown as { id?: string }).id\n return { kind: 'sibling', sandboxId: typeof id === 'string' ? id : undefined }\n },\n async close() {\n closed = true\n const pending = browserPromise\n browserPromise = undefined\n if (pending) {\n const browser = await pending\n await browser.close()\n }\n },\n }\n}\n","/*\n @experimental\n \n Per-lens guidance the auditor inlines into its system prompt for an\n * iteration. 
Each entry is a self-contained brief — the same content the\n * standalone ui-issue-finder skill ships, embedded as a string constant so\n * agent-runtime carries no runtime dep on that external workspace.\n \n Briefs are deliberately concrete: they enumerate the SIGNALS to look for\n * and the cross-lens distinctions to respect, so the judge files fewer\n * pile-on findings under generic labels.\n /\n\nimport type { UiLens } from './substrate'\n\n/* @experimental /\nexport const SHARED_AUDITOR_RULES = `\nYou are auditing a UI for a specific class of problems. Stay strictly in your assigned lens — do not file issues that belong to another lens (a separate iteration will catch those).\n\nA finding is only valid if a thoughtful product designer would agree the screenshot shows something that should change. Avoid:\n- Personal taste (\"I'd prefer brand blue\").\n- Hallucinated text or controls you cannot actually see in the screenshot.\n- Suggestions that depend on requirements you don't have access to.\n- Pile-on findings about the same root cause — file ONE finding and use \\`similarTo\\` to link the rest.\n\nRequired for every finding:\n- title: concrete, names the offending element AND what's wrong (NOT \"improve UX\").\n- severity: critical=blocks a core task or accessibility blocker; high=noticeable friction; med=visible polish issue; low=nitpick.\n- observation: 1–3 sentences describing exactly what you see that is wrong.\n- impact: who is affected and how (concrete).\n- suggestedFix: a specific change a developer could apply without asking you back.\n- screenshots: refer to the captures attached to this iteration by path.\n- selector: when you can pin the offending element with a CSS selector.\n\nMost findings are med or low. 
Reserve high/critical for genuine blockers.\n`.trim()\n\n/* @experimental /\nexport const LENS_BRIEFS: Record<UiLens, string> = {\n consistency: `\nLENS: consistency\nLook for inconsistencies in the design system — things that look like they came from different products glued together.\nSignals: multiple font families, inconsistent weights/sizes for the same role, two shades of \"primary\", arbitrary paddings/margins that don't snap to a scale (4/8/12/16/24), same control with different border-radius or shadow on different pages, mixed icon styles (filled vs outlined), inconsistent button heights/padding for the same variant, inconsistent capitalization (Title Case vs sentence case) for the same role.\nNOT this lens: layout misalignment (use \\`layout\\`), confusing user flow (use \\`ux-flow\\`), contrast/keyboard issues (use \\`accessibility\\`).\nTitle format: \\`Inconsistent <thing> between <A> and <B>\\`.\n`.trim(),\n hierarchy: `\nLENS: hierarchy\nLook for broken visual hierarchy — places where the eye does not land on what matters most.\nSignals: primary CTA same weight as secondary/tertiary controls, headings (H1/H2/H3) nearly the same size, important data buried (headline number smaller than its label), decoration outshining content, too many emphases competing, wrong scan order, missing or overly heavy section dividers.\nNOT this lens: same-role styled differently (\\`consistency\\`), grid/alignment (\\`layout\\`), contrast-failing text (\\`accessibility\\`).\nTitle format: \\`Weak hierarchy: <element> does not read as the <intended-role>\\`.\n`.trim(),\n layout: `\nLENS: layout\nLook for layout and organization problems — alignment, grouping, whitespace, structural choices that hurt scannability.\nSignals: misalignment within rows, inconsistent gutters in grids, orphan whitespace next to crammed regions, poor grouping (related fields separated, unrelated fields adjacent), no visual sections (long wall of content), container overflow (text/content punching 
out of card boundaries), cramped or oversized hit targets, sidebars/headers sized wrong relative to main content.\nNOT this lens: same-role styled differently (\\`consistency\\`), click-distance/friction (\\`ux-flow\\`), overflow specifically at small viewports (\\`responsive\\`).\nTitle format: \\`<Region> alignment/spacing problem\\` or \\`<Region> grouping unclear\\`.\n`.trim(),\n 'ux-flow': `\nLENS: ux-flow\nLook for interaction-flow friction — action sequences that are slower, more annoying, or more error-prone than necessary.\nSignals: sequential clicks far apart (e.g. Next top-right while user is bottom-left), destructive action adjacent to primary with same weight, confirmations that don't say what's being confirmed, primary CTA below the fold or hidden in a kebab menu, silent state changes (toggle gives no feedback), form ordering that fights real-world order, dead-end states after submit, lost inputs on back-navigation, hidden pre-selected options.\nNOT this lens: visual style only (\\`consistency\\`), component arrangement without a flow problem (\\`layout\\`), microcopy clarity (\\`content\\`).\nTitle format: \\`<Action A> → <Action B> friction: <root cause>\\`.\n`.trim(),\n duplication: `\nLENS: duplication\nLook for redundancy — the same control, link, or piece of content appearing more than once with no good reason.\nSignals: two ways to do the same action on the same screen with no difference, repeated nav (same links in sidebar AND top nav), drifted duplicates (two copies that have diverged), content repeated verbatim, icon + label saying the same thing twice in one row, per-row + bulk actions that overlap confusingly, multiple status indicators conveying the same status.\nNOT this lens: inconsistent styling of duplicates (\\`consistency\\`) — this lens is about the existence of duplicates.\nTitle format: \\`Duplicate <thing> in <location A> and <location B>\\`.\n`.trim(),\n accessibility: `\nLENS: accessibility\nLook for accessibility blockers and 
degradations. Be conservative — do not assume violations you cannot see.\nSignals: insufficient contrast on body text or controls, missing/invisible focus styles, tiny tap targets (<24px on mobile), color as sole signal (red border with no message), form labels missing or not associated (placeholders standing in for labels), broken heading order (H1 → H4), modals that don't trap focus, decorative elements that take focus, errors not announced, important text rendered inside images.\nNOT this lens: generic \"looks confusing\" (\\`hierarchy\\` or \\`content\\`), layout overflow at small viewports (\\`responsive\\`).\nTitle format: \\`Accessibility: <specific blocker> in <element>\\`.\n`.trim(),\n responsive: `\nLENS: responsive\nLook for layout breakage across viewport sizes — content that works at one width but degrades at another. This iteration's captures should include the same surface at >=2 viewports; compare across them.\nSignals: horizontal scroll where content should reflow, overlapping elements (header overlaps content, fixed footer covers inputs), desktop nav crammed into mobile without collapsing, table columns that don't truncate, tap targets too close at touch sizes, controls vanishing at certain widths, layout flips that break grouping order, modals exceeding viewport height (confirm button unreachable).\nNOT this lens: issues present at every viewport (\\`consistency\\` / \\`hierarchy\\` instead).\nTitle format: \\`<Element/Region> breaks at <viewport>\\`.\n`.trim(),\n states: `\nLENS: states\nLook for missing or broken UI states — the not-happy-paths that make a product feel finished or unfinished. 
The iteration's captures should depict at least one non-default state.\nSignals: empty lists with no guidance, skeletons that don't match final layout (CLS on settle), error states with no message or recovery action, disabled buttons with no explanation, toasts that disappear before being read, success states that don't confirm, missing hover/focus/active/disabled variants on primary controls, no long-content view, no-permission state broken.\nNOT this lens: generic polish on the happy path (other lenses), missing focus rings specifically (\\`accessibility\\`).\nTitle format: \\`Missing/broken <state> state on <surface>\\`.\n`.trim(),\n content: `\nLENS: content\nLook for microcopy and content problems — text that is unclear, inconsistent, condescending, jargon-heavy, or wrong.\nSignals: jargon/internal language leaking (\"Provisioning a Tenant\" instead of \"Setting up your account\"), inconsistent terminology (workspace vs team), verbose button labels, empty-state copy that's just \"No results\", error messages blaming the user, tone inconsistency, truncation without affordance, mixed date/number formats on one page, placeholder used as a label, \"Saved!\" toast appearing before save completes, typos and grammar errors.\nNOT this lens: visual treatment of text (\\`hierarchy\\` / \\`consistency\\`), missing labels for a11y (\\`accessibility\\`).\nTitle format: \\`Copy: \"<actual text>\" in <location>\\` or \\`Inconsistent term: \"<A>\" vs \"<B>\"\\`.\n`.trim(),\n interaction: `\nLENS: interaction\nLook for interaction quality problems — affordances, feedback, and micro-interactions.\nSignals: no affordance (clickable areas not looking clickable, non-clickable areas looking clickable), missing feedback (>100ms click with no progress), hover surprises (whole row highlights but only title clickable), cursor inconsistency, animations that block input, missing transitions where they're needed (accordion snaps open), drag-and-drop without indicators, scroll-jacking, 
click-through bugs (card click handler firing alongside button), hover-only revelations on touch.\nNOT this lens: position of controls (\\`layout\\` / \\`ux-flow\\`), missing focus styles (\\`accessibility\\`).\nTitle format: \\`<Action> on <element>: <missing/wrong> feedback\\`.\n`.trim(),\n 'performance-perceived': `\nLENS: performance-perceived\nLook for perceived-performance problems — visible jank a real user would notice, not benchmark numbers. This iteration's captures should include >=2 frames during load to show shift.\nSignals: layout shift (CLS) when late-arriving images/fonts/banners settle, FOUC (flash of unstyled content), font swap jumps, late-loading hero images that shift everything, skeletons that don't match final shape, spinners on instant local actions, loading state reappearing after content paints (refetch on focus), modal open animation longer than the operation it precedes.\nNOT this lens: slow API calls (file separately), stale data after navigation (\\`states\\`).\nTitle format: \\`Layout shift / late paint on <route>: <root cause>\\`.\n`.trim(),\n other: `\nLENS: other\nUse ONLY when a finding is clearly a UI quality issue but does not fit any other lens. Strongly prefer a specific lens — \\`other\\` should be rare. Title must still be concrete.\n`.trim(),\n}\n\n/\n Build a system prompt for a single auditor iteration.\n \n @experimental\n /\nexport function buildAuditorSystemPrompt(lens: UiLens): string {\n const brief = LENS_BRIEFS[lens]\n return `${SHARED_AUDITOR_RULES}\\n\\n${brief}`\n}\n","/\n @experimental\n \n Sandbox-event stream → UiAuditOutput decoder. The custom auditor\n * `SandboxClient` emits events of the form:\n \n { type: 'audit.capture', data: UiAuditCapture }\n * { type: 'audit.finding', data: UiFinding }\n * { type: 'audit.notes', data: { notes: string } }\n * { type: 'audit.lens', data: { lens: UiLens } }\n * { type: 'done', data: { tokenUsage: { ... 
}, totalCostUsd?: number } }\n \n Other event types are tolerated and ignored. The adapter is pure: it\n * folds an already-collected event array into a UiAuditOutput.\n /\n\nimport type { SandboxEvent } from '@tangle-network/sandbox'\nimport { UI_LENSES, type UiFinding, type UiLens } from './substrate'\nimport type { UiAuditCapture, UiAuditOutput } from './task'\n\n// Build the lens-validation set from the canonical UI_LENSES tuple so adding\n// a lens to the substrate automatically extends the parser; otherwise a new\n// lens would silently fail isUiLens() and parseAuditorEvents would drop\n// every event using it.\nconst KNOWN_LENS_VALUES = new Set<UiLens>(UI_LENSES)\n\nfunction isUiLens(v: unknown): v is UiLens {\n return typeof v === 'string' && KNOWN_LENS_VALUES.has(v as UiLens)\n}\n\n/* @experimental /\nexport function parseAuditorEvents(events: SandboxEvent[]): UiAuditOutput {\n const findings: UiFinding[] = []\n const captures: UiAuditCapture[] = []\n let lens: UiLens \| undefined\n let notes: string \| undefined\n\n for (const evt of events) {\n if (!evt \|\| typeof evt !== 'object') continue\n const type = String(evt.type ?? '')\n const data =\n evt.data && typeof evt.data === 'object' ? 
(evt.data as Record<string, unknown>) : undefined\n if (!data) continue\n\n switch (type) {\n case 'audit.lens': {\n const v = data.lens\n if (isUiLens(v)) lens = v\n break\n }\n case 'audit.capture': {\n const cap = data as unknown as Partial<UiAuditCapture>\n if (\n typeof cap.path === 'string' &&\n typeof cap.viewport === 'string' &&\n typeof cap.fullPage === 'boolean' &&\n typeof cap.route === 'string' &&\n typeof cap.url === 'string' &&\n typeof cap.capturedAt === 'string'\n ) {\n const out: UiAuditCapture = {\n path: cap.path,\n viewport: cap.viewport,\n fullPage: cap.fullPage,\n route: cap.route,\n url: cap.url,\n capturedAt: cap.capturedAt,\n }\n if (cap.elementSelector) out.elementSelector = cap.elementSelector\n if (cap.label) out.label = cap.label\n captures.push(out)\n }\n break\n }\n case 'audit.finding': {\n const f = data as unknown as Partial<UiFinding>\n // Hard requirement: all the actionable fields must be present and\n // non-empty for a finding to enter the output. The validator does the\n // softer scoring; the adapter only filters structural junk.\n if (\n typeof f.title === 'string' &&\n f.title.trim().length > 0 &&\n isUiLens(f.lens) &&\n typeof f.severity === 'string' &&\n ['low', 'med', 'high', 'critical'].includes(f.severity) &&\n typeof f.route === 'string' &&\n typeof f.observation === 'string' &&\n typeof f.impact === 'string' &&\n typeof f.suggestedFix === 'string' &&\n Array.isArray(f.screenshots)\n ) {\n findings.push(f as UiFinding)\n }\n break\n }\n case 'audit.notes': {\n const n = data.notes\n if (typeof n === 'string' && n.trim().length > 0) notes = n\n break\n }\n default:\n // Tolerate cost/usage events and other backend chatter — extractLlmCallEvent\n // in run-loop.ts handles cost accounting upstream from the adapter.\n break\n }\n }\n\n const out: UiAuditOutput = { lens: lens ?? 
'other', findings, captures }\n if (notes) out.notes = notes\n return out\n}\n","/\n @experimental\n \n Auditor validator — scores a single iteration's findings for actionability\n * and gates the iteration result. The kernel uses `valid` + `score` for\n * winner selection across fanned-out iterations and to detect a degenerate\n * iteration (lens-violating findings, no screenshot evidence, no findings\n * at all on a route where we expected some).\n \n Hard fails (`valid = false`):\n * - A finding is filed under a lens that does not match the iteration's\n * lens. The whole iteration is bad — the judge isn't following the\n * lens discipline and the resulting Markdown would mislead reviewers.\n * - A finding has no screenshot reference.\n * - A finding's screenshot references a path that wasn't captured in\n * this iteration.\n \n Score (0..1, max two decimals stable):\n * - 0.4 * specificityRatio — proportion of findings with a selector\n * - 0.4 * evidenceRatio — proportion of findings whose screenshots resolve to captures\n * - 0.2 * (1 - genericTitleRatio) — proportion of findings whose titles\n * are concrete (not \"improve UX\", \"fix layout\", etc.)\n \n An iteration with zero findings scores 0.5 by convention — neither a\n * confident pass nor a hard failure (the judge might just have nothing to\n * say on this lens). 
The driver decides what to do with it.\n /\n\nimport type { DefaultVerdict } from '@tangle-network/agent-eval'\nimport type { Validator } from '../../runtime/types'\nimport type { UiAuditOutput, UiAuditTask } from './task'\n\nconst GENERIC_TITLE_PATTERNS = [\n /^improve\\s/i,\n /^fix\\s/i,\n /^update\\s/i,\n /^better\\s/i,\n /^bad\\s/i,\n /^make\\s.+\\sbetter/i,\n /\\bUX\\b\\s$/i,\n /\\bUI\\b\\s$/i,\n]\n\nfunction isGenericTitle(title: string): boolean {\n const t = title.trim()\n if (t.length < 16) return true\n return GENERIC_TITLE_PATTERNS.some((re) => re.test(t))\n}\n\n/* @experimental /\nexport function createUiAuditorValidator(task: UiAuditTask): Validator<UiAuditOutput> {\n return {\n async validate(output) {\n const findings = output.findings\n const captures = output.captures\n const capturePaths = new Set(captures.map((c) => c.path))\n\n const offLens = findings.filter((f) => f.lens !== task.lens)\n if (offLens.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${offLens.length} finding(s) filed under wrong lens (expected ${task.lens}; got ${offLens.map((f) => f.lens).join(', ')})`,\n scores: { offLens: 0 },\n }\n return verdict\n }\n\n const missingEvidence = findings.filter(\n (f) => !Array.isArray(f.screenshots) \|\| f.screenshots.length === 0,\n )\n if (missingEvidence.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${missingEvidence.length} finding(s) have no screenshot evidence`,\n scores: { evidence: 0 },\n }\n return verdict\n }\n\n const unresolvedShot = findings.filter((f) =>\n f.screenshots.some((s) => !capturePaths.has(s.path)),\n )\n if (unresolvedShot.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${unresolvedShot.length} finding(s) reference screenshot paths not captured this iteration`,\n scores: { evidence: 0 },\n }\n return verdict\n }\n\n if (findings.length === 0) {\n const verdict: DefaultVerdict = {\n valid: true,\n 
score: 0.5,\n notes: 'No findings reported. Neither a confident pass nor a failure.',\n scores: { specificity: 0, evidence: 1, titles: 1 },\n }\n return verdict\n }\n\n const withSelector = findings.filter((f) => typeof f.selector === 'string').length\n const specificity = withSelector / findings.length\n const generic = findings.filter((f) => isGenericTitle(f.title)).length\n const titles = 1 - generic / findings.length\n // Compute evidence honestly from the data: proportion of findings whose\n // screenshots are all resolvable against this iteration's captures. The\n // guards above hard-fail when this would be < 1, so today the result is\n // always 1; if a future change relaxes those guards into a soft-fail\n // mode, this still produces a truthful evidence ratio rather than a\n // stale constant inflating the score.\n const withFullEvidence = findings.filter(\n (f) =>\n Array.isArray(f.screenshots) &&\n f.screenshots.length > 0 &&\n f.screenshots.every((s) => capturePaths.has(s.path)),\n ).length\n const evidence = withFullEvidence / findings.length\n const score = Number((0.4 specificity + 0.4 * evidence + 0.2 * titles).toFixed(4))\n\n const verdict: DefaultVerdict = {\n valid: true,\n score,\n notes: `${findings.length} finding(s) — specificity=${specificity.toFixed(2)} evidence=${evidence.toFixed(2)} titles=${titles.toFixed(2)}`,\n scores: { specificity, evidence, titles },\n }\n return verdict\n },\n }\n}\n","/*\n @experimental\n \n `uiAuditorProfile` — preset for vision-driven UI audit iterations.\n \n Mirrors the shape of `coderProfile`: returns the `AgentRunSpec`, output\n * adapter, validator, and prompt formatter the loop kernel needs. Unlike\n * `coderProfile`, the agent's \"harness\" is not a sandbox-SDK code-runner\n * — it's a vision-capable judge driving a browser. 
The loop kernel still\n * iterates `client.create() → box.streamPrompt() → box.delete()`; the\n * client/box pair are provided by `createInProcessUiAuditClient` (in\n * `./in-process-client.ts`) or by a consumer-supplied `SandboxClient`.\n /\n\nimport type { AgentProfile } from '@tangle-network/sandbox'\nimport type { AgentRunSpec, OutputAdapter, Validator } from '../../runtime/types'\nimport { buildAuditorSystemPrompt } from './lens-prompts'\nimport { parseAuditorEvents } from './output-adapter'\nimport { encodeAuditTaskEnvelope, formatAuditorPrompt } from './prompt'\nimport type { UiAuditOutput, UiAuditTask } from './task'\nimport { createUiAuditorValidator } from './validator'\n\n/* @experimental /\nexport interface UiAuditorProfileOptions {\n /\n Stable name surfaced in trace events. Defaults to `ui-auditor`.\n /\n name?: string\n /\n Optional model identifier passed in `AgentProfile.model.default`.\n * The consumer's `SandboxClient` chooses how to interpret it.\n /\n model?: string\n /\n Task bound to the validator. Without it the validator uses the lens\n * embedded in the iteration output as its expectation — fine for one-off\n * use; less strict than passing the task explicitly.\n /\n task?: UiAuditTask\n}\n\n/* @experimental */\nexport function uiAuditorProfile(options: UiAuditorProfileOptions = {}): {\n profile: AgentProfile\n taskToPrompt: (task: UiAuditTask) => string\n output: OutputAdapter<UiAuditOutput>\n validator: Validator<UiAuditOutput>\n agentRunSpec: AgentRunSpec<UiAuditTask>\n} {\n const name = options.name ?? 'ui-auditor'\n\n // Lens is per-task; the profile's system prompt is filled in by the\n // taskToPrompt formatter at iteration time (prefixed to the user\n // message). Keeping the profile lens-agnostic lets one AgentRunSpec\n // serve every lens-iteration of the loop.\n const profile: AgentProfile = {\n name,\n description: 'Vision-driven UI auditor. One lens per iteration.',\n prompt: { systemPrompt: '' },\n model: options.model ? 
{ default: options.model } : undefined,\n tools: { browser: true, vision: true },\n metadata: { role: 'ui-auditor' },\n }\n\n const output: OutputAdapter<UiAuditOutput> = { parse: parseAuditorEvents }\n const validator: Validator<UiAuditOutput> = options.task\n ? createUiAuditorValidator(options.task)\n : createUiAuditorValidator({ lens: 'other', captures: [] })\n\n // Prompt shape (consumed both by sandbox-SDK harnesses AND by the\n // in-process auditor client):\n // <<UI_AUDIT_TASK>>{json}<<UI_AUDIT_TASK_END>>\n // <system-prompt for the lens>\n // <human-readable iteration brief>\n // The envelope makes the iteration self-describing so concurrent fanout\n // does not race over per-client side state. Sandbox-SDK harnesses can\n // ignore the envelope; the in-process auditor client decodes it back\n // into a typed UiAuditTask via decodeAuditTaskEnvelope.\n const taskToPrompt = (task: UiAuditTask): string =>\n `${encodeAuditTaskEnvelope(task)}\\n${buildAuditorSystemPrompt(task.lens)}\\n\\n${formatAuditorPrompt(task)}`\n\n const agentRunSpec: AgentRunSpec<UiAuditTask> = {\n name,\n profile,\n taskToPrompt,\n }\n\n return { profile, taskToPrompt, output, validator, agentRunSpec 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;AAoBA,IAAM,iBAAiB;AACvB,IAAM,eAAe;AAGd,SAAS,wBAAwB,MAA2B;AACjE,SAAO,GAAG,cAAc,GAAG,KAAK,UAAU,IAAI,CAAC,GAAG,YAAY;AAChE;AASO,SAAS,wBAAwB,QAAyC;AAC/E,QAAM,QAAQ,OAAO,QAAQ,cAAc;AAC3C,MAAI,UAAU,GAAI,QAAO;AACzB,QAAM,eAAe,QAAQ,eAAe;AAC5C,QAAM,MAAM,OAAO,QAAQ,cAAc,YAAY;AACrD,MAAI,QAAQ,GAAI,QAAO;AACvB,QAAM,UAAU,OAAO,MAAM,cAAc,GAAG;AAC9C,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,OAAO;AACjC,QAAI,CAAC,UAAU,OAAO,WAAW,SAAU,QAAO;AAClD,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,SAAS,YAAY,CAAC,MAAM,QAAQ,EAAE,QAAQ,EAAG,QAAO;AACrE,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAGO,SAAS,oBAAoB,MAA2B;AAC7D,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,qCAAgC,KAAK,IAAI,EAAE;AACtD,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,kBAAkB,KAAK,eAAe,KAAK,EAAE,SAAS,GAAG;AAChE,UAAM,KAAK,oBAAoB;AAC/B,UAAM,KAAK,KAAK,eAAe,KAAK,CAAC;AACrC,UAAM,KAAK,EAAE;AAAA,EACf;AACA,QAAM,KAAK,qBAAqB;AAChC,OAAK,SAAS,QAAQ,CAAC,KAAK,MAAM;AAChC,UAAM,KAAK,IAAI,WAAW,GAAG,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,MAAM,KAAK;AAC3E,UAAM,SAAS;AAAA,MACb,YAAY,EAAE;AAAA,MACd,IAAI,WAAW,kBAAkB;AAAA,MACjC,IAAI,kBAAkB,cAAc,IAAI,eAAe,OAAO;AAAA,MAC9D,IAAI,UAAU,aAAa,IAAI,OAAO,OAAO;AAAA,MAC7C,IAAI,WAAW,SAAY,UAAU,IAAI,MAAM,KAAK;AAAA,MACpD,IAAI,QAAQ,SAAS,IAAI,KAAK,KAAK;AAAA,IACrC,EACG,OAAO,CAAC,MAAmB,MAAM,IAAI,EACrC,KAAK,QAAK;AACb,UAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,KAAK,UAAU,IAAI,GAAG,IAAI,SAAS,IAAI,MAAM,MAAM,EAAE,EAAE;AAAA,EAC7F,CAAC;AACD,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,mBAAmB,KAAK,gBAAgB,SAAS,GAAG;AAC3D,UAAM,KAAK,uDAAuD;AAClE,UAAM,KAAK,KAAK,gBAAgB,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,EAAE,SAAS,GAAG,GAAG,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC;AACvF,UAAM,KAAK,EAAE;AAAA,EACf;AACA,QAAM,KAAK,kBAAkB;AAC7B,QAAM;AAAA,IACJ;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;;;AClDA,SAAS,eAAkB,MAAc,MAAuB;AAC9D,SAAO,EAAE,MAAM,KAAK;AACtB;AAuDA,IAAM,mBAAmB,EAAE,OAAO,MAAM,QAAQ,IAAI;AACpD,IAAM,iBAAiB;AAEvB,eAAe,gBAAwC;AACrD,QAAM,MAAO,MAAM,OAAO,YAAY;AAGtC,MAAI,CAAC,KAAK,YAAY,OAAO,IAAI,SAAS,WAAW,YAAY;AAC/D,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO,IAAI,SAAS,OAAO,EAAE,UAAU,KAAK,CAAC;AAC/C;AAEA,SAAS,WAAmB;AAC
1B,QAAM,IAAI,oBAAI,KAAK;AACnB,QAAM,MAAM,CAAC,MAAc,OAAO,CAAC,EAAE,SAAS,GAAG,GAAG;AAIpD,SACE,GAAG,EAAE,eAAe,CAAC,GAAG,IAAI,EAAE,YAAY,IAAI,CAAC,CAAC,GAAG,IAAI,EAAE,WAAW,CAAC,CAAC,IACnE,IAAI,EAAE,YAAY,CAAC,CAAC,GAAG,IAAI,EAAE,cAAc,CAAC,CAAC,GAAG,IAAI,EAAE,cAAc,CAAC,CAAC,IACrE,OAAO,EAAE,mBAAmB,CAAC,EAAE,SAAS,GAAG,GAAG,CAAC;AAEvD;AAEA,SAAS,WAAW,KAA+D;AACjF,SAAO,IAAI,YAAY;AACzB;AAEA,SAAS,gBAAgB,KAAoC;AAC3D,QAAM,KAAK,WAAW,GAAG;AACzB,QAAM,YAAY,IAAI,QAAQ,KAAK,QAAQ,IAAI,OAAO,OAAO,CAAC,KAAK;AACnE,SAAO,GAAG,QAAQ,IAAI,OAAO,OAAO,CAAC,KAAK,GAAG,KAAK,IAAI,GAAG,MAAM,GAAG,SAAS,KAAK,SAAS,CAAC;AAC5F;AAEA,SAAS,cAAc,KAAmB;AACxC,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;AAAA,EACtB,QAAQ;AACN,UAAM,IAAI,MAAM,iDAAiD,KAAK,UAAU,GAAG,CAAC,GAAG;AAAA,EACzF;AAMA,MAAI,OAAO,aAAa,WAAW,OAAO,aAAa,UAAU;AAC/D,UAAM,IAAI;AAAA,MACR,uDAAuD,OAAO,QAAQ,OAAO,KAAK,UAAU,GAAG,CAAC;AAAA,IAClG;AAAA,EACF;AACF;AAEA,eAAe,WACb,MACA,KACA,YACA,QACA,WACe;AACf,SAAO,eAAe;AACtB,gBAAc,IAAI,GAAG;AAGrB,QAAM,KAAK,gBAAgB,WAAW,GAAG,CAAC;AAC1C,QAAM,YAAY,cAAc,QAAQ,qBAAqB;AAC7D,QAAM,KAAK,KAAK,IAAI,KAAK,EAAE,WAAW,SAAS,eAAe,CAAC;AAC/D,MAAI,IAAI,SAAS;AACf,UAAM,KAAK,gBAAgB,IAAI,SAAS,EAAE,SAAS,KAAO,CAAC;AAAA,EAC7D;AACA,QAAM,QAAQ,IAAI,UAAU;AAC5B,MAAI,QAAQ,EAAG,OAAM,KAAK,eAAe,KAAK;AAC9C,SAAO,eAAe;AACtB,MAAI,IAAI,iBAAiB;AACvB,UAAM,KAAK,QAAQ,IAAI,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAAA,EACjF,OAAO;AACL,UAAM,KAAK,WAAW,EAAE,MAAM,YAAY,UAAU,IAAI,aAAa,KAAK,CAAC;AAAA,EAC7E;AACF;AAIA,SAAS,gBAAwB;AAC/B,QAAM,OAAO,MAAM,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE;AACzD,SAAO,YAAY,KAAK,CAAC,GAAG,KAAK,CAAC;AACpC;AAGO,SAAS,6BACd,SAeA;AACA,QAAM,SAAS,QAAQ,iBAAiB;AACxC,QAAM,YAAY,QAAQ,aAAa;AACvC,MAAI;AACJ,MAAI,SAAS;AAEb,iBAAe,aAAqC;AAClD,QAAI,QAAQ;AACV,YAAM,IAAI,MAAM,4EAA4E;AAAA,IAC9F;AACA,QAAI,CAAC,eAAgB,kBAAiB,OAAO;AAC7C,WAAO;AAAA,EACT;AAEA,kBAAgB,aACd,YACA,QAC6B;AAC7B,UAAM,OAAO,wBAAwB,UAAU;AAC/C,QAAI,CAAC,MAAM;AACT,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,QAAI,KAAK,SAAS,WAAW,GAAG;AAC9B,YAAM,IAAI,MAAM,uDAAuD;AAAA,IACzE;AAEA,UAAM,eAAe,cAAc,EAAE,MAAM,KA
AK,KAAK,CAAC;AAEtD,UAAM,UAAU,MAAM,WAAW;AACjC,UAAM,UAAU,MAAM,QAAQ,WAAW,EAAE,UAAU,iBAAiB,CAAC;AAMvE,QAAI;AACJ,QAAI;AACJ,QAAI;AACF,YAAM,OAAO,MAAM,QAAQ,QAAQ;AACnC,YAAM,WAA6B,CAAC;AACpC,YAAM,KAAK,MAAM,OAAO,aAAkB;AAC1C,YAAM,OAAO,MAAM,OAAO,MAAW;AACrC,YAAM,WAAW,KAAK,KAAK,QAAQ,cAAc,aAAa;AAC9D,YAAM,GAAG,MAAM,UAAU,EAAE,WAAW,KAAK,CAAC;AAE5C,iBAAW,OAAO,KAAK,UAAU;AAC/B,eAAO,eAAe;AACtB,cAAM,WAAW,gBAAgB,GAAG;AACpC,cAAM,UAAU,KAAK,KAAK,UAAU,QAAQ;AAC5C,cAAM,UAAU,eAAe,QAAQ;AACvC,cAAM,WAAW,MAAM,KAAK,SAAS,QAAQ,SAAS;AACtD,cAAM,KAAK,WAAW,GAAG;AACzB,cAAM,MAAsB;AAAA,UAC1B,MAAM;AAAA,UACN,UAAU,GAAG,GAAG,KAAK,IAAI,GAAG,MAAM;AAAA,UAClC,UAAU,IAAI,aAAa;AAAA,UAC3B,OAAO,IAAI;AAAA,UACX,KAAK,IAAI;AAAA,UACT,aAAY,oBAAI,KAAK,GAAE,YAAY;AAAA,QACrC;AACA,YAAI,IAAI,gBAAiB,KAAI,kBAAkB,IAAI;AACnD,YAAI,IAAI,MAAO,KAAI,QAAQ,IAAI;AAC/B,iBAAS,KAAK,GAAG;AACjB,cAAM,eAAe,iBAAiB,GAAG;AAAA,MAC3C;AAEA,YAAM,WAAW,MAAM,QAAQ,MAAM;AAAA,QACnC,MAAM,KAAK;AAAA,QACX;AAAA,QACA,gBAAgB,KAAK;AAAA,QACrB,iBAAiB,KAAK;AAAA,QACtB;AAAA,QACA;AAAA,MACF,CAAC;AAED,iBAAW,WAAW,SAAS,UAAU;AACvC,cAAM,eAAe,iBAAiB,OAAO;AAAA,MAC/C;AACA,UAAI,SAAS,SAAS,SAAS,MAAM,KAAK,EAAE,SAAS,GAAG;AACtD,cAAM,eAAe,eAAe,EAAE,OAAO,SAAS,MAAM,CAAC;AAAA,MAC/D;AAEA,YAAM,QAAQ,SAAS,cAAc,EAAE,OAAO,GAAG,QAAQ,EAAE;AAC3D,YAAM,eAAe,QAAQ;AAAA,QAC3B,YAAY;AAAA,UACV,aAAa,MAAM;AAAA,UACnB,cAAc,MAAM;AAAA,QACtB;AAAA,QACA,cAAc,SAAS,WAAW;AAAA,MACpC,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,qBAAe;AAAA,IACjB,UAAE;AACA,UAAI;AACF,cAAM,QAAQ,MAAM;AAAA,MACtB,SAAS,KAAK;AACZ,qBAAa;AAAA,MACf;AAAA,IACF;AAIA,QAAI,iBAAiB,UAAa,eAAe,QAAW;AAC1D,YAAM,IAAI;AAAA,QACR,CAAC,cAAc,UAAU;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AACA,QAAI,iBAAiB,OAAW,OAAM;AACtC,QAAI,eAAe,OAAW,OAAM;AAAA,EACtC;AAEA,WAAS,uBAAyC;AAChD,UAAM,KAAK,cAAc;AACzB,UAAM,WAAW;AAAA,MACf;AAAA,MACA,aAAa,SAAiB,MAA8D;AAC1F,cAAM,SAAS,MAAM,UAAU,IAAI,gBAAgB,EAAE;AACrD,eAAO,aAAa,SAAS,MAAM;AAAA,MACrC;AAAA,MACA,MAAM,SAAwB;AAAA,MAI9B;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,MAAM,OAAO,UAAiC;AAC5C,aAAO,qBAAqB;AAAA,IAC9B;AAAA,IACA,kBAAkB,KAAK;AACrB,YAAM,KAAM,IAAmC;AAC/C,aAAO,EAA
E,MAAM,WAAW,WAAW,OAAO,OAAO,WAAW,KAAK,OAAU;AAAA,IAC/E;AAAA,IACA,MAAM,QAAQ;AACZ,eAAS;AACT,YAAM,UAAU;AAChB,uBAAiB;AACjB,UAAI,SAAS;AACX,cAAM,UAAU,MAAM;AACtB,cAAM,QAAQ,MAAM;AAAA,MACtB;AAAA,IACF;AAAA,EACF;AACF;;;ACjVO,IAAM,uBAAuB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBlC,KAAK;AAGA,IAAM,cAAsC;AAAA,EACjD,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,WAAW;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMX,KAAK;AAAA,EACL,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMR,KAAK;AAAA,EACL,WAAW;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMX,KAAK;AAAA,EACL,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,eAAe;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMf,KAAK;AAAA,EACL,YAAY;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMZ,KAAK;AAAA,EACL,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMR,KAAK;AAAA,EACL,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMT,KAAK;AAAA,EACL,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,yBAAyB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMzB,KAAK;AAAA,EACL,OAAO;AAAA;AAAA;AAAA,EAGP,KAAK;AACP;AAOO,SAAS,yBAAyB,MAAsB;AAC7D,QAAM,QAAQ,YAAY,IAAI;AAC9B,SAAO,GAAG,oBAAoB;AAAA;AAAA,EAAO,KAAK;AAC5C;;;AC1GA,IAAM,oBAAoB,IAAI,IAAY,SAAS;AAEnD,SAAS,SAAS,GAAyB;AACzC,SAAO,OAAO,MAAM,YAAY,kBAAkB,IAAI,CAAW;AACnE;AAGO,SAAS,mBAAmB,QAAuC;AACxE,QAAM,WAAwB,CAAC;AAC/B,QAAM,WAA6B,CAAC;AACpC,MAAI;AACJ,MAAI;AAEJ,aAAW,OAAO,QAAQ;AACxB,QAAI,CAAC,OAAO,OAAO,QAAQ,SAAU;AACrC,UAAM,OAAO,OAAO,IAAI,QAAQ,EAAE;AAClC,UAAM,OACJ,IAAI,QAAQ,OAAO,IAAI,SAAS,WAAY,IAAI,OAAmC;AACrF,QAAI,CAAC,KAAM;AAEX,YAAQ,MAAM;AAAA,MACZ,KAAK,cAAc;AACjB,cAAM,IAAI,KAAK;AACf,YAAI,SAAS,CAAC,EAAG,QAAO;AACxB;AAAA,MACF;AAAA,MACA,KAAK,iBAAiB;AACpB,cAAM,MAAM;AACZ,YACE,OAAO,IAAI,SAAS,YACpB,OAAO,IAAI,aAAa,YACxB,OAAO,IAAI,aAAa,aACxB,OAAO,IAAI,UAAU,YACrB,OAAO,IAAI,QAAQ,YACnB,OAAO,IAAI,eAAe,UAC1B;AACA,gBAAMA,OAAsB;AAAA,YAC1B,MAAM,IAAI;AAAA,YACV,UAAU,IAAI;AAAA,YACd,UAAU,IAAI;AAAA,YACd,OAAO,IAAI;AAAA,YACX,KAAK,IAAI;AAAA,YACT,YAAY,IAAI;AAAA,UAClB;AACA,cAAI,IAAI,gBAAiB,CAAAA,KAAI,kBAAkB,IAAI;AACnD,cAAI,IAAI,MAAO,CAAAA,KAAI,QAAQ,IAAI;AAC/B,mBAAS,KAAKA,IAAG;A
AAA,QACnB;AACA;AAAA,MACF;AAAA,MACA,KAAK,iBAAiB;AACpB,cAAM,IAAI;AAIV,YACE,OAAO,EAAE,UAAU,YACnB,EAAE,MAAM,KAAK,EAAE,SAAS,KACxB,SAAS,EAAE,IAAI,KACf,OAAO,EAAE,aAAa,YACtB,CAAC,OAAO,OAAO,QAAQ,UAAU,EAAE,SAAS,EAAE,QAAQ,KACtD,OAAO,EAAE,UAAU,YACnB,OAAO,EAAE,gBAAgB,YACzB,OAAO,EAAE,WAAW,YACpB,OAAO,EAAE,iBAAiB,YAC1B,MAAM,QAAQ,EAAE,WAAW,GAC3B;AACA,mBAAS,KAAK,CAAc;AAAA,QAC9B;AACA;AAAA,MACF;AAAA,MACA,KAAK,eAAe;AAClB,cAAM,IAAI,KAAK;AACf,YAAI,OAAO,MAAM,YAAY,EAAE,KAAK,EAAE,SAAS,EAAG,SAAQ;AAC1D;AAAA,MACF;AAAA,MACA;AAGE;AAAA,IACJ;AAAA,EACF;AAEA,QAAM,MAAqB,EAAE,MAAM,QAAQ,SAAS,UAAU,SAAS;AACvE,MAAI,MAAO,KAAI,QAAQ;AACvB,SAAO;AACT;;;AC9EA,IAAM,yBAAyB;AAAA,EAC7B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,SAAS,eAAe,OAAwB;AAC9C,QAAM,IAAI,MAAM,KAAK;AACrB,MAAI,EAAE,SAAS,GAAI,QAAO;AAC1B,SAAO,uBAAuB,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,CAAC;AACvD;AAGO,SAAS,yBAAyB,MAA6C;AACpF,SAAO;AAAA,IACL,MAAM,SAAS,QAAQ;AACrB,YAAM,WAAW,OAAO;AACxB,YAAM,WAAW,OAAO;AACxB,YAAM,eAAe,IAAI,IAAI,SAAS,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC;AAExD,YAAM,UAAU,SAAS,OAAO,CAAC,MAAM,EAAE,SAAS,KAAK,IAAI;AAC3D,UAAI,QAAQ,SAAS,GAAG;AACtB,cAAMC,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,QAAQ,MAAM,gDAAgD,KAAK,IAAI,SAAS,QAAQ,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,UAC/H,QAAQ,EAAE,SAAS,EAAE;AAAA,QACvB;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,kBAAkB,SAAS;AAAA,QAC/B,CAAC,MAAM,CAAC,MAAM,QAAQ,EAAE,WAAW,KAAK,EAAE,YAAY,WAAW;AAAA,MACnE;AACA,UAAI,gBAAgB,SAAS,GAAG;AAC9B,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,gBAAgB,MAAM;AAAA,UAChC,QAAQ,EAAE,UAAU,EAAE;AAAA,QACxB;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,iBAAiB,SAAS;AAAA,QAAO,CAAC,MACtC,EAAE,YAAY,KAAK,CAAC,MAAM,CAAC,aAAa,IAAI,EAAE,IAAI,CAAC;AAAA,MACrD;AACA,UAAI,eAAe,SAAS,GAAG;AAC7B,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,eAAe,MAAM;AAAA,UAC/B,QAAQ,EAAE,UAAU,EAAE;AAAA,QACxB;AACA,eAAOA;AAAA,MACT;AAEA,UAAI,SAAS,WAAW,GAAG;AACzB,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO;AAAA,UACP,QAAQ,EAAE,aAAa,GAAG,UAAU,GAAG,QAAQ,EAAE;A
AAA,QACnD;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,eAAe,SAAS,OAAO,CAAC,MAAM,OAAO,EAAE,aAAa,QAAQ,EAAE;AAC5E,YAAM,cAAc,eAAe,SAAS;AAC5C,YAAM,UAAU,SAAS,OAAO,CAAC,MAAM,eAAe,EAAE,KAAK,CAAC,EAAE;AAChE,YAAM,SAAS,IAAI,UAAU,SAAS;AAOtC,YAAM,mBAAmB,SAAS;AAAA,QAChC,CAAC,MACC,MAAM,QAAQ,EAAE,WAAW,KAC3B,EAAE,YAAY,SAAS,KACvB,EAAE,YAAY,MAAM,CAAC,MAAM,aAAa,IAAI,EAAE,IAAI,CAAC;AAAA,MACvD,EAAE;AACF,YAAM,WAAW,mBAAmB,SAAS;AAC7C,YAAM,QAAQ,QAAQ,MAAM,cAAc,MAAM,WAAW,MAAM,QAAQ,QAAQ,CAAC,CAAC;AAEnF,YAAM,UAA0B;AAAA,QAC9B,OAAO;AAAA,QACP;AAAA,QACA,OAAO,GAAG,SAAS,MAAM,kCAA6B,YAAY,QAAQ,CAAC,CAAC,aAAa,SAAS,QAAQ,CAAC,CAAC,WAAW,OAAO,QAAQ,CAAC,CAAC;AAAA,QACxI,QAAQ,EAAE,aAAa,UAAU,OAAO;AAAA,MAC1C;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;;;AC1FO,SAAS,iBAAiB,UAAmC,CAAC,GAMnE;AACA,QAAM,OAAO,QAAQ,QAAQ;AAM7B,QAAM,UAAwB;AAAA,IAC5B;AAAA,IACA,aAAa;AAAA,IACb,QAAQ,EAAE,cAAc,GAAG;AAAA,IAC3B,OAAO,QAAQ,QAAQ,EAAE,SAAS,QAAQ,MAAM,IAAI;AAAA,IACpD,OAAO,EAAE,SAAS,MAAM,QAAQ,KAAK;AAAA,IACrC,UAAU,EAAE,MAAM,aAAa;AAAA,EACjC;AAEA,QAAM,SAAuC,EAAE,OAAO,mBAAmB;AACzE,QAAM,YAAsC,QAAQ,OAChD,yBAAyB,QAAQ,IAAI,IACrC,yBAAyB,EAAE,MAAM,SAAS,UAAU,CAAC,EAAE,CAAC;AAW5D,QAAM,eAAe,CAAC,SACpB,GAAG,wBAAwB,IAAI,CAAC;AAAA,EAAK,yBAAyB,KAAK,IAAI,CAAC;AAAA;AAAA,EAAO,oBAAoB,IAAI,CAAC;AAE1G,QAAM,eAA0C;AAAA,IAC9C;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,cAAc,QAAQ,WAAW,aAAa;AAClE;","names":["out","verdict"]}

package/dist/{run-loop--hSoIknW.d.ts → run-loop-CU2Y00Si.d.ts} RENAMED Viewed

@@ -1,5 +1,5 @@
 import { SandboxInstance } from '@tangle-network/sandbox';
-import { D as Driver, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, I as Iteration, L as LoopWinner, a as LoopLineageOptions, b as LoopSandboxClient, c as LoopResult } from './types-DdzkffAm.js';
+import { D as Driver, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, I as Iteration, L as LoopWinner, a as LoopLineageOptions, S as SandboxClient, b as LoopResult } from './types-DnYoHvvZ.js';
 /**
  * @experimental
@@ -99,7 +99,7 @@ declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, O
  * it) and merges `sandboxOverrides`. Shared by the loop kernel and the
  * `AgentRuntime.act` sandbox bridge so both boot the sandbox identically.
  */
-declare function createSandboxForSpec<Task>(client: LoopSandboxClient, spec: AgentRunSpec<Task>, signal: AbortSignal): Promise<SandboxInstance>;
+declare function createSandboxForSpec<Task>(client: SandboxClient, spec: AgentRunSpec<Task>, signal: AbortSignal): Promise<SandboxInstance>;
 /**
  * The kernel's winner argmax — best-valid-score, ties broken by earliest index,
  * falling back to the best-scoring non-errored output when none is valid. Exported

package/dist/runtime.d.ts CHANGED Viewed

@@ -1,15 +1,15 @@
 import { AgentProfile as AgentProfile$1, BackendType, CreateSandboxOptions, SandboxInstance, SandboxEvent } from '@tangle-network/sandbox';
 export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
-import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, d as AgentSpec, E as ExecutorRegistry, B as Budget, A as Agent, e as RootHandle, f as SupervisedResult, g as Spend, S as Scope, U as UsageEvent, L as LeafExecutorFactory, h as Supervisor } from './types-1HbsFa7H.js';
-export { i as ExecutorContext, H as Handle, j as LeafExecutor, k as LeafResult, l as NodeSnapshot, m as NodeStatus, n as Restart, o as RootSignal, p as Runtime, q as SpawnOpts, r as SupervisorOpts, W as WidenGate } from './types-1HbsFa7H.js';
-export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDynamicDriverOptions, D as DynamicDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDynamicDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './dynamic-BvllHV6M.js';
+import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, E as ExecutorFactory, d as AgentSpec, e as ExecutorRegistry, B as Budget, A as Agent, f as RootHandle, g as SupervisedResult, h as Spend, S as Scope, U as UsageEvent, i as Supervisor } from './types-BfoeiQRZ.js';
+export { j as Executor, k as ExecutorContext, l as ExecutorResult, H as Handle, m as NodeSnapshot, n as NodeStatus, o as Restart, p as RootSignal, q as Runtime, r as SpawnOpts, s as SupervisorOpts, W as WidenGate } from './types-BfoeiQRZ.js';
+export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDriverOptions, D as DriverDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './driver-C-mtBo7h.js';
+import { S as SandboxClient, b as LoopResult, c as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-DnYoHvvZ.js';
+export { D as Driver, C as LoopDecisionPayload, F as LoopEndedPayload, G as LoopIterationDispatchPayload, H as LoopIterationEndedPayload, J as LoopIterationStartedPayload, a as LoopLineageOptions, M as LoopPlanDescription, N as LoopPlanPayload, f as LoopSandboxPlacement, P as LoopStartedPayload, Q as LoopTeardownFailedPayload, e as LoopTraceEmitter, T as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, U as ValidationCtx, V as Validator } from './types-DnYoHvvZ.js';
 import { AgentProfile, AnalystFinding, DefaultVerdict } from '@tangle-network/agent-eval';
 export { DefaultVerdict } from '@tangle-network/agent-eval';
 import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
-import { R as RunLoopOptions } from './run-loop--hSoIknW.js';
-export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop--hSoIknW.js';
-import { b as LoopSandboxClient, c as LoopResult, d as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-DdzkffAm.js';
-export { D as Driver, h as LoopDecisionPayload, i as LoopEndedPayload, j as LoopIterationDispatchPayload, k as LoopIterationEndedPayload, l as LoopIterationStartedPayload, a as LoopLineageOptions, m as LoopPlanDescription, n as LoopPlanPayload, g as LoopSandboxPlacement, o as LoopStartedPayload, p as LoopTeardownFailedPayload, f as LoopTraceEmitter, q as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, r as ValidationCtx, V as Validator } from './types-DdzkffAm.js';
+import { R as RunLoopOptions } from './run-loop-CU2Y00Si.js';
+export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop-CU2Y00Si.js';
 import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
 /**
@@ -113,6 +113,13 @@ declare function replaySpawnTree(journal: SpawnJournal, blobs: ResultBlobStore,
  */
 declare function materializeTreeView(events: SpawnEvent[]): TreeView;
+/**
+ * Adapt an `ExecutorFactory` into a `SandboxClient` for `runLoop`. The factory is
+ * instantiated fresh per `streamPrompt` (mirrors the per-spawn executor lifecycle):
+ * run once on the prompt, emit the terminal result event, tear down.
+ */
+declare function inlineSandboxClient(factory: ExecutorFactory<unknown>): SandboxClient;
 /**
  * `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
  *
@@ -146,7 +153,7 @@ declare function materializeTreeView(events: SpawnEvent[]): TreeView;
 type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
 interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
     /** Sandbox client used for every cell's `runLoop`. Supplied once. */
-    sandboxClient: LoopSandboxClient;
+    sandboxClient: SandboxClient;
     /** Build the per-cell runLoop options from the scenario (+ profile, when
      *  used with `runProfileMatrix`). */
     toLoopOptions: (scenario: TScenario, profile: AgentProfile) => LoopOptionsForDispatch<Task, Output, Decision>;
@@ -317,7 +324,7 @@ interface ShapeContext<D = unknown> {
      * Wrap an `AgentSpec` into a leaf `Agent` carrying it as `executorSpec`, so the shape can
      * `scope.spawn(spawnChild(spec), task, opts)`. `name` labels the child for traces. The
      * returned agent's `act` is never invoked by the keystone (it is spawned, not run) — the
-     * spec drives the resolved `LeafExecutor`; `act` exists only to satisfy the `Agent` shape.
+     * spec drives the resolved `Executor`; `act` exists only to satisfy the `Agent` shape.
      */
     spawnChild(name: string, spec: AgentSpec): Agent<unknown, Outcome<D>>;
     /** Derive a child `AgentSpec` from the persona's root spec with an overridden profile —
@@ -1299,7 +1306,7 @@ interface AcquireOptions {
     sleep?: (ms: number) => Promise<void>;
 }
 /** @experimental */
-declare function acquireSandbox(client: LoopSandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
+declare function acquireSandbox(client: SandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
 /**
  * @experimental
@@ -1340,9 +1347,9 @@ interface SandboxCapabilities {
  *
  * @experimental
  */
-declare function probeSandboxCapabilities(client: LoopSandboxClient): Promise<SandboxCapabilities>;
+declare function probeSandboxCapabilities(client: SandboxClient): Promise<SandboxCapabilities>;
 /**
- * Narrowed view of the optional CRIU probe. The loop-side `LoopSandboxClient`
+ * Narrowed view of the optional CRIU probe. The loop-side `SandboxClient`
  * does not require `criuStatus`; this widens it optionally so the probe can be
  * read without importing sandbox-backend specifics. @experimental
  */
@@ -1514,12 +1521,13 @@ interface SandboxLineage {
  *
  * @experimental
  */
-declare function createSandboxLineage(client: LoopSandboxClient, capabilities: SandboxCapabilities, options?: {
+declare function createSandboxLineage(client: SandboxClient, capabilities: SandboxCapabilities, options?: {
     maxConcurrency?: number;
+    streaming?: 'sse' | 'poll';
 }): SandboxLineage;
 /**
  * Loop-side widening of the box's optional checkpoint method. The
- * `LoopSandboxClient`/`SandboxInstance` surface the kernel relies on does not
+ * `SandboxClient`/`SandboxInstance` surface the kernel relies on does not
  * require checkpointing; this reads it optionally so the lineage can probe-gate
  * without importing sandbox-backend specifics. @experimental
  */
@@ -1550,6 +1558,104 @@ interface SessionCapableBox {
     };
 }
+/**
+ * `openSandboxRun` — the ONE harness-agnostic seam for running an agent in a
+ * sandbox over a persistent artifact: run it, stream it, RESUME the same session
+ * across turns. Domain-agnostic: a coding agent, a research agent, a tax/legal
+ * agent — all flow through this; the domain lives only in the `Deliverable<Out>`
+ * the caller supplies, never in a per-domain copy of this function.
+ *
+ * It is a thin facade (NOT a new layer) over code that already exists and is
+ * already hardened:
+ *   - `acquireSandbox` — cold-start / 502-503-504 / gateway-timeout recovery,
+ *   - `buildBackendOptions` — the harness IS `backend.type` (opencode / codex /
+ *     claude-code / kimi-code / hermes / pi); the only "which agent" knob,
+ *   - `createSandboxLineage` — `start` mints a session; `resume` continues the
+ *     SAME server-side session with a fail-loud `assertSessionLive`.
+ *
+ * The one genuinely-new piece is {@link Deliverable}: it widens the pure
+ * `OutputAdapter.parse(events)` to ALSO admit a post-turn read off the box FS —
+ * the structural gap that made the bench gates hand-roll `box.fs.read`, because a
+ * large produced file (a git diff, a generated document) truncates in the chat
+ * stream and a pure events-parser cannot reach the workspace. Per the SDK, a
+ * RELATIVE `deliverable.path` resolves from the workspace root and an ABSOLUTE one
+ * (e.g. `/tmp/solution.patch`) reads the container filesystem directly — both are
+ * valid; pick the one the agent actually wrote to. Avoid `..` traversal segments.
+ *
+ * What this deliberately does NOT do (so it stays a facade, not slop): no custom
+ * reconnect/replay (the SDK + platform own per-session buffering + `Last-Event-ID`);
+ * no fork verb (platform CRIU is probe-gated and currently absent — fork lives in
+ * `SandboxLineage.fork` behind the capability probe, surfaced only if it returns).
+ * It is also distinct from `runLoop`: `runLoop` is the multi-round, driver-driven
+ * kernel (fresh box per round, events deliverable); this is a SINGLE rollout +
+ * artifact-or-events deliverable + resume over ONE persistent box.
+ */
+/**
+ * @experimental
+ * How a typed deliverable `Out` is materialized from a finished turn.
+ * - `events`   — pure parse over the event array (identical to `OutputAdapter`).
+ * - `artifact` — read a file off the box AFTER the turn drains, then map it (+ the
+ *                events). For diffs/codebases/documents that don't fit the chat
+ *                stream. `path` relative ⇒ workspace root; absolute ⇒ container FS.
+ */
+type Deliverable<Out> = {
+    kind: 'events';
+    fromEvents: (events: SandboxEvent[]) => Out;
+} | {
+    kind: 'artifact';
+    path: string;
+    fromArtifact: (raw: string, events: SandboxEvent[]) => Out;
+};
+/**
+ * @experimental
+ * One finished turn over the artifact. A failed FS read is surfaced in `readError`
+ * (never masked as an empty deliverable) so a caller distinguishes "agent produced
+ * nothing" from a transport/FS fault.
+ */
+interface TurnResult<Out> {
+    out: Out;
+    events: SandboxEvent[];
+    readError?: string;
+}
+/** @experimental A live run over ONE persistent artifact (box + session). Close it
+ *  when done — `close()` tears the box down. */
+interface SandboxRun<Out> {
+    readonly box: SandboxInstance;
+    readonly sessionId: string;
+    /** First turn over the fresh box (mints the session). Throws if already started. */
+    start(prompt: string): Promise<TurnResult<Out>>;
+    /** Continue THE SAME session over THE SAME artifact — a resumed turn/rollout. */
+    resume(prompt: string): Promise<TurnResult<Out>>;
+    close(): Promise<void>;
+}
+/** @experimental */
+interface OpenSandboxRunOptions {
+    /** Profile + sandbox env/overrides. `sandboxOverrides.backend.type` is the harness. */
+    agentRun: AgentRunSpec<string>;
+    signal: AbortSignal;
+    /** Optional execution-scoped observers. Hook failures never fail the run. */
+    hooks?: RuntimeHooks;
+    /** Stable run id for trace joins. Defaults to a short runtime-minted id. */
+    runId?: string;
+    /** Optional benchmark/scenario id carried into emitted hook events. */
+    scenarioId?: string;
+    /** Test seam for deterministic hook timestamps. Defaults to `Date.now`. */
+    now?: () => number;
+    /** Bounds box-creation bursts inside lineage fanout. Default from lineage. */
+    maxConcurrency?: number;
+    /** Base backoff (ms) for retrying a transient artifact `fs.read` failure; the i-th
+     *  retry waits `readRetryDelayMs * i`. Default 1000. Set 0 to disable the wait (tests). */
+    readRetryDelayMs?: number;
+}
+/**
+ * @experimental
+ * Open a sandbox run. Harness-agnostic: the harness lives in
+ * `options.agentRun.sandboxOverrides.backend.type`, so opencode/codex/claude-code/
+ * kimi-code all flow through this one entrypoint with identical env/auth wiring.
+ */
+declare function openSandboxRun<Out>(client: SandboxClient, options: OpenSandboxRunOptions, deliverable: Deliverable<Out>): Promise<SandboxRun<Out>>;
 /**
  * @experimental
  *
@@ -1635,7 +1741,7 @@ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
 /**
  * @experimental
  *
- * The leaf runtime — the built-in `LeafExecutor` IMPLEMENTATIONS behind the ONE
+ * The leaf runtime — the built-in `Executor` IMPLEMENTATIONS behind the ONE
  * open interface frozen in `./types`, plus the open resolver/registry that maps
  * an `AgentSpec` to one of them OR accepts a bring-your-own executor verbatim.
  *
@@ -1649,7 +1755,7 @@ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
  *                     excluded from the equal-k arms by construction (streaming).
  * Every metered runtime reports through the SAME normalized `UsageEvent` channel
  * so the conserved budget pool meters them identically. A user's own agent is
- * first-class the moment it implements `LeafExecutor` — register it by name or
+ * first-class the moment it implements `Executor` — register it by name or
  * pass it as `AgentSpec.executor`.
  *
  * Layering: `estimateCost`/`isModelPriced` are substrate primitives from
@@ -1675,7 +1781,7 @@ interface RouterSeam {
  * checkpoint/fork.
  */
 interface SandboxSeam {
-    sandboxClient: LoopSandboxClient;
+    sandboxClient: SandboxClient;
     /** Forwarded into the composed `runLoop`'s `ctx` (trace emitter, run handle, etc.). */
     loopCtx?: Partial<Omit<ExecCtx, 'sandboxClient' | 'signal'>>;
     /** PR #150 `RunLoopOptions.lineage` passthrough — opaque; forwarded, not parsed. */
@@ -1694,40 +1800,38 @@ interface CliSeam {
     cwd?: string;
 }
 /**
- * A direct OpenAI-compatible Router chat-completion. One-shot: resolves a
- * `LeafResult` and reports its terminal usage as `UsageEvent`s through the
- * conserved pool. Reports REAL token usage — when the provider omits `usage`,
- * the spend records zero tokens but the call still counts one iteration (a
- * phantom fabricated 0 is never emitted as a priced cost).
- *
- * NOTE for the Integrate phase: this duplicates the minimal body of
- * `bench/src/router-client.ts#routerChatWithUsage`. `bench/` is a sub-package
- * outside this package's `rootDir: "src"`, so it cannot be imported here without
- * breaking the build. Integrate should lift that helper into `src/loops/` and
- * have both call sites share it (do not re-copy a third time).
- */
-declare const routerInlineExecutor: LeafExecutorFactory<unknown>;
-/**
- * COMPOSES `runLoop` as a single-task leaf: one box, a refine driver bounded to
- * the seam's `maxIterations` (default 1), the spec's profile as the agent run.
- * Surfaces the loop's aggregated `tokenUsage` + `costUsd` as `UsageEvent`s after
- * it drains, and yields one `iteration` event per loop iteration. Forwards the
- * optional `lineage` passthrough WITHOUT importing sandbox-lineage / reinventing
- * checkpoint/fork.
- *
- * Streaming shape: the loop runs to completion inside the first `next()`, then
- * the recorded usage events are yielded; the terminal artifact is read from
- * `resultArtifact()` after the stream drains.
- */
-declare const sandboxExecutor: LeafExecutorFactory<unknown>;
+ * cli-bridge seam. A local OpenAI-compatible bridge that fronts harness CLIs
+ * (claude-code / opencode / kimi / pi) behind one HTTP surface; `model` doubles
+ * as the harness selector (e.g. `claude-code/sonnet`, `opencode/<provider>/<model>`).
+ * `agentProfile` is the bridge-dialect profile (metadata.disallowedTools, mcp)
+ * forwarded verbatim per request — how an arm disables native tools or injects
+ * a provider search MCP.
+ */
+interface BridgeSeam {
+    bridgeUrl: string;
+    bridgeBearer: string;
+    model: string;
+    agentProfile?: Record<string, unknown>;
+    timeoutMs?: number;
+}
 /**
- * Spawns a subprocess (`bin` + `args`). It cannot account tokens, so it is
- * `budgetExempt: true`: its spend is NOT metered against the conserved pool and
- * its iterations are EXCLUDED from the equal-k arms by construction (the
- * resolver/equal-k path checks `budgetExempt`). teardown is SIGTERM → SIGKILL
- * with a grace window. Streaming: yields one `iteration` event on clean exit.
- */
-declare const cliExecutor: LeafExecutorFactory<unknown>;
+ * The single built-in executor entrypoint. The backend is DATA — the cost dial a
+ * profile, an experiment config, or a replay journal can name — not an import
+ * choice. Injects the matching seam and delegates to the built-in implementation;
+ * the port stays OPEN: bring-your-own agents implement `Executor` directly and
+ * never pass through here.
+ */
+type ExecutorConfig = ({
+    backend: 'router';
+} & RouterSeam) | ({
+    backend: 'bridge';
+} & BridgeSeam) | ({
+    backend: 'cli';
+} & CliSeam) | ({
+    backend: 'sandbox';
+    harness?: BackendType;
+} & SandboxSeam);
+declare function createExecutor(config: ExecutorConfig): ExecutorFactory<unknown>;
 /**
  * The open resolver/registry. Pre-registers the three built-ins under their
  * runtime tags (`'router'`, `'sandbox'`, `'cli'`) and accepts `register(name,
@@ -1749,7 +1853,7 @@ declare function createExecutorRegistry(): ExecutorRegistry;
  * An `Agent.act` runs inside a `Scope`. It `spawn`s children dynamically and reacts to
  * them via `next()`. The scope owns ONE in-memory nursery — the authoritative live set —
  * and is the single place that drives a child's lifecycle: reserve budget atomically,
- * resolve a `LeafExecutor` through the open registry, run it (one-shot OR streaming),
+ * resolve an `Executor` through the open registry, run it (one-shot OR streaming),
  * fold its normalized `UsageEvent`s into a conserved `Spend`, reconcile the reservation
  * (refunding the unspent remainder), persist the result blob + journal records, and
  * deliver the `Settled` through the `next()` cursor.
@@ -1857,4 +1961,4 @@ declare function createSupervisor<Task, Out>(): Supervisor<Task, Out>;
  */
 declare function createRootHandle<Out>(): RootHandle<Out>;
-export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration, LeafExecutorFactory, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, NodeId, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, type SandboxLineage, type SandboxLineageHandle, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, SpawnEvent, SpawnJournal, Spend, type SteerContext, SupervisedResult, Supervisor, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, UsageEvent, type UsageSink, type Verify, type VerifySpec, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, cliExecutor, contentAddress, createBudgetPool, createExecutorRegistry, createRootHandle, 
createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, replaySpawnTree, reportLoopUsage, routerInlineExecutor, runPersonified, sandboxExecutor, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen };
+export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, type BridgeSeam, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type Deliverable, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, type ExecutorConfig, ExecutorFactory, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, NodeId, type OpenSandboxRunOptions, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, SandboxClient, type SandboxLineage, type SandboxLineageHandle, type SandboxRun, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, SpawnEvent, SpawnJournal, Spend, type SteerContext, SupervisedResult, Supervisor, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, type TurnResult, UsageEvent, type UsageSink, type Verify, type VerifySpec, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, 
builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, inlineSandboxClient, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, openSandboxRun, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, replaySpawnTree, reportLoopUsage, runPersonified, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen };