@tangle-network/agent-runtime 0.45.0 → 0.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/agent.d.ts +5 -5
- package/dist/agent.js +2 -2
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +5 -40
- package/dist/analyst-loop.js +2 -4
- package/dist/{chunk-KEWO4KI6.js → chunk-65FQLI4V.js} +628 -138
- package/dist/chunk-65FQLI4V.js.map +1 -0
- package/dist/{chunk-NYN5RTLP.js → chunk-GN75RGM6.js} +7 -7
- package/dist/chunk-GN75RGM6.js.map +1 -0
- package/dist/{chunk-PRX45WE2.js → chunk-GSUO5QS6.js} +1 -119
- package/dist/chunk-GSUO5QS6.js.map +1 -0
- package/dist/{chunk-FK53TXOP.js → chunk-HNUXAZIJ.js} +4 -27
- package/dist/chunk-HNUXAZIJ.js.map +1 -0
- package/dist/{chunk-IJ6FGOPO.js → chunk-I42NHLKX.js} +3 -3
- package/dist/chunk-I42NHLKX.js.map +1 -0
- package/dist/{chunk-IJGS6J7X.js → chunk-JNPK46YH.js} +2 -2
- package/dist/{chunk-QR4UUC5P.js → chunk-KADIJAD4.js} +33 -19
- package/dist/chunk-KADIJAD4.js.map +1 -0
- package/dist/{chunk-Z2QXVBA6.js → chunk-KPN7OQ64.js} +4 -4
- package/dist/chunk-KPN7OQ64.js.map +1 -0
- package/dist/{chunk-KSMX62JF.js → chunk-VR4JIC5H.js} +2 -2
- package/dist/{coder-CczgMqFx.d.ts → coder-DCWFQpmJ.d.ts} +1 -1
- package/dist/{dynamic-BvllHV6M.d.ts → driver-C-mtBo7h.d.ts} +6 -6
- package/dist/{improvement-adapter-CWegd3vw.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
- package/dist/improvement.d.ts +2 -2
- package/dist/index.d.ts +8 -8
- package/dist/index.js +8 -8
- package/dist/{kb-gate-D9GBocLN.d.ts → kb-gate-2Gwpz_27.d.ts} +7 -7
- package/dist/{loop-runner-bin-CPrCoKqC.d.ts → loop-runner-bin-D-K6bRp3.d.ts} +11 -11
- package/dist/loop-runner-bin.d.ts +6 -6
- package/dist/loop-runner-bin.js +6 -6
- package/dist/loops.d.ts +5 -5
- package/dist/loops.js +10 -10
- package/dist/mcp/bin.js +6 -6
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +11 -11
- package/dist/mcp/index.js +7 -7
- package/dist/{otel-export-Dy2DyUCU.d.ts → otel-export-nurzFwuJ.d.ts} +1 -1
- package/dist/profiles.d.ts +8 -8
- package/dist/profiles.js +1 -1
- package/dist/profiles.js.map +1 -1
- package/dist/{run-loop--hSoIknW.d.ts → run-loop-CU2Y00Si.d.ts} +2 -2
- package/dist/runtime.d.ts +156 -52
- package/dist/runtime.js +10 -10
- package/dist/{types-1HbsFa7H.d.ts → types-BfoeiQRZ.d.ts} +20 -20
- package/dist/{types-DdzkffAm.d.ts → types-DnYoHvvZ.d.ts} +17 -5
- package/dist/{types-BtRLF2U3.d.ts → types-p8dWBIXL.d.ts} +1 -1
- package/dist/workflow.d.ts +3 -3
- package/dist/workflow.js +2 -2
- package/dist/workflow.js.map +1 -1
- package/package.json +13 -24
- package/skills/agent-runtime-adoption/SKILL.md +3 -3
- package/skills/generate-eval/SKILL.md +60 -0
- package/dist/chunk-FK53TXOP.js.map +0 -1
- package/dist/chunk-IJ6FGOPO.js.map +0 -1
- package/dist/chunk-KEWO4KI6.js.map +0 -1
- package/dist/chunk-NYN5RTLP.js.map +0 -1
- package/dist/chunk-PRX45WE2.js.map +0 -1
- package/dist/chunk-QR4UUC5P.js.map +0 -1
- package/dist/chunk-Z2QXVBA6.js.map +0 -1
- /package/dist/{chunk-IJGS6J7X.js.map → chunk-JNPK46YH.js.map} +0 -0
- /package/dist/{chunk-KSMX62JF.js.map → chunk-VR4JIC5H.js.map} +0 -0
package/dist/profiles.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/profiles/ui-auditor/prompt.ts","../src/profiles/ui-auditor/in-process-client.ts","../src/profiles/ui-auditor/lens-prompts.ts","../src/profiles/ui-auditor/output-adapter.ts","../src/profiles/ui-auditor/validator.ts","../src/profiles/ui-auditor/profile.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Prompt formatter for the auditor profile. `formatAuditorPrompt` produces\n * the user message handed to the iteration — describes the captures to be\n * taken and the lens to apply. The system prompt comes from\n * `buildAuditorSystemPrompt(lens)` (lens-prompts.ts).\n *\n * The formatter prepends a machine-readable envelope (`<<UI_AUDIT_TASK>>`\n * … `<<UI_AUDIT_TASK_END>>`) carrying a JSON-serialised task. The\n * in-process auditor client recovers the task from this envelope so the\n * iteration is self-describing — robust to concurrent fanout, where any\n * per-client side state (e.g. a \"current task\" register) would race.\n *\n * The formatter is pure and deterministic — re-run on the same task\n * produces the same prompt. Tests and trace replays rely on this.\n */\n\nimport type { UiAuditTask } from './task'\n\nconst ENVELOPE_BEGIN = '<<UI_AUDIT_TASK>>'\nconst ENVELOPE_END = '<<UI_AUDIT_TASK_END>>'\n\n/** @experimental */\nexport function encodeAuditTaskEnvelope(task: UiAuditTask): string {\n return `${ENVELOPE_BEGIN}${JSON.stringify(task)}${ENVELOPE_END}`\n}\n\n/**\n * Parse a task envelope back out of a prompt string. Returns undefined if\n * the prompt does not contain a complete envelope OR if the payload is\n * not valid JSON.\n *\n * @experimental\n */\nexport function decodeAuditTaskEnvelope(prompt: string): UiAuditTask | undefined {\n const start = prompt.indexOf(ENVELOPE_BEGIN)\n if (start === -1) return undefined\n const payloadStart = start + ENVELOPE_BEGIN.length\n const end = prompt.indexOf(ENVELOPE_END, payloadStart)\n if (end === -1) return undefined\n const payload = prompt.slice(payloadStart, end)\n try {\n const parsed = JSON.parse(payload) as unknown\n if (!parsed || typeof parsed !== 'object') return undefined\n const t = parsed as Partial<UiAuditTask>\n if (typeof t.lens !== 'string' || !Array.isArray(t.captures)) return undefined\n return t as UiAuditTask\n } catch {\n return undefined\n }\n}\n\n/** @experimental */\nexport function formatAuditorPrompt(task: UiAuditTask): string {\n const lines: string[] = []\n lines.push(`# UI audit iteration — lens: ${task.lens}`)\n lines.push('')\n if (task.productContext && task.productContext.trim().length > 0) {\n lines.push('## Product context')\n lines.push(task.productContext.trim())\n lines.push('')\n }\n lines.push('## Captures to take')\n task.captures.forEach((cap, i) => {\n const vp = cap.viewport ? `${cap.viewport.width}x${cap.viewport.height}` : '1280x800 (default)'\n const detail = [\n `viewport=${vp}`,\n cap.fullPage ? 'fullPage=true' : null,\n cap.elementSelector ? `selector=\\`${cap.elementSelector}\\`` : null,\n cap.waitFor ? `waitFor=\\`${cap.waitFor}\\`` : null,\n cap.waitMs !== undefined ? `waitMs=${cap.waitMs}` : null,\n cap.label ? `label=${cap.label}` : null,\n ]\n .filter((s): s is string => s !== null)\n .join(' · ')\n lines.push(`${i + 1}. route=\\`${cap.route}\\` url=${cap.url} ${detail ? `(${detail})` : ''}`)\n })\n lines.push('')\n if (task.knownFindingIds && task.knownFindingIds.length > 0) {\n lines.push('## Known findings (link via similarTo, do not refile)')\n lines.push(task.knownFindingIds.map((n) => `#${String(n).padStart(3, '0')}`).join(', '))\n lines.push('')\n }\n lines.push('## Output format')\n lines.push(\n 'Emit a single JSON object with the shape `{ findings: UiFinding[], notes?: string }` where every finding has the fields enumerated in your system prompt. The screenshots field on each finding must reference the captures above by path. Do not emit findings outside the lens.',\n )\n return lines.join('\\n')\n}\n","/**\n * @experimental\n *\n * `createInProcessUiAuditClient` — a `LoopSandboxClient` that drives a\n * Playwright browser in-process and delegates finding identification to a\n * consumer-supplied {@link UiJudge}.\n *\n * Why this exists: `runLoop` is built around a sandbox-SDK seam — each\n * iteration is `client.create() → box.streamPrompt() → box.delete()`.\n * For UI audit, spinning up a real container running a coding harness\n * per iteration is overkill: the work is one browser capture + one\n * vision LLM call. This client satisfies the kernel contract while\n * doing the audit in-process; no container, no sandbox-SDK backend.\n *\n * The client owns ONE browser for its lifetime and creates a fresh\n * context per iteration (isolated cookies/storage). Playwright is\n * dynamically imported so consumers who use a different `LoopSandboxClient`\n * — e.g. a fleet executor that drives Playwright remotely — do not pay\n * the peer dep cost.\n *\n * Concurrency: each iteration's prompt carries a self-describing task\n * envelope (see `prompt.ts`), so concurrent fanout iterations do not race\n * over per-client side state.\n */\n\nimport type { CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox'\nimport type { LoopSandboxClient } from '../../runtime/types'\nimport type { UiJudge } from './judge'\nimport { decodeAuditTaskEnvelope } from './prompt'\nimport { slugify } from './slugify'\nimport type { UiAuditCapture, UiAuditCaptureRequest } from './task'\n\n// All synthetic events the auditor emits flow through this helper. Reason:\n// `SandboxEvent.data` is a sandbox-SDK shape (effectively `Record<string,\n// unknown>`) that our typed payloads (`UiAuditCapture`, `UiFinding`, …) do not\n// satisfy structurally. The cast moves the type-system smell into a single,\n// named, documented call site so the call sites in `runIteration` stay clean.\n// The runtime contract — `{ type, data }` — is what the output adapter reads;\n// the static type is what the kernel collects into `SandboxEvent[]`.\nfunction asSandboxEvent<T>(type: string, data: T): SandboxEvent {\n return { type, data } as unknown as SandboxEvent\n}\n\n/** @experimental */\nexport interface InProcessUiAuditClientOptions {\n /**\n * Absolute path under which screenshots are written. Each capture lands\n * at `<workspaceDir>/screenshots/<filename>`; finding screenshot paths\n * are workspace-relative (`screenshots/<filename>`).\n */\n workspaceDir: string\n /** The vision judge that turns captures into findings. */\n judge: UiJudge\n /**\n * Navigation policy.\n *\n * `'strict'` (default) waits for `networkidle` and fails the iteration\n * if the page does not settle. `'spa'` waits for `domcontentloaded` —\n * use for single-page apps that hold open long-poll/websocket\n * connections and never settle.\n */\n navPolicy?: 'strict' | 'spa'\n /**\n * Browser launch override. Default: chromium headless via Playwright.\n * Consumers pass a custom factory to target a remote browser, a\n * different channel, or a fleet adapter.\n */\n launchBrowser?: () => Promise<BrowserHandle>\n}\n\n/** @experimental */\nexport interface BrowserHandle {\n newContext(options?: {\n viewport?: { width: number; height: number }\n }): Promise<BrowserContextHandle>\n close(): Promise<void>\n}\n\n/** @experimental */\nexport interface BrowserContextHandle {\n newPage(): Promise<PageHandle>\n close(): Promise<void>\n}\n\n/** @experimental */\nexport interface PageHandle {\n setViewportSize(size: { width: number; height: number }): Promise<void>\n goto(url: string, options?: { waitUntil?: string; timeout?: number }): Promise<unknown>\n waitForSelector(selector: string, options?: { timeout?: number }): Promise<unknown>\n waitForTimeout(ms: number): Promise<void>\n screenshot(options: { path: string; fullPage?: boolean }): Promise<void>\n locator(selector: string): {\n first(): { screenshot(options: { path: string }): Promise<void> }\n }\n}\n\nconst DEFAULT_VIEWPORT = { width: 1280, height: 800 } as const\nconst NAV_TIMEOUT_MS = 30_000\n\nasync function defaultLaunch(): Promise<BrowserHandle> {\n const mod = (await import('playwright')) as unknown as {\n chromium?: { launch(options?: { headless?: boolean }): Promise<BrowserHandle> }\n }\n if (!mod?.chromium || typeof mod.chromium.launch !== 'function') {\n throw new Error(\n 'ui-auditor: playwright is not installed. Install `playwright` (and run `playwright install chromium`) or pass a custom `launchBrowser` to createInProcessUiAuditClient.',\n )\n }\n return mod.chromium.launch({ headless: true })\n}\n\nfunction nowStamp(): string {\n const d = new Date()\n const pad = (n: number) => String(n).padStart(2, '0')\n // Millisecond resolution so parallel fanout iterations capturing the same\n // route/viewport/label within the same second don't collide on filename and\n // silently overwrite each other.\n return (\n `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}-` +\n `${pad(d.getUTCHours())}${pad(d.getUTCMinutes())}${pad(d.getUTCSeconds())}` +\n `-${String(d.getUTCMilliseconds()).padStart(3, '0')}`\n )\n}\n\nfunction viewportOf(req: UiAuditCaptureRequest): { width: number; height: number } {\n return req.viewport ?? DEFAULT_VIEWPORT\n}\n\nfunction captureFilename(req: UiAuditCaptureRequest): string {\n const vp = viewportOf(req)\n const labelPart = req.label ? `--${slugify(req.label, 'label')}` : ''\n return `${slugify(req.route, 'route')}--${vp.width}x${vp.height}${labelPart}--${nowStamp()}.png`\n}\n\nfunction assertHttpUrl(url: string): void {\n let parsed: URL\n try {\n parsed = new URL(url)\n } catch {\n throw new Error(`ui-auditor: capture url is not parseable (got ${JSON.stringify(url)})`)\n }\n // SSRF defense at the client boundary. The MCP tool already restricts to\n // http(s), but `createInProcessUiAuditClient` is exported and can be wired\n // up directly by consumers (the example does this). A crafted task envelope\n // could otherwise navigate Playwright to `file://`, `data:`, `javascript:`\n // and read local files or execute inline content.\n if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {\n throw new Error(\n `ui-auditor: capture url must use http or https (got ${parsed.protocol} in ${JSON.stringify(url)})`,\n )\n }\n}\n\nasync function captureOne(\n page: PageHandle,\n req: UiAuditCaptureRequest,\n outAbsPath: string,\n signal: AbortSignal,\n navPolicy: 'strict' | 'spa',\n): Promise<void> {\n signal.throwIfAborted()\n assertHttpUrl(req.url)\n // Apply the per-capture viewport before navigation. The capture metadata\n // and filename both encode this viewport; the rendered page must match.\n await page.setViewportSize(viewportOf(req))\n const waitUntil = navPolicy === 'spa' ? 'domcontentloaded' : 'networkidle'\n await page.goto(req.url, { waitUntil, timeout: NAV_TIMEOUT_MS })\n if (req.waitFor) {\n await page.waitForSelector(req.waitFor, { timeout: 15_000 })\n }\n const extra = req.waitMs ?? 500\n if (extra > 0) await page.waitForTimeout(extra)\n signal.throwIfAborted()\n if (req.elementSelector) {\n await page.locator(req.elementSelector).first().screenshot({ path: outAbsPath })\n } else {\n await page.screenshot({ path: outAbsPath, fullPage: req.fullPage === true })\n }\n}\n\ninterface SyntheticSandbox extends SandboxInstance {}\n\nfunction makeSandboxId(): string {\n const rand = () => Math.random().toString(16).slice(2, 10)\n return `ui-audit-${rand()}${rand()}`\n}\n\n/** @experimental */\nexport function createInProcessUiAuditClient(\n options: InProcessUiAuditClientOptions,\n): LoopSandboxClient & {\n /**\n * Close the underlying browser. Idempotent.\n *\n * Contract: callers MUST ensure no iterations are in flight when this is\n * called. The kernel respects this — `runLoop` awaits every iteration\n * before returning, so `await runLoop(...); await client.close()` is the\n * intended pattern (see `examples/ui-audit`). If `close()` is invoked\n * concurrently with a running iteration, the browser teardown will race\n * against in-flight page operations; the iteration will surface an\n * AggregateError carrying both the iteration error and the close error,\n * but no work is lost silently.\n */\n close(): Promise<void>\n} {\n const launch = options.launchBrowser ?? defaultLaunch\n const navPolicy = options.navPolicy ?? 'strict'\n let browserPromise: Promise<BrowserHandle> | undefined\n let closed = false\n\n async function getBrowser(): Promise<BrowserHandle> {\n if (closed) {\n throw new Error('ui-auditor: client is closed; create a new client to run another iteration')\n }\n if (!browserPromise) browserPromise = launch()\n return browserPromise\n }\n\n async function* runIteration(\n promptText: string,\n signal: AbortSignal,\n ): AsyncIterable<SandboxEvent> {\n const task = decodeAuditTaskEnvelope(promptText)\n if (!task) {\n throw new Error(\n 'ui-auditor: prompt is missing a UI_AUDIT_TASK envelope. Use uiAuditorProfile().taskToPrompt to format prompts, or pass an envelope-prefixed prompt manually.',\n )\n }\n if (task.captures.length === 0) {\n throw new Error('ui-auditor: task has zero captures; nothing to audit.')\n }\n\n yield asSandboxEvent('audit.lens', { lens: task.lens })\n\n const browser = await getBrowser()\n const context = await browser.newContext({ viewport: DEFAULT_VIEWPORT })\n // Track both the primary iteration error and any context-close failure so\n // the cleanup path never silently swallows a leaked-context bug AND a\n // close failure never shadows the real iteration error. After the\n // try/catch/finally settles, we rethrow the primary if there was one,\n // otherwise we rethrow the close error.\n let primaryError: unknown\n let closeError: unknown\n try {\n const page = await context.newPage()\n const captures: UiAuditCapture[] = []\n const fs = await import('node:fs/promises')\n const path = await import('node:path')\n const shotsDir = path.join(options.workspaceDir, 'screenshots')\n await fs.mkdir(shotsDir, { recursive: true })\n\n for (const req of task.captures) {\n signal.throwIfAborted()\n const filename = captureFilename(req)\n const absPath = path.join(shotsDir, filename)\n const relPath = `screenshots/${filename}`\n await captureOne(page, req, absPath, signal, navPolicy)\n const vp = viewportOf(req)\n const cap: UiAuditCapture = {\n path: relPath,\n viewport: `${vp.width}x${vp.height}`,\n fullPage: req.fullPage === true,\n route: req.route,\n url: req.url,\n capturedAt: new Date().toISOString(),\n }\n if (req.elementSelector) cap.elementSelector = req.elementSelector\n if (req.label) cap.label = req.label\n captures.push(cap)\n yield asSandboxEvent('audit.capture', cap)\n }\n\n const judgeOut = await options.judge({\n lens: task.lens,\n captures,\n productContext: task.productContext,\n knownFindingIds: task.knownFindingIds,\n promptText,\n signal,\n })\n\n for (const finding of judgeOut.findings) {\n yield asSandboxEvent('audit.finding', finding)\n }\n if (judgeOut.notes && judgeOut.notes.trim().length > 0) {\n yield asSandboxEvent('audit.notes', { notes: judgeOut.notes })\n }\n\n const usage = judgeOut.tokenUsage ?? { input: 0, output: 0 }\n yield asSandboxEvent('done', {\n tokenUsage: {\n inputTokens: usage.input,\n outputTokens: usage.output,\n },\n totalCostUsd: judgeOut.costUsd ?? 0,\n })\n } catch (err) {\n primaryError = err\n } finally {\n try {\n await context.close()\n } catch (err) {\n closeError = err\n }\n }\n // When both the iteration and the cleanup fail, surface both via\n // AggregateError so a leaked context bug is not silently masked by an\n // earlier iteration failure (per the fail-loud doctrine).\n if (primaryError !== undefined && closeError !== undefined) {\n throw new AggregateError(\n [primaryError, closeError],\n 'ui-auditor: iteration failed AND context.close() failed; both errors attached.',\n )\n }\n if (primaryError !== undefined) throw primaryError\n if (closeError !== undefined) throw closeError\n }\n\n function makeSyntheticSandbox(): SyntheticSandbox {\n const id = makeSandboxId()\n const instance = {\n id,\n streamPrompt(message: string, opts?: { signal?: AbortSignal }): AsyncIterable<SandboxEvent> {\n const signal = opts?.signal ?? new AbortController().signal\n return runIteration(message, signal)\n },\n async delete(): Promise<void> {\n // No per-sandbox resources to release; the browser is shared and\n // closed by `client.close()`. Intentionally a no-op so trace-time\n // `box.delete()` succeeds without doing surprising work.\n },\n }\n return instance as unknown as SyntheticSandbox\n }\n\n return {\n async create(_options?: CreateSandboxOptions) {\n return makeSyntheticSandbox()\n },\n describePlacement(box) {\n const id = (box as unknown as { id?: string }).id\n return { kind: 'sibling', sandboxId: typeof id === 'string' ? id : undefined }\n },\n async close() {\n closed = true\n const pending = browserPromise\n browserPromise = undefined\n if (pending) {\n const browser = await pending\n await browser.close()\n }\n },\n }\n}\n","/**\n * @experimental\n *\n * Per-lens guidance the auditor inlines into its system prompt for an\n * iteration. Each entry is a self-contained brief — the same content the\n * standalone ui-issue-finder skill ships, embedded as a string constant so\n * agent-runtime carries no runtime dep on that external workspace.\n *\n * Briefs are deliberately concrete: they enumerate the SIGNALS to look for\n * and the cross-lens distinctions to respect, so the judge files fewer\n * pile-on findings under generic labels.\n */\n\nimport type { UiLens } from './substrate'\n\n/** @experimental */\nexport const SHARED_AUDITOR_RULES = `\nYou are auditing a UI for a specific class of problems. Stay strictly in your assigned lens — do not file issues that belong to another lens (a separate iteration will catch those).\n\nA finding is only valid if a thoughtful product designer would agree the screenshot shows something that should change. Avoid:\n- Personal taste (\"I'd prefer brand blue\").\n- Hallucinated text or controls you cannot actually see in the screenshot.\n- Suggestions that depend on requirements you don't have access to.\n- Pile-on findings about the same root cause — file ONE finding and use \\`similarTo\\` to link the rest.\n\nRequired for every finding:\n- title: concrete, names the offending element AND what's wrong (NOT \"improve UX\").\n- severity: critical=blocks a core task or accessibility blocker; high=noticeable friction; med=visible polish issue; low=nitpick.\n- observation: 1–3 sentences describing exactly what you see that is wrong.\n- impact: who is affected and how (concrete).\n- suggestedFix: a specific change a developer could apply without asking you back.\n- screenshots: refer to the captures attached to this iteration by path.\n- selector: when you can pin the offending element with a CSS selector.\n\nMost findings are med or low. Reserve high/critical for genuine blockers.\n`.trim()\n\n/** @experimental */\nexport const LENS_BRIEFS: Record<UiLens, string> = {\n consistency: `\nLENS: consistency\nLook for inconsistencies in the design system — things that look like they came from different products glued together.\nSignals: multiple font families, inconsistent weights/sizes for the same role, two shades of \"primary\", arbitrary paddings/margins that don't snap to a scale (4/8/12/16/24), same control with different border-radius or shadow on different pages, mixed icon styles (filled vs outlined), inconsistent button heights/padding for the same variant, inconsistent capitalization (Title Case vs sentence case) for the same role.\nNOT this lens: layout misalignment (use \\`layout\\`), confusing user flow (use \\`ux-flow\\`), contrast/keyboard issues (use \\`accessibility\\`).\nTitle format: \\`Inconsistent <thing> between <A> and <B>\\`.\n`.trim(),\n hierarchy: `\nLENS: hierarchy\nLook for broken visual hierarchy — places where the eye does not land on what matters most.\nSignals: primary CTA same weight as secondary/tertiary controls, headings (H1/H2/H3) nearly the same size, important data buried (headline number smaller than its label), decoration outshining content, too many emphases competing, wrong scan order, missing or overly heavy section dividers.\nNOT this lens: same-role styled differently (\\`consistency\\`), grid/alignment (\\`layout\\`), contrast-failing text (\\`accessibility\\`).\nTitle format: \\`Weak hierarchy: <element> does not read as the <intended-role>\\`.\n`.trim(),\n layout: `\nLENS: layout\nLook for layout and organization problems — alignment, grouping, whitespace, structural choices that hurt scannability.\nSignals: misalignment within rows, inconsistent gutters in grids, orphan whitespace next to crammed regions, poor grouping (related fields separated, unrelated fields adjacent), no visual sections (long wall of content), container overflow (text/content punching out of card boundaries), cramped or oversized hit targets, sidebars/headers sized wrong relative to main content.\nNOT this lens: same-role styled differently (\\`consistency\\`), click-distance/friction (\\`ux-flow\\`), overflow specifically at small viewports (\\`responsive\\`).\nTitle format: \\`<Region> alignment/spacing problem\\` or \\`<Region> grouping unclear\\`.\n`.trim(),\n 'ux-flow': `\nLENS: ux-flow\nLook for interaction-flow friction — action sequences that are slower, more annoying, or more error-prone than necessary.\nSignals: sequential clicks far apart (e.g. Next top-right while user is bottom-left), destructive action adjacent to primary with same weight, confirmations that don't say what's being confirmed, primary CTA below the fold or hidden in a kebab menu, silent state changes (toggle gives no feedback), form ordering that fights real-world order, dead-end states after submit, lost inputs on back-navigation, hidden pre-selected options.\nNOT this lens: visual style only (\\`consistency\\`), component arrangement without a flow problem (\\`layout\\`), microcopy clarity (\\`content\\`).\nTitle format: \\`<Action A> → <Action B> friction: <root cause>\\`.\n`.trim(),\n duplication: `\nLENS: duplication\nLook for redundancy — the same control, link, or piece of content appearing more than once with no good reason.\nSignals: two ways to do the same action on the same screen with no difference, repeated nav (same links in sidebar AND top nav), drifted duplicates (two copies that have diverged), content repeated verbatim, icon + label saying the same thing twice in one row, per-row + bulk actions that overlap confusingly, multiple status indicators conveying the same status.\nNOT this lens: inconsistent styling of duplicates (\\`consistency\\`) — this lens is about the existence of duplicates.\nTitle format: \\`Duplicate <thing> in <location A> and <location B>\\`.\n`.trim(),\n accessibility: `\nLENS: accessibility\nLook for accessibility blockers and degradations. Be conservative — do not assume violations you cannot see.\nSignals: insufficient contrast on body text or controls, missing/invisible focus styles, tiny tap targets (<24px on mobile), color as sole signal (red border with no message), form labels missing or not associated (placeholders standing in for labels), broken heading order (H1 → H4), modals that don't trap focus, decorative elements that take focus, errors not announced, important text rendered inside images.\nNOT this lens: generic \"looks confusing\" (\\`hierarchy\\` or \\`content\\`), layout overflow at small viewports (\\`responsive\\`).\nTitle format: \\`Accessibility: <specific blocker> in <element>\\`.\n`.trim(),\n responsive: `\nLENS: responsive\nLook for layout breakage across viewport sizes — content that works at one width but degrades at another. This iteration's captures should include the same surface at >=2 viewports; compare across them.\nSignals: horizontal scroll where content should reflow, overlapping elements (header overlaps content, fixed footer covers inputs), desktop nav crammed into mobile without collapsing, table columns that don't truncate, tap targets too close at touch sizes, controls vanishing at certain widths, layout flips that break grouping order, modals exceeding viewport height (confirm button unreachable).\nNOT this lens: issues present at every viewport (\\`consistency\\` / \\`hierarchy\\` instead).\nTitle format: \\`<Element/Region> breaks at <viewport>\\`.\n`.trim(),\n states: `\nLENS: states\nLook for missing or broken UI states — the not-happy-paths that make a product feel finished or unfinished. The iteration's captures should depict at least one non-default state.\nSignals: empty lists with no guidance, skeletons that don't match final layout (CLS on settle), error states with no message or recovery action, disabled buttons with no explanation, toasts that disappear before being read, success states that don't confirm, missing hover/focus/active/disabled variants on primary controls, no long-content view, no-permission state broken.\nNOT this lens: generic polish on the happy path (other lenses), missing focus rings specifically (\\`accessibility\\`).\nTitle format: \\`Missing/broken <state> state on <surface>\\`.\n`.trim(),\n content: `\nLENS: content\nLook for microcopy and content problems — text that is unclear, inconsistent, condescending, jargon-heavy, or wrong.\nSignals: jargon/internal language leaking (\"Provisioning a Tenant\" instead of \"Setting up your account\"), inconsistent terminology (workspace vs team), verbose button labels, empty-state copy that's just \"No results\", error messages blaming the user, tone inconsistency, truncation without affordance, mixed date/number formats on one page, placeholder used as a label, \"Saved!\" toast appearing before save completes, typos and grammar errors.\nNOT this lens: visual treatment of text (\\`hierarchy\\` / \\`consistency\\`), missing labels for a11y (\\`accessibility\\`).\nTitle format: \\`Copy: \"<actual text>\" in <location>\\` or \\`Inconsistent term: \"<A>\" vs \"<B>\"\\`.\n`.trim(),\n interaction: `\nLENS: interaction\nLook for interaction quality problems — affordances, feedback, and micro-interactions.\nSignals: no affordance (clickable areas not looking clickable, non-clickable areas looking clickable), missing feedback (>100ms click with no progress), hover surprises (whole row highlights but only title clickable), cursor inconsistency, animations that block input, missing transitions where they're needed (accordion snaps open), drag-and-drop without indicators, scroll-jacking, click-through bugs (card click handler firing alongside button), hover-only revelations on touch.\nNOT this lens: position of controls (\\`layout\\` / \\`ux-flow\\`), missing focus styles (\\`accessibility\\`).\nTitle format: \\`<Action> on <element>: <missing/wrong> feedback\\`.\n`.trim(),\n 'performance-perceived': `\nLENS: performance-perceived\nLook for perceived-performance problems — visible jank a real user would notice, not benchmark numbers. This iteration's captures should include >=2 frames during load to show shift.\nSignals: layout shift (CLS) when late-arriving images/fonts/banners settle, FOUC (flash of unstyled content), font swap jumps, late-loading hero images that shift everything, skeletons that don't match final shape, spinners on instant local actions, loading state reappearing after content paints (refetch on focus), modal open animation longer than the operation it precedes.\nNOT this lens: slow API calls (file separately), stale data after navigation (\\`states\\`).\nTitle format: \\`Layout shift / late paint on <route>: <root cause>\\`.\n`.trim(),\n other: `\nLENS: other\nUse ONLY when a finding is clearly a UI quality issue but does not fit any other lens. Strongly prefer a specific lens — \\`other\\` should be rare. Title must still be concrete.\n`.trim(),\n}\n\n/**\n * Build a system prompt for a single auditor iteration.\n *\n * @experimental\n */\nexport function buildAuditorSystemPrompt(lens: UiLens): string {\n const brief = LENS_BRIEFS[lens]\n return `${SHARED_AUDITOR_RULES}\\n\\n${brief}`\n}\n","/**\n * @experimental\n *\n * Sandbox-event stream → UiAuditOutput decoder. The custom auditor\n * `LoopSandboxClient` emits events of the form:\n *\n * { type: 'audit.capture', data: UiAuditCapture }\n * { type: 'audit.finding', data: UiFinding }\n * { type: 'audit.notes', data: { notes: string } }\n * { type: 'audit.lens', data: { lens: UiLens } }\n * { type: 'done', data: { tokenUsage: { ... }, totalCostUsd?: number } }\n *\n * Other event types are tolerated and ignored. The adapter is pure: it\n * folds an already-collected event array into a UiAuditOutput.\n */\n\nimport type { SandboxEvent } from '@tangle-network/sandbox'\nimport { UI_LENSES, type UiFinding, type UiLens } from './substrate'\nimport type { UiAuditCapture, UiAuditOutput } from './task'\n\n// Build the lens-validation set from the canonical UI_LENSES tuple so adding\n// a lens to the substrate automatically extends the parser; otherwise a new\n// lens would silently fail isUiLens() and parseAuditorEvents would drop\n// every event using it.\nconst KNOWN_LENS_VALUES = new Set<UiLens>(UI_LENSES)\n\nfunction isUiLens(v: unknown): v is UiLens {\n return typeof v === 'string' && KNOWN_LENS_VALUES.has(v as UiLens)\n}\n\n/** @experimental */\nexport function parseAuditorEvents(events: SandboxEvent[]): UiAuditOutput {\n const findings: UiFinding[] = []\n const captures: UiAuditCapture[] = []\n let lens: UiLens | undefined\n let notes: string | undefined\n\n for (const evt of events) {\n if (!evt || typeof evt !== 'object') continue\n const type = String(evt.type ?? '')\n const data =\n evt.data && typeof evt.data === 'object' ? (evt.data as Record<string, unknown>) : undefined\n if (!data) continue\n\n switch (type) {\n case 'audit.lens': {\n const v = data.lens\n if (isUiLens(v)) lens = v\n break\n }\n case 'audit.capture': {\n const cap = data as unknown as Partial<UiAuditCapture>\n if (\n typeof cap.path === 'string' &&\n typeof cap.viewport === 'string' &&\n typeof cap.fullPage === 'boolean' &&\n typeof cap.route === 'string' &&\n typeof cap.url === 'string' &&\n typeof cap.capturedAt === 'string'\n ) {\n const out: UiAuditCapture = {\n path: cap.path,\n viewport: cap.viewport,\n fullPage: cap.fullPage,\n route: cap.route,\n url: cap.url,\n capturedAt: cap.capturedAt,\n }\n if (cap.elementSelector) out.elementSelector = cap.elementSelector\n if (cap.label) out.label = cap.label\n captures.push(out)\n }\n break\n }\n case 'audit.finding': {\n const f = data as unknown as Partial<UiFinding>\n // Hard requirement: all the actionable fields must be present and\n // non-empty for a finding to enter the output. The validator does the\n // softer scoring; the adapter only filters structural junk.\n if (\n typeof f.title === 'string' &&\n f.title.trim().length > 0 &&\n isUiLens(f.lens) &&\n typeof f.severity === 'string' &&\n ['low', 'med', 'high', 'critical'].includes(f.severity) &&\n typeof f.route === 'string' &&\n typeof f.observation === 'string' &&\n typeof f.impact === 'string' &&\n typeof f.suggestedFix === 'string' &&\n Array.isArray(f.screenshots)\n ) {\n findings.push(f as UiFinding)\n }\n break\n }\n case 'audit.notes': {\n const n = data.notes\n if (typeof n === 'string' && n.trim().length > 0) notes = n\n break\n }\n default:\n // Tolerate cost/usage events and other backend chatter — extractLlmCallEvent\n // in run-loop.ts handles cost accounting upstream from the adapter.\n break\n }\n }\n\n const out: UiAuditOutput = { lens: lens ?? 'other', findings, captures }\n if (notes) out.notes = notes\n return out\n}\n","/**\n * @experimental\n *\n * Auditor validator — scores a single iteration's findings for actionability\n * and gates the iteration result. The kernel uses `valid` + `score` for\n * winner selection across fanned-out iterations and to detect a degenerate\n * iteration (lens-violating findings, no screenshot evidence, no findings\n * at all on a route where we expected some).\n *\n * Hard fails (`valid = false`):\n * - A finding is filed under a lens that does not match the iteration's\n * lens. The whole iteration is bad — the judge isn't following the\n * lens discipline and the resulting Markdown would mislead reviewers.\n * - A finding has no screenshot reference.\n * - A finding's screenshot references a path that wasn't captured in\n * this iteration.\n *\n * Score (0..1, max two decimals stable):\n * - 0.4 * specificityRatio — proportion of findings with a selector\n * - 0.4 * evidenceRatio — proportion of findings whose screenshots resolve to captures\n * - 0.2 * (1 - genericTitleRatio) — proportion of findings whose titles\n * are concrete (not \"improve UX\", \"fix layout\", etc.)\n *\n * An iteration with zero findings scores 0.5 by convention — neither a\n * confident pass nor a hard failure (the judge might just have nothing to\n * say on this lens). The driver decides what to do with it.\n */\n\nimport type { DefaultVerdict } from '@tangle-network/agent-eval'\nimport type { Validator } from '../../runtime/types'\nimport type { UiAuditOutput, UiAuditTask } from './task'\n\nconst GENERIC_TITLE_PATTERNS = [\n /^improve\\s/i,\n /^fix\\s/i,\n /^update\\s/i,\n /^better\\s/i,\n /^bad\\s/i,\n /^make\\s.+\\sbetter/i,\n /\\bUX\\b\\s*$/i,\n /\\bUI\\b\\s*$/i,\n]\n\nfunction isGenericTitle(title: string): boolean {\n const t = title.trim()\n if (t.length < 16) return true\n return GENERIC_TITLE_PATTERNS.some((re) => re.test(t))\n}\n\n/** @experimental */\nexport function createUiAuditorValidator(task: UiAuditTask): Validator<UiAuditOutput> {\n return {\n async validate(output) {\n const findings = output.findings\n const captures = output.captures\n const capturePaths = new Set(captures.map((c) => c.path))\n\n const offLens = findings.filter((f) => f.lens !== task.lens)\n if (offLens.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${offLens.length} finding(s) filed under wrong lens (expected ${task.lens}; got ${offLens.map((f) => f.lens).join(', ')})`,\n scores: { offLens: 0 },\n }\n return verdict\n }\n\n const missingEvidence = findings.filter(\n (f) => !Array.isArray(f.screenshots) || f.screenshots.length === 0,\n )\n if (missingEvidence.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${missingEvidence.length} finding(s) have no screenshot evidence`,\n scores: { evidence: 0 },\n }\n return verdict\n }\n\n const unresolvedShot = findings.filter((f) =>\n f.screenshots.some((s) => !capturePaths.has(s.path)),\n )\n if (unresolvedShot.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${unresolvedShot.length} finding(s) reference screenshot paths not captured this iteration`,\n scores: { evidence: 0 },\n }\n return verdict\n }\n\n if (findings.length === 0) {\n const verdict: DefaultVerdict = {\n valid: true,\n score: 0.5,\n notes: 'No findings reported. Neither a confident pass nor a failure.',\n scores: { specificity: 0, evidence: 1, titles: 1 },\n }\n return verdict\n }\n\n const withSelector = findings.filter((f) => typeof f.selector === 'string').length\n const specificity = withSelector / findings.length\n const generic = findings.filter((f) => isGenericTitle(f.title)).length\n const titles = 1 - generic / findings.length\n // Compute evidence honestly from the data: proportion of findings whose\n // screenshots are all resolvable against this iteration's captures. The\n // guards above hard-fail when this would be < 1, so today the result is\n // always 1; if a future change relaxes those guards into a soft-fail\n // mode, this still produces a truthful evidence ratio rather than a\n // stale constant inflating the score.\n const withFullEvidence = findings.filter(\n (f) =>\n Array.isArray(f.screenshots) &&\n f.screenshots.length > 0 &&\n f.screenshots.every((s) => capturePaths.has(s.path)),\n ).length\n const evidence = withFullEvidence / findings.length\n const score = Number((0.4 * specificity + 0.4 * evidence + 0.2 * titles).toFixed(4))\n\n const verdict: DefaultVerdict = {\n valid: true,\n score,\n notes: `${findings.length} finding(s) — specificity=${specificity.toFixed(2)} evidence=${evidence.toFixed(2)} titles=${titles.toFixed(2)}`,\n scores: { specificity, evidence, titles },\n }\n return verdict\n },\n }\n}\n","/**\n * @experimental\n *\n * `uiAuditorProfile` — preset for vision-driven UI audit iterations.\n *\n * Mirrors the shape of `coderProfile`: returns the `AgentRunSpec`, output\n * adapter, validator, and prompt formatter the loop kernel needs. Unlike\n * `coderProfile`, the agent's \"harness\" is not a sandbox-SDK code-runner\n * — it's a vision-capable judge driving a browser. The loop kernel still\n * iterates `client.create() → box.streamPrompt() → box.delete()`; the\n * client/box pair are provided by `createInProcessUiAuditClient` (in\n * `./in-process-client.ts`) or by a consumer-supplied `LoopSandboxClient`.\n */\n\nimport type { AgentProfile } from '@tangle-network/sandbox'\nimport type { AgentRunSpec, OutputAdapter, Validator } from '../../runtime/types'\nimport { buildAuditorSystemPrompt } from './lens-prompts'\nimport { parseAuditorEvents } from './output-adapter'\nimport { encodeAuditTaskEnvelope, formatAuditorPrompt } from './prompt'\nimport type { UiAuditOutput, UiAuditTask } from './task'\nimport { createUiAuditorValidator } from './validator'\n\n/** @experimental */\nexport interface UiAuditorProfileOptions {\n /**\n * Stable name surfaced in trace events. Defaults to `ui-auditor`.\n */\n name?: string\n /**\n * Optional model identifier passed in `AgentProfile.model.default`.\n * The consumer's `LoopSandboxClient` chooses how to interpret it.\n */\n model?: string\n /**\n * Task bound to the validator. Without it the validator uses the lens\n * embedded in the iteration output as its expectation — fine for one-off\n * use; less strict than passing the task explicitly.\n */\n task?: UiAuditTask\n}\n\n/** @experimental */\nexport function uiAuditorProfile(options: UiAuditorProfileOptions = {}): {\n profile: AgentProfile\n taskToPrompt: (task: UiAuditTask) => string\n output: OutputAdapter<UiAuditOutput>\n validator: Validator<UiAuditOutput>\n agentRunSpec: AgentRunSpec<UiAuditTask>\n} {\n const name = options.name ?? 'ui-auditor'\n\n // Lens is per-task; the profile's system prompt is filled in by the\n // taskToPrompt formatter at iteration time (prefixed to the user\n // message). Keeping the profile lens-agnostic lets one AgentRunSpec\n // serve every lens-iteration of the loop.\n const profile: AgentProfile = {\n name,\n description: 'Vision-driven UI auditor. One lens per iteration.',\n prompt: { systemPrompt: '' },\n model: options.model ? { default: options.model } : undefined,\n tools: { browser: true, vision: true },\n metadata: { role: 'ui-auditor' },\n }\n\n const output: OutputAdapter<UiAuditOutput> = { parse: parseAuditorEvents }\n const validator: Validator<UiAuditOutput> = options.task\n ? createUiAuditorValidator(options.task)\n : createUiAuditorValidator({ lens: 'other', captures: [] })\n\n // Prompt shape (consumed both by sandbox-SDK harnesses AND by the\n // in-process auditor client):\n // <<UI_AUDIT_TASK>>{json}<<UI_AUDIT_TASK_END>>\n // <system-prompt for the lens>\n // <human-readable iteration brief>\n // The envelope makes the iteration self-describing so concurrent fanout\n // does not race over per-client side state. Sandbox-SDK harnesses can\n // ignore the envelope; the in-process auditor client decodes it back\n // into a typed UiAuditTask via decodeAuditTaskEnvelope.\n const taskToPrompt = (task: UiAuditTask): string =>\n `${encodeAuditTaskEnvelope(task)}\\n${buildAuditorSystemPrompt(task.lens)}\\n\\n${formatAuditorPrompt(task)}`\n\n const agentRunSpec: AgentRunSpec<UiAuditTask> = {\n name,\n profile,\n taskToPrompt,\n }\n\n return { profile, taskToPrompt, output, validator, agentRunSpec }\n}\n"],"mappings":";;;;;;;;;;;;;;;AAoBA,IAAM,iBAAiB;AACvB,IAAM,eAAe;AAGd,SAAS,wBAAwB,MAA2B;AACjE,SAAO,GAAG,cAAc,GAAG,KAAK,UAAU,IAAI,CAAC,GAAG,YAAY;AAChE;AASO,SAAS,wBAAwB,QAAyC;AAC/E,QAAM,QAAQ,OAAO,QAAQ,cAAc;AAC3C,MAAI,UAAU,GAAI,QAAO;AACzB,QAAM,eAAe,QAAQ,eAAe;AAC5C,QAAM,MAAM,OAAO,QAAQ,cAAc,YAAY;AACrD,MAAI,QAAQ,GAAI,QAAO;AACvB,QAAM,UAAU,OAAO,MAAM,cAAc,GAAG;AAC9C,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,OAAO;AACjC,QAAI,CAAC,UAAU,OAAO,WAAW,SAAU,QAAO;AAClD,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,SAAS,YAAY,CAAC,MAAM,QAAQ,EAAE,QAAQ,EAAG,QAAO;AACrE,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAGO,SAAS,oBAAoB,MAA2B;AAC7D,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,qCAAgC,KAAK,IAAI,EAAE;AACtD,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,kBAAkB,KAAK,eAAe,KAAK,EAAE,SAAS,GAAG;AAChE,UAAM,KAAK,oBAAoB;AAC/B,UAAM,KAAK,KAAK,eAAe,KAAK,CAAC;AACrC,UAAM,KAAK,EAAE;AAAA,EACf;AACA,QAAM,KAAK,qBAAqB;AAChC,OAAK,SAAS,QAAQ,CAAC,KAAK,MAAM;AAChC,UAAM,KAAK,IAAI,WAAW,GAAG,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,MAAM,KAAK;AAC3E,UAAM,SAAS;AAAA,MACb,YAAY,EAAE;AAAA,MACd,IAAI,WAAW,kBAAkB;AAAA,MACjC,IAAI,kBAAkB,cAAc,IAAI,eAAe,OAAO;AAAA,MAC9D,IAAI,UAAU,aAAa,IAAI,OAAO,OAAO;AAAA,MAC7C,IAAI,WAAW,SAAY,UAAU,IAAI,MAAM,KAAK;AAAA,MACpD,IAAI,QAAQ,SAAS,IAAI,KAAK,KAAK;AAAA,IACrC,EACG,OAAO,CAAC,MAAmB,MAAM,IAAI,EACrC,KAAK,QAAK;AACb,UAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,KAAK,UAAU,IAAI,GAAG,IAAI,SAAS,IAAI,MAAM,MAAM,EAAE,EAAE;AAAA,EAC7F,CAAC;AACD,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,mBAAmB,KAAK,gBAAgB,SAAS,GAAG;AAC3D,UAAM,KAAK,uDAAuD;AAClE,UAAM,KAAK,KAAK,gBAAgB,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,EAAE,SAAS,GAAG,GAAG,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC;AACvF,UAAM,KAAK,EAAE;AAAA,EACf;AACA,QAAM,KAAK,kBAAkB;AAC7B,QAAM;AAAA,IACJ;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;;;AClDA,SAAS,eAAkB,MAAc,MAAuB;AAC9D,SAAO,EAAE,MAAM,KAAK;AACtB;AAuDA,IAAM,mBAAmB,EAAE,OAAO,MAAM,QAAQ,IAAI;AACpD,IAAM,iBAAiB;AAEvB,eAAe,gBAAwC;AACrD,QAAM,MAAO,MAAM,OAAO,YAAY;AAGtC,MAAI,CAAC,KAAK,YAAY,OAAO,IAAI,SAAS,WAAW,YAAY;AAC/D,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO,IAAI,SAAS,OAAO,EAAE,UAAU,KAAK,CAAC;AAC/C;AAEA,SAAS,WAAmB;AAC1B,QAAM,IAAI,oBAAI,KAAK;AACnB,QAAM,MAAM,CAAC,MAAc,OAAO,CAAC,EAAE,SAAS,GAAG,GAAG;AAIpD,SACE,GAAG,EAAE,eAAe,CAAC,GAAG,IAAI,EAAE,YAAY,IAAI,CAAC,CAAC,GAAG,IAAI,EAAE,WAAW,CAAC,CAAC,IACnE,IAAI,EAAE,YAAY,CAAC,CAAC,GAAG,IAAI,EAAE,cAAc,CAAC,CAAC,GAAG,IAAI,EAAE,cAAc,CAAC,CAAC,IACrE,OAAO,EAAE,mBAAmB,CAAC,EAAE,SAAS,GAAG,GAAG,CAAC;AAEvD;AAEA,SAAS,WAAW,KAA+D;AACjF,SAAO,IAAI,YAAY;AACzB;AAEA,SAAS,gBAAgB,KAAoC;AAC3D,QAAM,KAAK,WAAW,GAAG;AACzB,QAAM,YAAY,IAAI,QAAQ,KAAK,QAAQ,IAAI,OAAO,OAAO,CAAC,KAAK;AACnE,SAAO,GAAG,QAAQ,IAAI,OAAO,OAAO,CAAC,KAAK,GAAG,KAAK,IAAI,GAAG,MAAM,GAAG,SAAS,KAAK,SAAS,CAAC;AAC5F;AAEA,SAAS,cAAc,KAAmB;AACxC,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;AAAA,EACtB,QAAQ;AACN,UAAM,IAAI,MAAM,iDAAiD,KAAK,UAAU,GAAG,CAAC,GAAG;AAAA,EACzF;AAMA,MAAI,OAAO,aAAa,WAAW,OAAO,aAAa,UAAU;AAC/D,UAAM,IAAI;AAAA,MACR,uDAAuD,OAAO,QAAQ,OAAO,KAAK,UAAU,GAAG,CAAC;AAAA,IAClG;AAAA,EACF;AACF;AAEA,eAAe,WACb,MACA,KACA,YACA,QACA,WACe;AACf,SAAO,eAAe;AACtB,gBAAc,IAAI,GAAG;AAGrB,QAAM,KAAK,gBAAgB,WAAW,GAAG,CAAC;AAC1C,QAAM,YAAY,cAAc,QAAQ,qBAAqB;AAC7D,QAAM,KAAK,KAAK,IAAI,KAAK,EAAE,WAAW,SAAS,eAAe,CAAC;AAC/D,MAAI,IAAI,SAAS;AACf,UAAM,KAAK,gBAAgB,IAAI,SAAS,EAAE,SAAS,KAAO,CAAC;AAAA,EAC7D;AACA,QAAM,QAAQ,IAAI,UAAU;AAC5B,MAAI,QAAQ,EAAG,OAAM,KAAK,eAAe,KAAK;AAC9C,SAAO,eAAe;AACtB,MAAI,IAAI,iBAAiB;AACvB,UAAM,KAAK,QAAQ,IAAI,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAAA,EACjF,OAAO;AACL,UAAM,KAAK,WAAW,EAAE,MAAM,YAAY,UAAU,IAAI,aAAa,KAAK,CAAC;AAAA,EAC7E;AACF;AAIA,SAAS,gBAAwB;AAC/B,QAAM,OAAO,MAAM,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE;AACzD,SAAO,YAAY,KAAK,CAAC,GAAG,KAAK,CAAC;AACpC;AAGO,SAAS,6BACd,SAeA;AACA,QAAM,SAAS,QAAQ,iBAAiB;AACxC,QAAM,YAAY,QAAQ,aAAa;AACvC,MAAI;AACJ,MAAI,SAAS;AAEb,iBAAe,aAAqC;AAClD,QAAI,QAAQ;AACV,YAAM,IAAI,MAAM,4EAA4E;AAAA,IAC9F;AACA,QAAI,CAAC,eAAgB,kBAAiB,OAAO;AAC7C,WAAO;AAAA,EACT;AAEA,kBAAgB,aACd,YACA,QAC6B;AAC7B,UAAM,OAAO,wBAAwB,UAAU;AAC/C,QAAI,CAAC,MAAM;AACT,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,QAAI,KAAK,SAAS,WAAW,GAAG;AAC9B,YAAM,IAAI,MAAM,uDAAuD;AAAA,IACzE;AAEA,UAAM,eAAe,cAAc,EAAE,MAAM,KAAK,KAAK,CAAC;AAEtD,UAAM,UAAU,MAAM,WAAW;AACjC,UAAM,UAAU,MAAM,QAAQ,WAAW,EAAE,UAAU,iBAAiB,CAAC;AAMvE,QAAI;AACJ,QAAI;AACJ,QAAI;AACF,YAAM,OAAO,MAAM,QAAQ,QAAQ;AACnC,YAAM,WAA6B,CAAC;AACpC,YAAM,KAAK,MAAM,OAAO,aAAkB;AAC1C,YAAM,OAAO,MAAM,OAAO,MAAW;AACrC,YAAM,WAAW,KAAK,KAAK,QAAQ,cAAc,aAAa;AAC9D,YAAM,GAAG,MAAM,UAAU,EAAE,WAAW,KAAK,CAAC;AAE5C,iBAAW,OAAO,KAAK,UAAU;AAC/B,eAAO,eAAe;AACtB,cAAM,WAAW,gBAAgB,GAAG;AACpC,cAAM,UAAU,KAAK,KAAK,UAAU,QAAQ;AAC5C,cAAM,UAAU,eAAe,QAAQ;AACvC,cAAM,WAAW,MAAM,KAAK,SAAS,QAAQ,SAAS;AACtD,cAAM,KAAK,WAAW,GAAG;AACzB,cAAM,MAAsB;AAAA,UAC1B,MAAM;AAAA,UACN,UAAU,GAAG,GAAG,KAAK,IAAI,GAAG,MAAM;AAAA,UAClC,UAAU,IAAI,aAAa;AAAA,UAC3B,OAAO,IAAI;AAAA,UACX,KAAK,IAAI;AAAA,UACT,aAAY,oBAAI,KAAK,GAAE,YAAY;AAAA,QACrC;AACA,YAAI,IAAI,gBAAiB,KAAI,kBAAkB,IAAI;AACnD,YAAI,IAAI,MAAO,KAAI,QAAQ,IAAI;AAC/B,iBAAS,KAAK,GAAG;AACjB,cAAM,eAAe,iBAAiB,GAAG;AAAA,MAC3C;AAEA,YAAM,WAAW,MAAM,QAAQ,MAAM;AAAA,QACnC,MAAM,KAAK;AAAA,QACX;AAAA,QACA,gBAAgB,KAAK;AAAA,QACrB,iBAAiB,KAAK;AAAA,QACtB;AAAA,QACA;AAAA,MACF,CAAC;AAED,iBAAW,WAAW,SAAS,UAAU;AACvC,cAAM,eAAe,iBAAiB,OAAO;AAAA,MAC/C;AACA,UAAI,SAAS,SAAS,SAAS,MAAM,KAAK,EAAE,SAAS,GAAG;AACtD,cAAM,eAAe,eAAe,EAAE,OAAO,SAAS,MAAM,CAAC;AAAA,MAC/D;AAEA,YAAM,QAAQ,SAAS,cAAc,EAAE,OAAO,GAAG,QAAQ,EAAE;AAC3D,YAAM,eAAe,QAAQ;AAAA,QAC3B,YAAY;AAAA,UACV,aAAa,MAAM;AAAA,UACnB,cAAc,MAAM;AAAA,QACtB;AAAA,QACA,cAAc,SAAS,WAAW;AAAA,MACpC,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,qBAAe;AAAA,IACjB,UAAE;AACA,UAAI;AACF,cAAM,QAAQ,MAAM;AAAA,MACtB,SAAS,KAAK;AACZ,qBAAa;AAAA,MACf;AAAA,IACF;AAIA,QAAI,iBAAiB,UAAa,eAAe,QAAW;AAC1D,YAAM,IAAI;AAAA,QACR,CAAC,cAAc,UAAU;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AACA,QAAI,iBAAiB,OAAW,OAAM;AACtC,QAAI,eAAe,OAAW,OAAM;AAAA,EACtC;AAEA,WAAS,uBAAyC;AAChD,UAAM,KAAK,cAAc;AACzB,UAAM,WAAW;AAAA,MACf;AAAA,MACA,aAAa,SAAiB,MAA8D;AAC1F,cAAM,SAAS,MAAM,UAAU,IAAI,gBAAgB,EAAE;AACrD,eAAO,aAAa,SAAS,MAAM;AAAA,MACrC;AAAA,MACA,MAAM,SAAwB;AAAA,MAI9B;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,MAAM,OAAO,UAAiC;AAC5C,aAAO,qBAAqB;AAAA,IAC9B;AAAA,IACA,kBAAkB,KAAK;AACrB,YAAM,KAAM,IAAmC;AAC/C,aAAO,EAAE,MAAM,WAAW,WAAW,OAAO,OAAO,WAAW,KAAK,OAAU;AAAA,IAC/E;AAAA,IACA,MAAM,QAAQ;AACZ,eAAS;AACT,YAAM,UAAU;AAChB,uBAAiB;AACjB,UAAI,SAAS;AACX,cAAM,UAAU,MAAM;AACtB,cAAM,QAAQ,MAAM;AAAA,MACtB;AAAA,IACF;AAAA,EACF;AACF;;;ACjVO,IAAM,uBAAuB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBlC,KAAK;AAGA,IAAM,cAAsC;AAAA,EACjD,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,WAAW;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMX,KAAK;AAAA,EACL,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMR,KAAK;AAAA,EACL,WAAW;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMX,KAAK;AAAA,EACL,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,eAAe;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMf,KAAK;AAAA,EACL,YAAY;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMZ,KAAK;AAAA,EACL,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMR,KAAK;AAAA,EACL,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMT,KAAK;AAAA,EACL,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,yBAAyB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMzB,KAAK;AAAA,EACL,OAAO;AAAA;AAAA;AAAA,EAGP,KAAK;AACP;AAOO,SAAS,yBAAyB,MAAsB;AAC7D,QAAM,QAAQ,YAAY,IAAI;AAC9B,SAAO,GAAG,oBAAoB;AAAA;AAAA,EAAO,KAAK;AAC5C;;;AC1GA,IAAM,oBAAoB,IAAI,IAAY,SAAS;AAEnD,SAAS,SAAS,GAAyB;AACzC,SAAO,OAAO,MAAM,YAAY,kBAAkB,IAAI,CAAW;AACnE;AAGO,SAAS,mBAAmB,QAAuC;AACxE,QAAM,WAAwB,CAAC;AAC/B,QAAM,WAA6B,CAAC;AACpC,MAAI;AACJ,MAAI;AAEJ,aAAW,OAAO,QAAQ;AACxB,QAAI,CAAC,OAAO,OAAO,QAAQ,SAAU;AACrC,UAAM,OAAO,OAAO,IAAI,QAAQ,EAAE;AAClC,UAAM,OACJ,IAAI,QAAQ,OAAO,IAAI,SAAS,WAAY,IAAI,OAAmC;AACrF,QAAI,CAAC,KAAM;AAEX,YAAQ,MAAM;AAAA,MACZ,KAAK,cAAc;AACjB,cAAM,IAAI,KAAK;AACf,YAAI,SAAS,CAAC,EAAG,QAAO;AACxB;AAAA,MACF;AAAA,MACA,KAAK,iBAAiB;AACpB,cAAM,MAAM;AACZ,YACE,OAAO,IAAI,SAAS,YACpB,OAAO,IAAI,aAAa,YACxB,OAAO,IAAI,aAAa,aACxB,OAAO,IAAI,UAAU,YACrB,OAAO,IAAI,QAAQ,YACnB,OAAO,IAAI,eAAe,UAC1B;AACA,gBAAMA,OAAsB;AAAA,YAC1B,MAAM,IAAI;AAAA,YACV,UAAU,IAAI;AAAA,YACd,UAAU,IAAI;AAAA,YACd,OAAO,IAAI;AAAA,YACX,KAAK,IAAI;AAAA,YACT,YAAY,IAAI;AAAA,UAClB;AACA,cAAI,IAAI,gBAAiB,CAAAA,KAAI,kBAAkB,IAAI;AACnD,cAAI,IAAI,MAAO,CAAAA,KAAI,QAAQ,IAAI;AAC/B,mBAAS,KAAKA,IAAG;AAAA,QACnB;AACA;AAAA,MACF;AAAA,MACA,KAAK,iBAAiB;AACpB,cAAM,IAAI;AAIV,YACE,OAAO,EAAE,UAAU,YACnB,EAAE,MAAM,KAAK,EAAE,SAAS,KACxB,SAAS,EAAE,IAAI,KACf,OAAO,EAAE,aAAa,YACtB,CAAC,OAAO,OAAO,QAAQ,UAAU,EAAE,SAAS,EAAE,QAAQ,KACtD,OAAO,EAAE,UAAU,YACnB,OAAO,EAAE,gBAAgB,YACzB,OAAO,EAAE,WAAW,YACpB,OAAO,EAAE,iBAAiB,YAC1B,MAAM,QAAQ,EAAE,WAAW,GAC3B;AACA,mBAAS,KAAK,CAAc;AAAA,QAC9B;AACA;AAAA,MACF;AAAA,MACA,KAAK,eAAe;AAClB,cAAM,IAAI,KAAK;AACf,YAAI,OAAO,MAAM,YAAY,EAAE,KAAK,EAAE,SAAS,EAAG,SAAQ;AAC1D;AAAA,MACF;AAAA,MACA;AAGE;AAAA,IACJ;AAAA,EACF;AAEA,QAAM,MAAqB,EAAE,MAAM,QAAQ,SAAS,UAAU,SAAS;AACvE,MAAI,MAAO,KAAI,QAAQ;AACvB,SAAO;AACT;;;AC9EA,IAAM,yBAAyB;AAAA,EAC7B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,SAAS,eAAe,OAAwB;AAC9C,QAAM,IAAI,MAAM,KAAK;AACrB,MAAI,EAAE,SAAS,GAAI,QAAO;AAC1B,SAAO,uBAAuB,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,CAAC;AACvD;AAGO,SAAS,yBAAyB,MAA6C;AACpF,SAAO;AAAA,IACL,MAAM,SAAS,QAAQ;AACrB,YAAM,WAAW,OAAO;AACxB,YAAM,WAAW,OAAO;AACxB,YAAM,eAAe,IAAI,IAAI,SAAS,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC;AAExD,YAAM,UAAU,SAAS,OAAO,CAAC,MAAM,EAAE,SAAS,KAAK,IAAI;AAC3D,UAAI,QAAQ,SAAS,GAAG;AACtB,cAAMC,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,QAAQ,MAAM,gDAAgD,KAAK,IAAI,SAAS,QAAQ,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,UAC/H,QAAQ,EAAE,SAAS,EAAE;AAAA,QACvB;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,kBAAkB,SAAS;AAAA,QAC/B,CAAC,MAAM,CAAC,MAAM,QAAQ,EAAE,WAAW,KAAK,EAAE,YAAY,WAAW;AAAA,MACnE;AACA,UAAI,gBAAgB,SAAS,GAAG;AAC9B,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,gBAAgB,MAAM;AAAA,UAChC,QAAQ,EAAE,UAAU,EAAE;AAAA,QACxB;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,iBAAiB,SAAS;AAAA,QAAO,CAAC,MACtC,EAAE,YAAY,KAAK,CAAC,MAAM,CAAC,aAAa,IAAI,EAAE,IAAI,CAAC;AAAA,MACrD;AACA,UAAI,eAAe,SAAS,GAAG;AAC7B,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,eAAe,MAAM;AAAA,UAC/B,QAAQ,EAAE,UAAU,EAAE;AAAA,QACxB;AACA,eAAOA;AAAA,MACT;AAEA,UAAI,SAAS,WAAW,GAAG;AACzB,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO;AAAA,UACP,QAAQ,EAAE,aAAa,GAAG,UAAU,GAAG,QAAQ,EAAE;AAAA,QACnD;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,eAAe,SAAS,OAAO,CAAC,MAAM,OAAO,EAAE,aAAa,QAAQ,EAAE;AAC5E,YAAM,cAAc,eAAe,SAAS;AAC5C,YAAM,UAAU,SAAS,OAAO,CAAC,MAAM,eAAe,EAAE,KAAK,CAAC,EAAE;AAChE,YAAM,SAAS,IAAI,UAAU,SAAS;AAOtC,YAAM,mBAAmB,SAAS;AAAA,QAChC,CAAC,MACC,MAAM,QAAQ,EAAE,WAAW,KAC3B,EAAE,YAAY,SAAS,KACvB,EAAE,YAAY,MAAM,CAAC,MAAM,aAAa,IAAI,EAAE,IAAI,CAAC;AAAA,MACvD,EAAE;AACF,YAAM,WAAW,mBAAmB,SAAS;AAC7C,YAAM,QAAQ,QAAQ,MAAM,cAAc,MAAM,WAAW,MAAM,QAAQ,QAAQ,CAAC,CAAC;AAEnF,YAAM,UAA0B;AAAA,QAC9B,OAAO;AAAA,QACP;AAAA,QACA,OAAO,GAAG,SAAS,MAAM,kCAA6B,YAAY,QAAQ,CAAC,CAAC,aAAa,SAAS,QAAQ,CAAC,CAAC,WAAW,OAAO,QAAQ,CAAC,CAAC;AAAA,QACxI,QAAQ,EAAE,aAAa,UAAU,OAAO;AAAA,MAC1C;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;;;AC1FO,SAAS,iBAAiB,UAAmC,CAAC,GAMnE;AACA,QAAM,OAAO,QAAQ,QAAQ;AAM7B,QAAM,UAAwB;AAAA,IAC5B;AAAA,IACA,aAAa;AAAA,IACb,QAAQ,EAAE,cAAc,GAAG;AAAA,IAC3B,OAAO,QAAQ,QAAQ,EAAE,SAAS,QAAQ,MAAM,IAAI;AAAA,IACpD,OAAO,EAAE,SAAS,MAAM,QAAQ,KAAK;AAAA,IACrC,UAAU,EAAE,MAAM,aAAa;AAAA,EACjC;AAEA,QAAM,SAAuC,EAAE,OAAO,mBAAmB;AACzE,QAAM,YAAsC,QAAQ,OAChD,yBAAyB,QAAQ,IAAI,IACrC,yBAAyB,EAAE,MAAM,SAAS,UAAU,CAAC,EAAE,CAAC;AAW5D,QAAM,eAAe,CAAC,SACpB,GAAG,wBAAwB,IAAI,CAAC;AAAA,EAAK,yBAAyB,KAAK,IAAI,CAAC;AAAA;AAAA,EAAO,oBAAoB,IAAI,CAAC;AAE1G,QAAM,eAA0C;AAAA,IAC9C;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,cAAc,QAAQ,WAAW,aAAa;AAClE;","names":["out","verdict"]}
|
|
1
|
+
{"version":3,"sources":["../src/profiles/ui-auditor/prompt.ts","../src/profiles/ui-auditor/in-process-client.ts","../src/profiles/ui-auditor/lens-prompts.ts","../src/profiles/ui-auditor/output-adapter.ts","../src/profiles/ui-auditor/validator.ts","../src/profiles/ui-auditor/profile.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Prompt formatter for the auditor profile. `formatAuditorPrompt` produces\n * the user message handed to the iteration — describes the captures to be\n * taken and the lens to apply. The system prompt comes from\n * `buildAuditorSystemPrompt(lens)` (lens-prompts.ts).\n *\n * The formatter prepends a machine-readable envelope (`<<UI_AUDIT_TASK>>`\n * … `<<UI_AUDIT_TASK_END>>`) carrying a JSON-serialised task. The\n * in-process auditor client recovers the task from this envelope so the\n * iteration is self-describing — robust to concurrent fanout, where any\n * per-client side state (e.g. a \"current task\" register) would race.\n *\n * The formatter is pure and deterministic — re-run on the same task\n * produces the same prompt. Tests and trace replays rely on this.\n */\n\nimport type { UiAuditTask } from './task'\n\nconst ENVELOPE_BEGIN = '<<UI_AUDIT_TASK>>'\nconst ENVELOPE_END = '<<UI_AUDIT_TASK_END>>'\n\n/** @experimental */\nexport function encodeAuditTaskEnvelope(task: UiAuditTask): string {\n return `${ENVELOPE_BEGIN}${JSON.stringify(task)}${ENVELOPE_END}`\n}\n\n/**\n * Parse a task envelope back out of a prompt string. Returns undefined if\n * the prompt does not contain a complete envelope OR if the payload is\n * not valid JSON.\n *\n * @experimental\n */\nexport function decodeAuditTaskEnvelope(prompt: string): UiAuditTask | undefined {\n const start = prompt.indexOf(ENVELOPE_BEGIN)\n if (start === -1) return undefined\n const payloadStart = start + ENVELOPE_BEGIN.length\n const end = prompt.indexOf(ENVELOPE_END, payloadStart)\n if (end === -1) return undefined\n const payload = prompt.slice(payloadStart, end)\n try {\n const parsed = JSON.parse(payload) as unknown\n if (!parsed || typeof parsed !== 'object') return undefined\n const t = parsed as Partial<UiAuditTask>\n if (typeof t.lens !== 'string' || !Array.isArray(t.captures)) return undefined\n return t as UiAuditTask\n } catch {\n return undefined\n }\n}\n\n/** @experimental */\nexport function formatAuditorPrompt(task: UiAuditTask): string {\n const lines: string[] = []\n lines.push(`# UI audit iteration — lens: ${task.lens}`)\n lines.push('')\n if (task.productContext && task.productContext.trim().length > 0) {\n lines.push('## Product context')\n lines.push(task.productContext.trim())\n lines.push('')\n }\n lines.push('## Captures to take')\n task.captures.forEach((cap, i) => {\n const vp = cap.viewport ? `${cap.viewport.width}x${cap.viewport.height}` : '1280x800 (default)'\n const detail = [\n `viewport=${vp}`,\n cap.fullPage ? 'fullPage=true' : null,\n cap.elementSelector ? `selector=\\`${cap.elementSelector}\\`` : null,\n cap.waitFor ? `waitFor=\\`${cap.waitFor}\\`` : null,\n cap.waitMs !== undefined ? `waitMs=${cap.waitMs}` : null,\n cap.label ? `label=${cap.label}` : null,\n ]\n .filter((s): s is string => s !== null)\n .join(' · ')\n lines.push(`${i + 1}. route=\\`${cap.route}\\` url=${cap.url} ${detail ? `(${detail})` : ''}`)\n })\n lines.push('')\n if (task.knownFindingIds && task.knownFindingIds.length > 0) {\n lines.push('## Known findings (link via similarTo, do not refile)')\n lines.push(task.knownFindingIds.map((n) => `#${String(n).padStart(3, '0')}`).join(', '))\n lines.push('')\n }\n lines.push('## Output format')\n lines.push(\n 'Emit a single JSON object with the shape `{ findings: UiFinding[], notes?: string }` where every finding has the fields enumerated in your system prompt. The screenshots field on each finding must reference the captures above by path. Do not emit findings outside the lens.',\n )\n return lines.join('\\n')\n}\n","/**\n * @experimental\n *\n * `createInProcessUiAuditClient` — a `SandboxClient` that drives a\n * Playwright browser in-process and delegates finding identification to a\n * consumer-supplied {@link UiJudge}.\n *\n * Why this exists: `runLoop` is built around a sandbox-SDK seam — each\n * iteration is `client.create() → box.streamPrompt() → box.delete()`.\n * For UI audit, spinning up a real container running a coding harness\n * per iteration is overkill: the work is one browser capture + one\n * vision LLM call. This client satisfies the kernel contract while\n * doing the audit in-process; no container, no sandbox-SDK backend.\n *\n * The client owns ONE browser for its lifetime and creates a fresh\n * context per iteration (isolated cookies/storage). Playwright is\n * dynamically imported so consumers who use a different `SandboxClient`\n * — e.g. a fleet executor that drives Playwright remotely — do not pay\n * the peer dep cost.\n *\n * Concurrency: each iteration's prompt carries a self-describing task\n * envelope (see `prompt.ts`), so concurrent fanout iterations do not race\n * over per-client side state.\n */\n\nimport type { CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox'\nimport type { SandboxClient } from '../../runtime/types'\nimport type { UiJudge } from './judge'\nimport { decodeAuditTaskEnvelope } from './prompt'\nimport { slugify } from './slugify'\nimport type { UiAuditCapture, UiAuditCaptureRequest } from './task'\n\n// All synthetic events the auditor emits flow through this helper. Reason:\n// `SandboxEvent.data` is a sandbox-SDK shape (effectively `Record<string,\n// unknown>`) that our typed payloads (`UiAuditCapture`, `UiFinding`, …) do not\n// satisfy structurally. The cast moves the type-system smell into a single,\n// named, documented call site so the call sites in `runIteration` stay clean.\n// The runtime contract — `{ type, data }` — is what the output adapter reads;\n// the static type is what the kernel collects into `SandboxEvent[]`.\nfunction asSandboxEvent<T>(type: string, data: T): SandboxEvent {\n return { type, data } as unknown as SandboxEvent\n}\n\n/** @experimental */\nexport interface InProcessUiAuditClientOptions {\n /**\n * Absolute path under which screenshots are written. Each capture lands\n * at `<workspaceDir>/screenshots/<filename>`; finding screenshot paths\n * are workspace-relative (`screenshots/<filename>`).\n */\n workspaceDir: string\n /** The vision judge that turns captures into findings. */\n judge: UiJudge\n /**\n * Navigation policy.\n *\n * `'strict'` (default) waits for `networkidle` and fails the iteration\n * if the page does not settle. `'spa'` waits for `domcontentloaded` —\n * use for single-page apps that hold open long-poll/websocket\n * connections and never settle.\n */\n navPolicy?: 'strict' | 'spa'\n /**\n * Browser launch override. Default: chromium headless via Playwright.\n * Consumers pass a custom factory to target a remote browser, a\n * different channel, or a fleet adapter.\n */\n launchBrowser?: () => Promise<BrowserHandle>\n}\n\n/** @experimental */\nexport interface BrowserHandle {\n newContext(options?: {\n viewport?: { width: number; height: number }\n }): Promise<BrowserContextHandle>\n close(): Promise<void>\n}\n\n/** @experimental */\nexport interface BrowserContextHandle {\n newPage(): Promise<PageHandle>\n close(): Promise<void>\n}\n\n/** @experimental */\nexport interface PageHandle {\n setViewportSize(size: { width: number; height: number }): Promise<void>\n goto(url: string, options?: { waitUntil?: string; timeout?: number }): Promise<unknown>\n waitForSelector(selector: string, options?: { timeout?: number }): Promise<unknown>\n waitForTimeout(ms: number): Promise<void>\n screenshot(options: { path: string; fullPage?: boolean }): Promise<void>\n locator(selector: string): {\n first(): { screenshot(options: { path: string }): Promise<void> }\n }\n}\n\nconst DEFAULT_VIEWPORT = { width: 1280, height: 800 } as const\nconst NAV_TIMEOUT_MS = 30_000\n\nasync function defaultLaunch(): Promise<BrowserHandle> {\n const mod = (await import('playwright')) as unknown as {\n chromium?: { launch(options?: { headless?: boolean }): Promise<BrowserHandle> }\n }\n if (!mod?.chromium || typeof mod.chromium.launch !== 'function') {\n throw new Error(\n 'ui-auditor: playwright is not installed. Install `playwright` (and run `playwright install chromium`) or pass a custom `launchBrowser` to createInProcessUiAuditClient.',\n )\n }\n return mod.chromium.launch({ headless: true })\n}\n\nfunction nowStamp(): string {\n const d = new Date()\n const pad = (n: number) => String(n).padStart(2, '0')\n // Millisecond resolution so parallel fanout iterations capturing the same\n // route/viewport/label within the same second don't collide on filename and\n // silently overwrite each other.\n return (\n `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}-` +\n `${pad(d.getUTCHours())}${pad(d.getUTCMinutes())}${pad(d.getUTCSeconds())}` +\n `-${String(d.getUTCMilliseconds()).padStart(3, '0')}`\n )\n}\n\nfunction viewportOf(req: UiAuditCaptureRequest): { width: number; height: number } {\n return req.viewport ?? DEFAULT_VIEWPORT\n}\n\nfunction captureFilename(req: UiAuditCaptureRequest): string {\n const vp = viewportOf(req)\n const labelPart = req.label ? `--${slugify(req.label, 'label')}` : ''\n return `${slugify(req.route, 'route')}--${vp.width}x${vp.height}${labelPart}--${nowStamp()}.png`\n}\n\nfunction assertHttpUrl(url: string): void {\n let parsed: URL\n try {\n parsed = new URL(url)\n } catch {\n throw new Error(`ui-auditor: capture url is not parseable (got ${JSON.stringify(url)})`)\n }\n // SSRF defense at the client boundary. The MCP tool already restricts to\n // http(s), but `createInProcessUiAuditClient` is exported and can be wired\n // up directly by consumers (the example does this). A crafted task envelope\n // could otherwise navigate Playwright to `file://`, `data:`, `javascript:`\n // and read local files or execute inline content.\n if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {\n throw new Error(\n `ui-auditor: capture url must use http or https (got ${parsed.protocol} in ${JSON.stringify(url)})`,\n )\n }\n}\n\nasync function captureOne(\n page: PageHandle,\n req: UiAuditCaptureRequest,\n outAbsPath: string,\n signal: AbortSignal,\n navPolicy: 'strict' | 'spa',\n): Promise<void> {\n signal.throwIfAborted()\n assertHttpUrl(req.url)\n // Apply the per-capture viewport before navigation. The capture metadata\n // and filename both encode this viewport; the rendered page must match.\n await page.setViewportSize(viewportOf(req))\n const waitUntil = navPolicy === 'spa' ? 'domcontentloaded' : 'networkidle'\n await page.goto(req.url, { waitUntil, timeout: NAV_TIMEOUT_MS })\n if (req.waitFor) {\n await page.waitForSelector(req.waitFor, { timeout: 15_000 })\n }\n const extra = req.waitMs ?? 500\n if (extra > 0) await page.waitForTimeout(extra)\n signal.throwIfAborted()\n if (req.elementSelector) {\n await page.locator(req.elementSelector).first().screenshot({ path: outAbsPath })\n } else {\n await page.screenshot({ path: outAbsPath, fullPage: req.fullPage === true })\n }\n}\n\ninterface SyntheticSandbox extends SandboxInstance {}\n\nfunction makeSandboxId(): string {\n const rand = () => Math.random().toString(16).slice(2, 10)\n return `ui-audit-${rand()}${rand()}`\n}\n\n/** @experimental */\nexport function createInProcessUiAuditClient(\n options: InProcessUiAuditClientOptions,\n): SandboxClient & {\n /**\n * Close the underlying browser. Idempotent.\n *\n * Contract: callers MUST ensure no iterations are in flight when this is\n * called. The kernel respects this — `runLoop` awaits every iteration\n * before returning, so `await runLoop(...); await client.close()` is the\n * intended pattern (see `examples/ui-audit`). If `close()` is invoked\n * concurrently with a running iteration, the browser teardown will race\n * against in-flight page operations; the iteration will surface an\n * AggregateError carrying both the iteration error and the close error,\n * but no work is lost silently.\n */\n close(): Promise<void>\n} {\n const launch = options.launchBrowser ?? defaultLaunch\n const navPolicy = options.navPolicy ?? 'strict'\n let browserPromise: Promise<BrowserHandle> | undefined\n let closed = false\n\n async function getBrowser(): Promise<BrowserHandle> {\n if (closed) {\n throw new Error('ui-auditor: client is closed; create a new client to run another iteration')\n }\n if (!browserPromise) browserPromise = launch()\n return browserPromise\n }\n\n async function* runIteration(\n promptText: string,\n signal: AbortSignal,\n ): AsyncIterable<SandboxEvent> {\n const task = decodeAuditTaskEnvelope(promptText)\n if (!task) {\n throw new Error(\n 'ui-auditor: prompt is missing a UI_AUDIT_TASK envelope. Use uiAuditorProfile().taskToPrompt to format prompts, or pass an envelope-prefixed prompt manually.',\n )\n }\n if (task.captures.length === 0) {\n throw new Error('ui-auditor: task has zero captures; nothing to audit.')\n }\n\n yield asSandboxEvent('audit.lens', { lens: task.lens })\n\n const browser = await getBrowser()\n const context = await browser.newContext({ viewport: DEFAULT_VIEWPORT })\n // Track both the primary iteration error and any context-close failure so\n // the cleanup path never silently swallows a leaked-context bug AND a\n // close failure never shadows the real iteration error. After the\n // try/catch/finally settles, we rethrow the primary if there was one,\n // otherwise we rethrow the close error.\n let primaryError: unknown\n let closeError: unknown\n try {\n const page = await context.newPage()\n const captures: UiAuditCapture[] = []\n const fs = await import('node:fs/promises')\n const path = await import('node:path')\n const shotsDir = path.join(options.workspaceDir, 'screenshots')\n await fs.mkdir(shotsDir, { recursive: true })\n\n for (const req of task.captures) {\n signal.throwIfAborted()\n const filename = captureFilename(req)\n const absPath = path.join(shotsDir, filename)\n const relPath = `screenshots/${filename}`\n await captureOne(page, req, absPath, signal, navPolicy)\n const vp = viewportOf(req)\n const cap: UiAuditCapture = {\n path: relPath,\n viewport: `${vp.width}x${vp.height}`,\n fullPage: req.fullPage === true,\n route: req.route,\n url: req.url,\n capturedAt: new Date().toISOString(),\n }\n if (req.elementSelector) cap.elementSelector = req.elementSelector\n if (req.label) cap.label = req.label\n captures.push(cap)\n yield asSandboxEvent('audit.capture', cap)\n }\n\n const judgeOut = await options.judge({\n lens: task.lens,\n captures,\n productContext: task.productContext,\n knownFindingIds: task.knownFindingIds,\n promptText,\n signal,\n })\n\n for (const finding of judgeOut.findings) {\n yield asSandboxEvent('audit.finding', finding)\n }\n if (judgeOut.notes && judgeOut.notes.trim().length > 0) {\n yield asSandboxEvent('audit.notes', { notes: judgeOut.notes })\n }\n\n const usage = judgeOut.tokenUsage ?? { input: 0, output: 0 }\n yield asSandboxEvent('done', {\n tokenUsage: {\n inputTokens: usage.input,\n outputTokens: usage.output,\n },\n totalCostUsd: judgeOut.costUsd ?? 0,\n })\n } catch (err) {\n primaryError = err\n } finally {\n try {\n await context.close()\n } catch (err) {\n closeError = err\n }\n }\n // When both the iteration and the cleanup fail, surface both via\n // AggregateError so a leaked context bug is not silently masked by an\n // earlier iteration failure (per the fail-loud doctrine).\n if (primaryError !== undefined && closeError !== undefined) {\n throw new AggregateError(\n [primaryError, closeError],\n 'ui-auditor: iteration failed AND context.close() failed; both errors attached.',\n )\n }\n if (primaryError !== undefined) throw primaryError\n if (closeError !== undefined) throw closeError\n }\n\n function makeSyntheticSandbox(): SyntheticSandbox {\n const id = makeSandboxId()\n const instance = {\n id,\n streamPrompt(message: string, opts?: { signal?: AbortSignal }): AsyncIterable<SandboxEvent> {\n const signal = opts?.signal ?? new AbortController().signal\n return runIteration(message, signal)\n },\n async delete(): Promise<void> {\n // No per-sandbox resources to release; the browser is shared and\n // closed by `client.close()`. Intentionally a no-op so trace-time\n // `box.delete()` succeeds without doing surprising work.\n },\n }\n return instance as unknown as SyntheticSandbox\n }\n\n return {\n async create(_options?: CreateSandboxOptions) {\n return makeSyntheticSandbox()\n },\n describePlacement(box) {\n const id = (box as unknown as { id?: string }).id\n return { kind: 'sibling', sandboxId: typeof id === 'string' ? id : undefined }\n },\n async close() {\n closed = true\n const pending = browserPromise\n browserPromise = undefined\n if (pending) {\n const browser = await pending\n await browser.close()\n }\n },\n }\n}\n","/**\n * @experimental\n *\n * Per-lens guidance the auditor inlines into its system prompt for an\n * iteration. Each entry is a self-contained brief — the same content the\n * standalone ui-issue-finder skill ships, embedded as a string constant so\n * agent-runtime carries no runtime dep on that external workspace.\n *\n * Briefs are deliberately concrete: they enumerate the SIGNALS to look for\n * and the cross-lens distinctions to respect, so the judge files fewer\n * pile-on findings under generic labels.\n */\n\nimport type { UiLens } from './substrate'\n\n/** @experimental */\nexport const SHARED_AUDITOR_RULES = `\nYou are auditing a UI for a specific class of problems. Stay strictly in your assigned lens — do not file issues that belong to another lens (a separate iteration will catch those).\n\nA finding is only valid if a thoughtful product designer would agree the screenshot shows something that should change. Avoid:\n- Personal taste (\"I'd prefer brand blue\").\n- Hallucinated text or controls you cannot actually see in the screenshot.\n- Suggestions that depend on requirements you don't have access to.\n- Pile-on findings about the same root cause — file ONE finding and use \\`similarTo\\` to link the rest.\n\nRequired for every finding:\n- title: concrete, names the offending element AND what's wrong (NOT \"improve UX\").\n- severity: critical=blocks a core task or accessibility blocker; high=noticeable friction; med=visible polish issue; low=nitpick.\n- observation: 1–3 sentences describing exactly what you see that is wrong.\n- impact: who is affected and how (concrete).\n- suggestedFix: a specific change a developer could apply without asking you back.\n- screenshots: refer to the captures attached to this iteration by path.\n- selector: when you can pin the offending element with a CSS selector.\n\nMost findings are med or low. Reserve high/critical for genuine blockers.\n`.trim()\n\n/** @experimental */\nexport const LENS_BRIEFS: Record<UiLens, string> = {\n consistency: `\nLENS: consistency\nLook for inconsistencies in the design system — things that look like they came from different products glued together.\nSignals: multiple font families, inconsistent weights/sizes for the same role, two shades of \"primary\", arbitrary paddings/margins that don't snap to a scale (4/8/12/16/24), same control with different border-radius or shadow on different pages, mixed icon styles (filled vs outlined), inconsistent button heights/padding for the same variant, inconsistent capitalization (Title Case vs sentence case) for the same role.\nNOT this lens: layout misalignment (use \\`layout\\`), confusing user flow (use \\`ux-flow\\`), contrast/keyboard issues (use \\`accessibility\\`).\nTitle format: \\`Inconsistent <thing> between <A> and <B>\\`.\n`.trim(),\n hierarchy: `\nLENS: hierarchy\nLook for broken visual hierarchy — places where the eye does not land on what matters most.\nSignals: primary CTA same weight as secondary/tertiary controls, headings (H1/H2/H3) nearly the same size, important data buried (headline number smaller than its label), decoration outshining content, too many emphases competing, wrong scan order, missing or overly heavy section dividers.\nNOT this lens: same-role styled differently (\\`consistency\\`), grid/alignment (\\`layout\\`), contrast-failing text (\\`accessibility\\`).\nTitle format: \\`Weak hierarchy: <element> does not read as the <intended-role>\\`.\n`.trim(),\n layout: `\nLENS: layout\nLook for layout and organization problems — alignment, grouping, whitespace, structural choices that hurt scannability.\nSignals: misalignment within rows, inconsistent gutters in grids, orphan whitespace next to crammed regions, poor grouping (related fields separated, unrelated fields adjacent), no visual sections (long wall of content), container overflow (text/content punching out of card boundaries), cramped or oversized hit targets, sidebars/headers sized wrong relative to main content.\nNOT this lens: same-role styled differently (\\`consistency\\`), click-distance/friction (\\`ux-flow\\`), overflow specifically at small viewports (\\`responsive\\`).\nTitle format: \\`<Region> alignment/spacing problem\\` or \\`<Region> grouping unclear\\`.\n`.trim(),\n 'ux-flow': `\nLENS: ux-flow\nLook for interaction-flow friction — action sequences that are slower, more annoying, or more error-prone than necessary.\nSignals: sequential clicks far apart (e.g. Next top-right while user is bottom-left), destructive action adjacent to primary with same weight, confirmations that don't say what's being confirmed, primary CTA below the fold or hidden in a kebab menu, silent state changes (toggle gives no feedback), form ordering that fights real-world order, dead-end states after submit, lost inputs on back-navigation, hidden pre-selected options.\nNOT this lens: visual style only (\\`consistency\\`), component arrangement without a flow problem (\\`layout\\`), microcopy clarity (\\`content\\`).\nTitle format: \\`<Action A> → <Action B> friction: <root cause>\\`.\n`.trim(),\n duplication: `\nLENS: duplication\nLook for redundancy — the same control, link, or piece of content appearing more than once with no good reason.\nSignals: two ways to do the same action on the same screen with no difference, repeated nav (same links in sidebar AND top nav), drifted duplicates (two copies that have diverged), content repeated verbatim, icon + label saying the same thing twice in one row, per-row + bulk actions that overlap confusingly, multiple status indicators conveying the same status.\nNOT this lens: inconsistent styling of duplicates (\\`consistency\\`) — this lens is about the existence of duplicates.\nTitle format: \\`Duplicate <thing> in <location A> and <location B>\\`.\n`.trim(),\n accessibility: `\nLENS: accessibility\nLook for accessibility blockers and degradations. Be conservative — do not assume violations you cannot see.\nSignals: insufficient contrast on body text or controls, missing/invisible focus styles, tiny tap targets (<24px on mobile), color as sole signal (red border with no message), form labels missing or not associated (placeholders standing in for labels), broken heading order (H1 → H4), modals that don't trap focus, decorative elements that take focus, errors not announced, important text rendered inside images.\nNOT this lens: generic \"looks confusing\" (\\`hierarchy\\` or \\`content\\`), layout overflow at small viewports (\\`responsive\\`).\nTitle format: \\`Accessibility: <specific blocker> in <element>\\`.\n`.trim(),\n responsive: `\nLENS: responsive\nLook for layout breakage across viewport sizes — content that works at one width but degrades at another. This iteration's captures should include the same surface at >=2 viewports; compare across them.\nSignals: horizontal scroll where content should reflow, overlapping elements (header overlaps content, fixed footer covers inputs), desktop nav crammed into mobile without collapsing, table columns that don't truncate, tap targets too close at touch sizes, controls vanishing at certain widths, layout flips that break grouping order, modals exceeding viewport height (confirm button unreachable).\nNOT this lens: issues present at every viewport (\\`consistency\\` / \\`hierarchy\\` instead).\nTitle format: \\`<Element/Region> breaks at <viewport>\\`.\n`.trim(),\n states: `\nLENS: states\nLook for missing or broken UI states — the not-happy-paths that make a product feel finished or unfinished. The iteration's captures should depict at least one non-default state.\nSignals: empty lists with no guidance, skeletons that don't match final layout (CLS on settle), error states with no message or recovery action, disabled buttons with no explanation, toasts that disappear before being read, success states that don't confirm, missing hover/focus/active/disabled variants on primary controls, no long-content view, no-permission state broken.\nNOT this lens: generic polish on the happy path (other lenses), missing focus rings specifically (\\`accessibility\\`).\nTitle format: \\`Missing/broken <state> state on <surface>\\`.\n`.trim(),\n content: `\nLENS: content\nLook for microcopy and content problems — text that is unclear, inconsistent, condescending, jargon-heavy, or wrong.\nSignals: jargon/internal language leaking (\"Provisioning a Tenant\" instead of \"Setting up your account\"), inconsistent terminology (workspace vs team), verbose button labels, empty-state copy that's just \"No results\", error messages blaming the user, tone inconsistency, truncation without affordance, mixed date/number formats on one page, placeholder used as a label, \"Saved!\" toast appearing before save completes, typos and grammar errors.\nNOT this lens: visual treatment of text (\\`hierarchy\\` / \\`consistency\\`), missing labels for a11y (\\`accessibility\\`).\nTitle format: \\`Copy: \"<actual text>\" in <location>\\` or \\`Inconsistent term: \"<A>\" vs \"<B>\"\\`.\n`.trim(),\n interaction: `\nLENS: interaction\nLook for interaction quality problems — affordances, feedback, and micro-interactions.\nSignals: no affordance (clickable areas not looking clickable, non-clickable areas looking clickable), missing feedback (>100ms click with no progress), hover surprises (whole row highlights but only title clickable), cursor inconsistency, animations that block input, missing transitions where they're needed (accordion snaps open), drag-and-drop without indicators, scroll-jacking, click-through bugs (card click handler firing alongside button), hover-only revelations on touch.\nNOT this lens: position of controls (\\`layout\\` / \\`ux-flow\\`), missing focus styles (\\`accessibility\\`).\nTitle format: \\`<Action> on <element>: <missing/wrong> feedback\\`.\n`.trim(),\n 'performance-perceived': `\nLENS: performance-perceived\nLook for perceived-performance problems — visible jank a real user would notice, not benchmark numbers. This iteration's captures should include >=2 frames during load to show shift.\nSignals: layout shift (CLS) when late-arriving images/fonts/banners settle, FOUC (flash of unstyled content), font swap jumps, late-loading hero images that shift everything, skeletons that don't match final shape, spinners on instant local actions, loading state reappearing after content paints (refetch on focus), modal open animation longer than the operation it precedes.\nNOT this lens: slow API calls (file separately), stale data after navigation (\\`states\\`).\nTitle format: \\`Layout shift / late paint on <route>: <root cause>\\`.\n`.trim(),\n other: `\nLENS: other\nUse ONLY when a finding is clearly a UI quality issue but does not fit any other lens. Strongly prefer a specific lens — \\`other\\` should be rare. Title must still be concrete.\n`.trim(),\n}\n\n/**\n * Build a system prompt for a single auditor iteration.\n *\n * @experimental\n */\nexport function buildAuditorSystemPrompt(lens: UiLens): string {\n const brief = LENS_BRIEFS[lens]\n return `${SHARED_AUDITOR_RULES}\\n\\n${brief}`\n}\n","/**\n * @experimental\n *\n * Sandbox-event stream → UiAuditOutput decoder. The custom auditor\n * `SandboxClient` emits events of the form:\n *\n * { type: 'audit.capture', data: UiAuditCapture }\n * { type: 'audit.finding', data: UiFinding }\n * { type: 'audit.notes', data: { notes: string } }\n * { type: 'audit.lens', data: { lens: UiLens } }\n * { type: 'done', data: { tokenUsage: { ... }, totalCostUsd?: number } }\n *\n * Other event types are tolerated and ignored. The adapter is pure: it\n * folds an already-collected event array into a UiAuditOutput.\n */\n\nimport type { SandboxEvent } from '@tangle-network/sandbox'\nimport { UI_LENSES, type UiFinding, type UiLens } from './substrate'\nimport type { UiAuditCapture, UiAuditOutput } from './task'\n\n// Build the lens-validation set from the canonical UI_LENSES tuple so adding\n// a lens to the substrate automatically extends the parser; otherwise a new\n// lens would silently fail isUiLens() and parseAuditorEvents would drop\n// every event using it.\nconst KNOWN_LENS_VALUES = new Set<UiLens>(UI_LENSES)\n\nfunction isUiLens(v: unknown): v is UiLens {\n return typeof v === 'string' && KNOWN_LENS_VALUES.has(v as UiLens)\n}\n\n/** @experimental */\nexport function parseAuditorEvents(events: SandboxEvent[]): UiAuditOutput {\n const findings: UiFinding[] = []\n const captures: UiAuditCapture[] = []\n let lens: UiLens | undefined\n let notes: string | undefined\n\n for (const evt of events) {\n if (!evt || typeof evt !== 'object') continue\n const type = String(evt.type ?? '')\n const data =\n evt.data && typeof evt.data === 'object' ? (evt.data as Record<string, unknown>) : undefined\n if (!data) continue\n\n switch (type) {\n case 'audit.lens': {\n const v = data.lens\n if (isUiLens(v)) lens = v\n break\n }\n case 'audit.capture': {\n const cap = data as unknown as Partial<UiAuditCapture>\n if (\n typeof cap.path === 'string' &&\n typeof cap.viewport === 'string' &&\n typeof cap.fullPage === 'boolean' &&\n typeof cap.route === 'string' &&\n typeof cap.url === 'string' &&\n typeof cap.capturedAt === 'string'\n ) {\n const out: UiAuditCapture = {\n path: cap.path,\n viewport: cap.viewport,\n fullPage: cap.fullPage,\n route: cap.route,\n url: cap.url,\n capturedAt: cap.capturedAt,\n }\n if (cap.elementSelector) out.elementSelector = cap.elementSelector\n if (cap.label) out.label = cap.label\n captures.push(out)\n }\n break\n }\n case 'audit.finding': {\n const f = data as unknown as Partial<UiFinding>\n // Hard requirement: all the actionable fields must be present and\n // non-empty for a finding to enter the output. The validator does the\n // softer scoring; the adapter only filters structural junk.\n if (\n typeof f.title === 'string' &&\n f.title.trim().length > 0 &&\n isUiLens(f.lens) &&\n typeof f.severity === 'string' &&\n ['low', 'med', 'high', 'critical'].includes(f.severity) &&\n typeof f.route === 'string' &&\n typeof f.observation === 'string' &&\n typeof f.impact === 'string' &&\n typeof f.suggestedFix === 'string' &&\n Array.isArray(f.screenshots)\n ) {\n findings.push(f as UiFinding)\n }\n break\n }\n case 'audit.notes': {\n const n = data.notes\n if (typeof n === 'string' && n.trim().length > 0) notes = n\n break\n }\n default:\n // Tolerate cost/usage events and other backend chatter — extractLlmCallEvent\n // in run-loop.ts handles cost accounting upstream from the adapter.\n break\n }\n }\n\n const out: UiAuditOutput = { lens: lens ?? 'other', findings, captures }\n if (notes) out.notes = notes\n return out\n}\n","/**\n * @experimental\n *\n * Auditor validator — scores a single iteration's findings for actionability\n * and gates the iteration result. The kernel uses `valid` + `score` for\n * winner selection across fanned-out iterations and to detect a degenerate\n * iteration (lens-violating findings, no screenshot evidence, no findings\n * at all on a route where we expected some).\n *\n * Hard fails (`valid = false`):\n * - A finding is filed under a lens that does not match the iteration's\n * lens. The whole iteration is bad — the judge isn't following the\n * lens discipline and the resulting Markdown would mislead reviewers.\n * - A finding has no screenshot reference.\n * - A finding's screenshot references a path that wasn't captured in\n * this iteration.\n *\n * Score (0..1, max two decimals stable):\n * - 0.4 * specificityRatio — proportion of findings with a selector\n * - 0.4 * evidenceRatio — proportion of findings whose screenshots resolve to captures\n * - 0.2 * (1 - genericTitleRatio) — proportion of findings whose titles\n * are concrete (not \"improve UX\", \"fix layout\", etc.)\n *\n * An iteration with zero findings scores 0.5 by convention — neither a\n * confident pass nor a hard failure (the judge might just have nothing to\n * say on this lens). The driver decides what to do with it.\n */\n\nimport type { DefaultVerdict } from '@tangle-network/agent-eval'\nimport type { Validator } from '../../runtime/types'\nimport type { UiAuditOutput, UiAuditTask } from './task'\n\nconst GENERIC_TITLE_PATTERNS = [\n /^improve\\s/i,\n /^fix\\s/i,\n /^update\\s/i,\n /^better\\s/i,\n /^bad\\s/i,\n /^make\\s.+\\sbetter/i,\n /\\bUX\\b\\s*$/i,\n /\\bUI\\b\\s*$/i,\n]\n\nfunction isGenericTitle(title: string): boolean {\n const t = title.trim()\n if (t.length < 16) return true\n return GENERIC_TITLE_PATTERNS.some((re) => re.test(t))\n}\n\n/** @experimental */\nexport function createUiAuditorValidator(task: UiAuditTask): Validator<UiAuditOutput> {\n return {\n async validate(output) {\n const findings = output.findings\n const captures = output.captures\n const capturePaths = new Set(captures.map((c) => c.path))\n\n const offLens = findings.filter((f) => f.lens !== task.lens)\n if (offLens.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${offLens.length} finding(s) filed under wrong lens (expected ${task.lens}; got ${offLens.map((f) => f.lens).join(', ')})`,\n scores: { offLens: 0 },\n }\n return verdict\n }\n\n const missingEvidence = findings.filter(\n (f) => !Array.isArray(f.screenshots) || f.screenshots.length === 0,\n )\n if (missingEvidence.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${missingEvidence.length} finding(s) have no screenshot evidence`,\n scores: { evidence: 0 },\n }\n return verdict\n }\n\n const unresolvedShot = findings.filter((f) =>\n f.screenshots.some((s) => !capturePaths.has(s.path)),\n )\n if (unresolvedShot.length > 0) {\n const verdict: DefaultVerdict = {\n valid: false,\n score: 0,\n notes: `${unresolvedShot.length} finding(s) reference screenshot paths not captured this iteration`,\n scores: { evidence: 0 },\n }\n return verdict\n }\n\n if (findings.length === 0) {\n const verdict: DefaultVerdict = {\n valid: true,\n score: 0.5,\n notes: 'No findings reported. Neither a confident pass nor a failure.',\n scores: { specificity: 0, evidence: 1, titles: 1 },\n }\n return verdict\n }\n\n const withSelector = findings.filter((f) => typeof f.selector === 'string').length\n const specificity = withSelector / findings.length\n const generic = findings.filter((f) => isGenericTitle(f.title)).length\n const titles = 1 - generic / findings.length\n // Compute evidence honestly from the data: proportion of findings whose\n // screenshots are all resolvable against this iteration's captures. The\n // guards above hard-fail when this would be < 1, so today the result is\n // always 1; if a future change relaxes those guards into a soft-fail\n // mode, this still produces a truthful evidence ratio rather than a\n // stale constant inflating the score.\n const withFullEvidence = findings.filter(\n (f) =>\n Array.isArray(f.screenshots) &&\n f.screenshots.length > 0 &&\n f.screenshots.every((s) => capturePaths.has(s.path)),\n ).length\n const evidence = withFullEvidence / findings.length\n const score = Number((0.4 * specificity + 0.4 * evidence + 0.2 * titles).toFixed(4))\n\n const verdict: DefaultVerdict = {\n valid: true,\n score,\n notes: `${findings.length} finding(s) — specificity=${specificity.toFixed(2)} evidence=${evidence.toFixed(2)} titles=${titles.toFixed(2)}`,\n scores: { specificity, evidence, titles },\n }\n return verdict\n },\n }\n}\n","/**\n * @experimental\n *\n * `uiAuditorProfile` — preset for vision-driven UI audit iterations.\n *\n * Mirrors the shape of `coderProfile`: returns the `AgentRunSpec`, output\n * adapter, validator, and prompt formatter the loop kernel needs. Unlike\n * `coderProfile`, the agent's \"harness\" is not a sandbox-SDK code-runner\n * — it's a vision-capable judge driving a browser. The loop kernel still\n * iterates `client.create() → box.streamPrompt() → box.delete()`; the\n * client/box pair are provided by `createInProcessUiAuditClient` (in\n * `./in-process-client.ts`) or by a consumer-supplied `SandboxClient`.\n */\n\nimport type { AgentProfile } from '@tangle-network/sandbox'\nimport type { AgentRunSpec, OutputAdapter, Validator } from '../../runtime/types'\nimport { buildAuditorSystemPrompt } from './lens-prompts'\nimport { parseAuditorEvents } from './output-adapter'\nimport { encodeAuditTaskEnvelope, formatAuditorPrompt } from './prompt'\nimport type { UiAuditOutput, UiAuditTask } from './task'\nimport { createUiAuditorValidator } from './validator'\n\n/** @experimental */\nexport interface UiAuditorProfileOptions {\n /**\n * Stable name surfaced in trace events. Defaults to `ui-auditor`.\n */\n name?: string\n /**\n * Optional model identifier passed in `AgentProfile.model.default`.\n * The consumer's `SandboxClient` chooses how to interpret it.\n */\n model?: string\n /**\n * Task bound to the validator. Without it the validator uses the lens\n * embedded in the iteration output as its expectation — fine for one-off\n * use; less strict than passing the task explicitly.\n */\n task?: UiAuditTask\n}\n\n/** @experimental */\nexport function uiAuditorProfile(options: UiAuditorProfileOptions = {}): {\n profile: AgentProfile\n taskToPrompt: (task: UiAuditTask) => string\n output: OutputAdapter<UiAuditOutput>\n validator: Validator<UiAuditOutput>\n agentRunSpec: AgentRunSpec<UiAuditTask>\n} {\n const name = options.name ?? 'ui-auditor'\n\n // Lens is per-task; the profile's system prompt is filled in by the\n // taskToPrompt formatter at iteration time (prefixed to the user\n // message). Keeping the profile lens-agnostic lets one AgentRunSpec\n // serve every lens-iteration of the loop.\n const profile: AgentProfile = {\n name,\n description: 'Vision-driven UI auditor. One lens per iteration.',\n prompt: { systemPrompt: '' },\n model: options.model ? { default: options.model } : undefined,\n tools: { browser: true, vision: true },\n metadata: { role: 'ui-auditor' },\n }\n\n const output: OutputAdapter<UiAuditOutput> = { parse: parseAuditorEvents }\n const validator: Validator<UiAuditOutput> = options.task\n ? createUiAuditorValidator(options.task)\n : createUiAuditorValidator({ lens: 'other', captures: [] })\n\n // Prompt shape (consumed both by sandbox-SDK harnesses AND by the\n // in-process auditor client):\n // <<UI_AUDIT_TASK>>{json}<<UI_AUDIT_TASK_END>>\n // <system-prompt for the lens>\n // <human-readable iteration brief>\n // The envelope makes the iteration self-describing so concurrent fanout\n // does not race over per-client side state. Sandbox-SDK harnesses can\n // ignore the envelope; the in-process auditor client decodes it back\n // into a typed UiAuditTask via decodeAuditTaskEnvelope.\n const taskToPrompt = (task: UiAuditTask): string =>\n `${encodeAuditTaskEnvelope(task)}\\n${buildAuditorSystemPrompt(task.lens)}\\n\\n${formatAuditorPrompt(task)}`\n\n const agentRunSpec: AgentRunSpec<UiAuditTask> = {\n name,\n profile,\n taskToPrompt,\n }\n\n return { profile, taskToPrompt, output, validator, agentRunSpec }\n}\n"],"mappings":";;;;;;;;;;;;;;;AAoBA,IAAM,iBAAiB;AACvB,IAAM,eAAe;AAGd,SAAS,wBAAwB,MAA2B;AACjE,SAAO,GAAG,cAAc,GAAG,KAAK,UAAU,IAAI,CAAC,GAAG,YAAY;AAChE;AASO,SAAS,wBAAwB,QAAyC;AAC/E,QAAM,QAAQ,OAAO,QAAQ,cAAc;AAC3C,MAAI,UAAU,GAAI,QAAO;AACzB,QAAM,eAAe,QAAQ,eAAe;AAC5C,QAAM,MAAM,OAAO,QAAQ,cAAc,YAAY;AACrD,MAAI,QAAQ,GAAI,QAAO;AACvB,QAAM,UAAU,OAAO,MAAM,cAAc,GAAG;AAC9C,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,OAAO;AACjC,QAAI,CAAC,UAAU,OAAO,WAAW,SAAU,QAAO;AAClD,UAAM,IAAI;AACV,QAAI,OAAO,EAAE,SAAS,YAAY,CAAC,MAAM,QAAQ,EAAE,QAAQ,EAAG,QAAO;AACrE,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAGO,SAAS,oBAAoB,MAA2B;AAC7D,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,qCAAgC,KAAK,IAAI,EAAE;AACtD,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,kBAAkB,KAAK,eAAe,KAAK,EAAE,SAAS,GAAG;AAChE,UAAM,KAAK,oBAAoB;AAC/B,UAAM,KAAK,KAAK,eAAe,KAAK,CAAC;AACrC,UAAM,KAAK,EAAE;AAAA,EACf;AACA,QAAM,KAAK,qBAAqB;AAChC,OAAK,SAAS,QAAQ,CAAC,KAAK,MAAM;AAChC,UAAM,KAAK,IAAI,WAAW,GAAG,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,MAAM,KAAK;AAC3E,UAAM,SAAS;AAAA,MACb,YAAY,EAAE;AAAA,MACd,IAAI,WAAW,kBAAkB;AAAA,MACjC,IAAI,kBAAkB,cAAc,IAAI,eAAe,OAAO;AAAA,MAC9D,IAAI,UAAU,aAAa,IAAI,OAAO,OAAO;AAAA,MAC7C,IAAI,WAAW,SAAY,UAAU,IAAI,MAAM,KAAK;AAAA,MACpD,IAAI,QAAQ,SAAS,IAAI,KAAK,KAAK;AAAA,IACrC,EACG,OAAO,CAAC,MAAmB,MAAM,IAAI,EACrC,KAAK,QAAK;AACb,UAAM,KAAK,GAAG,IAAI,CAAC,aAAa,IAAI,KAAK,UAAU,IAAI,GAAG,IAAI,SAAS,IAAI,MAAM,MAAM,EAAE,EAAE;AAAA,EAC7F,CAAC;AACD,QAAM,KAAK,EAAE;AACb,MAAI,KAAK,mBAAmB,KAAK,gBAAgB,SAAS,GAAG;AAC3D,UAAM,KAAK,uDAAuD;AAClE,UAAM,KAAK,KAAK,gBAAgB,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,EAAE,SAAS,GAAG,GAAG,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC;AACvF,UAAM,KAAK,EAAE;AAAA,EACf;AACA,QAAM,KAAK,kBAAkB;AAC7B,QAAM;AAAA,IACJ;AAAA,EACF;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;;;AClDA,SAAS,eAAkB,MAAc,MAAuB;AAC9D,SAAO,EAAE,MAAM,KAAK;AACtB;AAuDA,IAAM,mBAAmB,EAAE,OAAO,MAAM,QAAQ,IAAI;AACpD,IAAM,iBAAiB;AAEvB,eAAe,gBAAwC;AACrD,QAAM,MAAO,MAAM,OAAO,YAAY;AAGtC,MAAI,CAAC,KAAK,YAAY,OAAO,IAAI,SAAS,WAAW,YAAY;AAC/D,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AACA,SAAO,IAAI,SAAS,OAAO,EAAE,UAAU,KAAK,CAAC;AAC/C;AAEA,SAAS,WAAmB;AAC1B,QAAM,IAAI,oBAAI,KAAK;AACnB,QAAM,MAAM,CAAC,MAAc,OAAO,CAAC,EAAE,SAAS,GAAG,GAAG;AAIpD,SACE,GAAG,EAAE,eAAe,CAAC,GAAG,IAAI,EAAE,YAAY,IAAI,CAAC,CAAC,GAAG,IAAI,EAAE,WAAW,CAAC,CAAC,IACnE,IAAI,EAAE,YAAY,CAAC,CAAC,GAAG,IAAI,EAAE,cAAc,CAAC,CAAC,GAAG,IAAI,EAAE,cAAc,CAAC,CAAC,IACrE,OAAO,EAAE,mBAAmB,CAAC,EAAE,SAAS,GAAG,GAAG,CAAC;AAEvD;AAEA,SAAS,WAAW,KAA+D;AACjF,SAAO,IAAI,YAAY;AACzB;AAEA,SAAS,gBAAgB,KAAoC;AAC3D,QAAM,KAAK,WAAW,GAAG;AACzB,QAAM,YAAY,IAAI,QAAQ,KAAK,QAAQ,IAAI,OAAO,OAAO,CAAC,KAAK;AACnE,SAAO,GAAG,QAAQ,IAAI,OAAO,OAAO,CAAC,KAAK,GAAG,KAAK,IAAI,GAAG,MAAM,GAAG,SAAS,KAAK,SAAS,CAAC;AAC5F;AAEA,SAAS,cAAc,KAAmB;AACxC,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;AAAA,EACtB,QAAQ;AACN,UAAM,IAAI,MAAM,iDAAiD,KAAK,UAAU,GAAG,CAAC,GAAG;AAAA,EACzF;AAMA,MAAI,OAAO,aAAa,WAAW,OAAO,aAAa,UAAU;AAC/D,UAAM,IAAI;AAAA,MACR,uDAAuD,OAAO,QAAQ,OAAO,KAAK,UAAU,GAAG,CAAC;AAAA,IAClG;AAAA,EACF;AACF;AAEA,eAAe,WACb,MACA,KACA,YACA,QACA,WACe;AACf,SAAO,eAAe;AACtB,gBAAc,IAAI,GAAG;AAGrB,QAAM,KAAK,gBAAgB,WAAW,GAAG,CAAC;AAC1C,QAAM,YAAY,cAAc,QAAQ,qBAAqB;AAC7D,QAAM,KAAK,KAAK,IAAI,KAAK,EAAE,WAAW,SAAS,eAAe,CAAC;AAC/D,MAAI,IAAI,SAAS;AACf,UAAM,KAAK,gBAAgB,IAAI,SAAS,EAAE,SAAS,KAAO,CAAC;AAAA,EAC7D;AACA,QAAM,QAAQ,IAAI,UAAU;AAC5B,MAAI,QAAQ,EAAG,OAAM,KAAK,eAAe,KAAK;AAC9C,SAAO,eAAe;AACtB,MAAI,IAAI,iBAAiB;AACvB,UAAM,KAAK,QAAQ,IAAI,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAAA,EACjF,OAAO;AACL,UAAM,KAAK,WAAW,EAAE,MAAM,YAAY,UAAU,IAAI,aAAa,KAAK,CAAC;AAAA,EAC7E;AACF;AAIA,SAAS,gBAAwB;AAC/B,QAAM,OAAO,MAAM,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,EAAE;AACzD,SAAO,YAAY,KAAK,CAAC,GAAG,KAAK,CAAC;AACpC;AAGO,SAAS,6BACd,SAeA;AACA,QAAM,SAAS,QAAQ,iBAAiB;AACxC,QAAM,YAAY,QAAQ,aAAa;AACvC,MAAI;AACJ,MAAI,SAAS;AAEb,iBAAe,aAAqC;AAClD,QAAI,QAAQ;AACV,YAAM,IAAI,MAAM,4EAA4E;AAAA,IAC9F;AACA,QAAI,CAAC,eAAgB,kBAAiB,OAAO;AAC7C,WAAO;AAAA,EACT;AAEA,kBAAgB,aACd,YACA,QAC6B;AAC7B,UAAM,OAAO,wBAAwB,UAAU;AAC/C,QAAI,CAAC,MAAM;AACT,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AACA,QAAI,KAAK,SAAS,WAAW,GAAG;AAC9B,YAAM,IAAI,MAAM,uDAAuD;AAAA,IACzE;AAEA,UAAM,eAAe,cAAc,EAAE,MAAM,KAAK,KAAK,CAAC;AAEtD,UAAM,UAAU,MAAM,WAAW;AACjC,UAAM,UAAU,MAAM,QAAQ,WAAW,EAAE,UAAU,iBAAiB,CAAC;AAMvE,QAAI;AACJ,QAAI;AACJ,QAAI;AACF,YAAM,OAAO,MAAM,QAAQ,QAAQ;AACnC,YAAM,WAA6B,CAAC;AACpC,YAAM,KAAK,MAAM,OAAO,aAAkB;AAC1C,YAAM,OAAO,MAAM,OAAO,MAAW;AACrC,YAAM,WAAW,KAAK,KAAK,QAAQ,cAAc,aAAa;AAC9D,YAAM,GAAG,MAAM,UAAU,EAAE,WAAW,KAAK,CAAC;AAE5C,iBAAW,OAAO,KAAK,UAAU;AAC/B,eAAO,eAAe;AACtB,cAAM,WAAW,gBAAgB,GAAG;AACpC,cAAM,UAAU,KAAK,KAAK,UAAU,QAAQ;AAC5C,cAAM,UAAU,eAAe,QAAQ;AACvC,cAAM,WAAW,MAAM,KAAK,SAAS,QAAQ,SAAS;AACtD,cAAM,KAAK,WAAW,GAAG;AACzB,cAAM,MAAsB;AAAA,UAC1B,MAAM;AAAA,UACN,UAAU,GAAG,GAAG,KAAK,IAAI,GAAG,MAAM;AAAA,UAClC,UAAU,IAAI,aAAa;AAAA,UAC3B,OAAO,IAAI;AAAA,UACX,KAAK,IAAI;AAAA,UACT,aAAY,oBAAI,KAAK,GAAE,YAAY;AAAA,QACrC;AACA,YAAI,IAAI,gBAAiB,KAAI,kBAAkB,IAAI;AACnD,YAAI,IAAI,MAAO,KAAI,QAAQ,IAAI;AAC/B,iBAAS,KAAK,GAAG;AACjB,cAAM,eAAe,iBAAiB,GAAG;AAAA,MAC3C;AAEA,YAAM,WAAW,MAAM,QAAQ,MAAM;AAAA,QACnC,MAAM,KAAK;AAAA,QACX;AAAA,QACA,gBAAgB,KAAK;AAAA,QACrB,iBAAiB,KAAK;AAAA,QACtB;AAAA,QACA;AAAA,MACF,CAAC;AAED,iBAAW,WAAW,SAAS,UAAU;AACvC,cAAM,eAAe,iBAAiB,OAAO;AAAA,MAC/C;AACA,UAAI,SAAS,SAAS,SAAS,MAAM,KAAK,EAAE,SAAS,GAAG;AACtD,cAAM,eAAe,eAAe,EAAE,OAAO,SAAS,MAAM,CAAC;AAAA,MAC/D;AAEA,YAAM,QAAQ,SAAS,cAAc,EAAE,OAAO,GAAG,QAAQ,EAAE;AAC3D,YAAM,eAAe,QAAQ;AAAA,QAC3B,YAAY;AAAA,UACV,aAAa,MAAM;AAAA,UACnB,cAAc,MAAM;AAAA,QACtB;AAAA,QACA,cAAc,SAAS,WAAW;AAAA,MACpC,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,qBAAe;AAAA,IACjB,UAAE;AACA,UAAI;AACF,cAAM,QAAQ,MAAM;AAAA,MACtB,SAAS,KAAK;AACZ,qBAAa;AAAA,MACf;AAAA,IACF;AAIA,QAAI,iBAAiB,UAAa,eAAe,QAAW;AAC1D,YAAM,IAAI;AAAA,QACR,CAAC,cAAc,UAAU;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AACA,QAAI,iBAAiB,OAAW,OAAM;AACtC,QAAI,eAAe,OAAW,OAAM;AAAA,EACtC;AAEA,WAAS,uBAAyC;AAChD,UAAM,KAAK,cAAc;AACzB,UAAM,WAAW;AAAA,MACf;AAAA,MACA,aAAa,SAAiB,MAA8D;AAC1F,cAAM,SAAS,MAAM,UAAU,IAAI,gBAAgB,EAAE;AACrD,eAAO,aAAa,SAAS,MAAM;AAAA,MACrC;AAAA,MACA,MAAM,SAAwB;AAAA,MAI9B;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AAAA,IACL,MAAM,OAAO,UAAiC;AAC5C,aAAO,qBAAqB;AAAA,IAC9B;AAAA,IACA,kBAAkB,KAAK;AACrB,YAAM,KAAM,IAAmC;AAC/C,aAAO,EAAE,MAAM,WAAW,WAAW,OAAO,OAAO,WAAW,KAAK,OAAU;AAAA,IAC/E;AAAA,IACA,MAAM,QAAQ;AACZ,eAAS;AACT,YAAM,UAAU;AAChB,uBAAiB;AACjB,UAAI,SAAS;AACX,cAAM,UAAU,MAAM;AACtB,cAAM,QAAQ,MAAM;AAAA,MACtB;AAAA,IACF;AAAA,EACF;AACF;;;ACjVO,IAAM,uBAAuB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAmBlC,KAAK;AAGA,IAAM,cAAsC;AAAA,EACjD,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,WAAW;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMX,KAAK;AAAA,EACL,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMR,KAAK;AAAA,EACL,WAAW;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMX,KAAK;AAAA,EACL,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,eAAe;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMf,KAAK;AAAA,EACL,YAAY;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMZ,KAAK;AAAA,EACL,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMR,KAAK;AAAA,EACL,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMT,KAAK;AAAA,EACL,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMb,KAAK;AAAA,EACL,yBAAyB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMzB,KAAK;AAAA,EACL,OAAO;AAAA;AAAA;AAAA,EAGP,KAAK;AACP;AAOO,SAAS,yBAAyB,MAAsB;AAC7D,QAAM,QAAQ,YAAY,IAAI;AAC9B,SAAO,GAAG,oBAAoB;AAAA;AAAA,EAAO,KAAK;AAC5C;;;AC1GA,IAAM,oBAAoB,IAAI,IAAY,SAAS;AAEnD,SAAS,SAAS,GAAyB;AACzC,SAAO,OAAO,MAAM,YAAY,kBAAkB,IAAI,CAAW;AACnE;AAGO,SAAS,mBAAmB,QAAuC;AACxE,QAAM,WAAwB,CAAC;AAC/B,QAAM,WAA6B,CAAC;AACpC,MAAI;AACJ,MAAI;AAEJ,aAAW,OAAO,QAAQ;AACxB,QAAI,CAAC,OAAO,OAAO,QAAQ,SAAU;AACrC,UAAM,OAAO,OAAO,IAAI,QAAQ,EAAE;AAClC,UAAM,OACJ,IAAI,QAAQ,OAAO,IAAI,SAAS,WAAY,IAAI,OAAmC;AACrF,QAAI,CAAC,KAAM;AAEX,YAAQ,MAAM;AAAA,MACZ,KAAK,cAAc;AACjB,cAAM,IAAI,KAAK;AACf,YAAI,SAAS,CAAC,EAAG,QAAO;AACxB;AAAA,MACF;AAAA,MACA,KAAK,iBAAiB;AACpB,cAAM,MAAM;AACZ,YACE,OAAO,IAAI,SAAS,YACpB,OAAO,IAAI,aAAa,YACxB,OAAO,IAAI,aAAa,aACxB,OAAO,IAAI,UAAU,YACrB,OAAO,IAAI,QAAQ,YACnB,OAAO,IAAI,eAAe,UAC1B;AACA,gBAAMA,OAAsB;AAAA,YAC1B,MAAM,IAAI;AAAA,YACV,UAAU,IAAI;AAAA,YACd,UAAU,IAAI;AAAA,YACd,OAAO,IAAI;AAAA,YACX,KAAK,IAAI;AAAA,YACT,YAAY,IAAI;AAAA,UAClB;AACA,cAAI,IAAI,gBAAiB,CAAAA,KAAI,kBAAkB,IAAI;AACnD,cAAI,IAAI,MAAO,CAAAA,KAAI,QAAQ,IAAI;AAC/B,mBAAS,KAAKA,IAAG;AAAA,QACnB;AACA;AAAA,MACF;AAAA,MACA,KAAK,iBAAiB;AACpB,cAAM,IAAI;AAIV,YACE,OAAO,EAAE,UAAU,YACnB,EAAE,MAAM,KAAK,EAAE,SAAS,KACxB,SAAS,EAAE,IAAI,KACf,OAAO,EAAE,aAAa,YACtB,CAAC,OAAO,OAAO,QAAQ,UAAU,EAAE,SAAS,EAAE,QAAQ,KACtD,OAAO,EAAE,UAAU,YACnB,OAAO,EAAE,gBAAgB,YACzB,OAAO,EAAE,WAAW,YACpB,OAAO,EAAE,iBAAiB,YAC1B,MAAM,QAAQ,EAAE,WAAW,GAC3B;AACA,mBAAS,KAAK,CAAc;AAAA,QAC9B;AACA;AAAA,MACF;AAAA,MACA,KAAK,eAAe;AAClB,cAAM,IAAI,KAAK;AACf,YAAI,OAAO,MAAM,YAAY,EAAE,KAAK,EAAE,SAAS,EAAG,SAAQ;AAC1D;AAAA,MACF;AAAA,MACA;AAGE;AAAA,IACJ;AAAA,EACF;AAEA,QAAM,MAAqB,EAAE,MAAM,QAAQ,SAAS,UAAU,SAAS;AACvE,MAAI,MAAO,KAAI,QAAQ;AACvB,SAAO;AACT;;;AC9EA,IAAM,yBAAyB;AAAA,EAC7B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,SAAS,eAAe,OAAwB;AAC9C,QAAM,IAAI,MAAM,KAAK;AACrB,MAAI,EAAE,SAAS,GAAI,QAAO;AAC1B,SAAO,uBAAuB,KAAK,CAAC,OAAO,GAAG,KAAK,CAAC,CAAC;AACvD;AAGO,SAAS,yBAAyB,MAA6C;AACpF,SAAO;AAAA,IACL,MAAM,SAAS,QAAQ;AACrB,YAAM,WAAW,OAAO;AACxB,YAAM,WAAW,OAAO;AACxB,YAAM,eAAe,IAAI,IAAI,SAAS,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC;AAExD,YAAM,UAAU,SAAS,OAAO,CAAC,MAAM,EAAE,SAAS,KAAK,IAAI;AAC3D,UAAI,QAAQ,SAAS,GAAG;AACtB,cAAMC,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,QAAQ,MAAM,gDAAgD,KAAK,IAAI,SAAS,QAAQ,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,IAAI,CAAC;AAAA,UAC/H,QAAQ,EAAE,SAAS,EAAE;AAAA,QACvB;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,kBAAkB,SAAS;AAAA,QAC/B,CAAC,MAAM,CAAC,MAAM,QAAQ,EAAE,WAAW,KAAK,EAAE,YAAY,WAAW;AAAA,MACnE;AACA,UAAI,gBAAgB,SAAS,GAAG;AAC9B,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,gBAAgB,MAAM;AAAA,UAChC,QAAQ,EAAE,UAAU,EAAE;AAAA,QACxB;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,iBAAiB,SAAS;AAAA,QAAO,CAAC,MACtC,EAAE,YAAY,KAAK,CAAC,MAAM,CAAC,aAAa,IAAI,EAAE,IAAI,CAAC;AAAA,MACrD;AACA,UAAI,eAAe,SAAS,GAAG;AAC7B,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO,GAAG,eAAe,MAAM;AAAA,UAC/B,QAAQ,EAAE,UAAU,EAAE;AAAA,QACxB;AACA,eAAOA;AAAA,MACT;AAEA,UAAI,SAAS,WAAW,GAAG;AACzB,cAAMA,WAA0B;AAAA,UAC9B,OAAO;AAAA,UACP,OAAO;AAAA,UACP,OAAO;AAAA,UACP,QAAQ,EAAE,aAAa,GAAG,UAAU,GAAG,QAAQ,EAAE;AAAA,QACnD;AACA,eAAOA;AAAA,MACT;AAEA,YAAM,eAAe,SAAS,OAAO,CAAC,MAAM,OAAO,EAAE,aAAa,QAAQ,EAAE;AAC5E,YAAM,cAAc,eAAe,SAAS;AAC5C,YAAM,UAAU,SAAS,OAAO,CAAC,MAAM,eAAe,EAAE,KAAK,CAAC,EAAE;AAChE,YAAM,SAAS,IAAI,UAAU,SAAS;AAOtC,YAAM,mBAAmB,SAAS;AAAA,QAChC,CAAC,MACC,MAAM,QAAQ,EAAE,WAAW,KAC3B,EAAE,YAAY,SAAS,KACvB,EAAE,YAAY,MAAM,CAAC,MAAM,aAAa,IAAI,EAAE,IAAI,CAAC;AAAA,MACvD,EAAE;AACF,YAAM,WAAW,mBAAmB,SAAS;AAC7C,YAAM,QAAQ,QAAQ,MAAM,cAAc,MAAM,WAAW,MAAM,QAAQ,QAAQ,CAAC,CAAC;AAEnF,YAAM,UAA0B;AAAA,QAC9B,OAAO;AAAA,QACP;AAAA,QACA,OAAO,GAAG,SAAS,MAAM,kCAA6B,YAAY,QAAQ,CAAC,CAAC,aAAa,SAAS,QAAQ,CAAC,CAAC,WAAW,OAAO,QAAQ,CAAC,CAAC;AAAA,QACxI,QAAQ,EAAE,aAAa,UAAU,OAAO;AAAA,MAC1C;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACF;;;AC1FO,SAAS,iBAAiB,UAAmC,CAAC,GAMnE;AACA,QAAM,OAAO,QAAQ,QAAQ;AAM7B,QAAM,UAAwB;AAAA,IAC5B;AAAA,IACA,aAAa;AAAA,IACb,QAAQ,EAAE,cAAc,GAAG;AAAA,IAC3B,OAAO,QAAQ,QAAQ,EAAE,SAAS,QAAQ,MAAM,IAAI;AAAA,IACpD,OAAO,EAAE,SAAS,MAAM,QAAQ,KAAK;AAAA,IACrC,UAAU,EAAE,MAAM,aAAa;AAAA,EACjC;AAEA,QAAM,SAAuC,EAAE,OAAO,mBAAmB;AACzE,QAAM,YAAsC,QAAQ,OAChD,yBAAyB,QAAQ,IAAI,IACrC,yBAAyB,EAAE,MAAM,SAAS,UAAU,CAAC,EAAE,CAAC;AAW5D,QAAM,eAAe,CAAC,SACpB,GAAG,wBAAwB,IAAI,CAAC;AAAA,EAAK,yBAAyB,KAAK,IAAI,CAAC;AAAA;AAAA,EAAO,oBAAoB,IAAI,CAAC;AAE1G,QAAM,eAA0C;AAAA,IAC9C;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,cAAc,QAAQ,WAAW,aAAa;AAClE;","names":["out","verdict"]}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { SandboxInstance } from '@tangle-network/sandbox';
|
|
2
|
-
import { D as Driver, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, I as Iteration, L as LoopWinner, a as LoopLineageOptions,
|
|
2
|
+
import { D as Driver, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, I as Iteration, L as LoopWinner, a as LoopLineageOptions, S as SandboxClient, b as LoopResult } from './types-DnYoHvvZ.js';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* @experimental
|
|
@@ -99,7 +99,7 @@ declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, O
|
|
|
99
99
|
* it) and merges `sandboxOverrides`. Shared by the loop kernel and the
|
|
100
100
|
* `AgentRuntime.act` sandbox bridge so both boot the sandbox identically.
|
|
101
101
|
*/
|
|
102
|
-
declare function createSandboxForSpec<Task>(client:
|
|
102
|
+
declare function createSandboxForSpec<Task>(client: SandboxClient, spec: AgentRunSpec<Task>, signal: AbortSignal): Promise<SandboxInstance>;
|
|
103
103
|
/**
|
|
104
104
|
* The kernel's winner argmax — best-valid-score, ties broken by earliest index,
|
|
105
105
|
* falling back to the best-scoring non-errored output when none is valid. Exported
|
package/dist/runtime.d.ts
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import { AgentProfile as AgentProfile$1, BackendType, CreateSandboxOptions, SandboxInstance, SandboxEvent } from '@tangle-network/sandbox';
|
|
2
2
|
export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
3
|
-
import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, d as AgentSpec,
|
|
4
|
-
export {
|
|
5
|
-
export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as
|
|
3
|
+
import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, E as ExecutorFactory, d as AgentSpec, e as ExecutorRegistry, B as Budget, A as Agent, f as RootHandle, g as SupervisedResult, h as Spend, S as Scope, U as UsageEvent, i as Supervisor } from './types-BfoeiQRZ.js';
|
|
4
|
+
export { j as Executor, k as ExecutorContext, l as ExecutorResult, H as Handle, m as NodeSnapshot, n as NodeStatus, o as Restart, p as RootSignal, q as Runtime, r as SpawnOpts, s as SupervisorOpts, W as WidenGate } from './types-BfoeiQRZ.js';
|
|
5
|
+
export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDriverOptions, D as DriverDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './driver-C-mtBo7h.js';
|
|
6
|
+
import { S as SandboxClient, b as LoopResult, c as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-DnYoHvvZ.js';
|
|
7
|
+
export { D as Driver, C as LoopDecisionPayload, F as LoopEndedPayload, G as LoopIterationDispatchPayload, H as LoopIterationEndedPayload, J as LoopIterationStartedPayload, a as LoopLineageOptions, M as LoopPlanDescription, N as LoopPlanPayload, f as LoopSandboxPlacement, P as LoopStartedPayload, Q as LoopTeardownFailedPayload, e as LoopTraceEmitter, T as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, U as ValidationCtx, V as Validator } from './types-DnYoHvvZ.js';
|
|
6
8
|
import { AgentProfile, AnalystFinding, DefaultVerdict } from '@tangle-network/agent-eval';
|
|
7
9
|
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
8
10
|
import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
|
|
9
|
-
import { R as RunLoopOptions } from './run-loop
|
|
10
|
-
export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop
|
|
11
|
-
import { b as LoopSandboxClient, c as LoopResult, d as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-DdzkffAm.js';
|
|
12
|
-
export { D as Driver, h as LoopDecisionPayload, i as LoopEndedPayload, j as LoopIterationDispatchPayload, k as LoopIterationEndedPayload, l as LoopIterationStartedPayload, a as LoopLineageOptions, m as LoopPlanDescription, n as LoopPlanPayload, g as LoopSandboxPlacement, o as LoopStartedPayload, p as LoopTeardownFailedPayload, f as LoopTraceEmitter, q as LoopTraceEvent, L as LoopWinner, O as OutputAdapter, r as ValidationCtx, V as Validator } from './types-DdzkffAm.js';
|
|
11
|
+
import { R as RunLoopOptions } from './run-loop-CU2Y00Si.js';
|
|
12
|
+
export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop-CU2Y00Si.js';
|
|
13
13
|
import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
|
|
14
14
|
|
|
15
15
|
/**
|
|
@@ -113,6 +113,13 @@ declare function replaySpawnTree(journal: SpawnJournal, blobs: ResultBlobStore,
|
|
|
113
113
|
*/
|
|
114
114
|
declare function materializeTreeView(events: SpawnEvent[]): TreeView;
|
|
115
115
|
|
|
116
|
+
/**
|
|
117
|
+
* Adapt an `ExecutorFactory` into a `SandboxClient` for `runLoop`. The factory is
|
|
118
|
+
* instantiated fresh per `streamPrompt` (mirrors the per-spawn executor lifecycle):
|
|
119
|
+
* run once on the prompt, emit the terminal result event, tear down.
|
|
120
|
+
*/
|
|
121
|
+
declare function inlineSandboxClient(factory: ExecutorFactory<unknown>): SandboxClient;
|
|
122
|
+
|
|
116
123
|
/**
|
|
117
124
|
* `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
|
|
118
125
|
*
|
|
@@ -146,7 +153,7 @@ declare function materializeTreeView(events: SpawnEvent[]): TreeView;
|
|
|
146
153
|
type LoopOptionsForDispatch<Task, Output, Decision> = Omit<RunLoopOptions<Task, Output, Decision>, 'ctx'>;
|
|
147
154
|
interface LoopDispatchOptions<Task, Output, Decision, TScenario extends Scenario, TArtifact> {
|
|
148
155
|
/** Sandbox client used for every cell's `runLoop`. Supplied once. */
|
|
149
|
-
sandboxClient:
|
|
156
|
+
sandboxClient: SandboxClient;
|
|
150
157
|
/** Build the per-cell runLoop options from the scenario (+ profile, when
|
|
151
158
|
* used with `runProfileMatrix`). */
|
|
152
159
|
toLoopOptions: (scenario: TScenario, profile: AgentProfile) => LoopOptionsForDispatch<Task, Output, Decision>;
|
|
@@ -317,7 +324,7 @@ interface ShapeContext<D = unknown> {
|
|
|
317
324
|
* Wrap an `AgentSpec` into a leaf `Agent` carrying it as `executorSpec`, so the shape can
|
|
318
325
|
* `scope.spawn(spawnChild(spec), task, opts)`. `name` labels the child for traces. The
|
|
319
326
|
* returned agent's `act` is never invoked by the keystone (it is spawned, not run) — the
|
|
320
|
-
* spec drives the resolved `
|
|
327
|
+
* spec drives the resolved `Executor`; `act` exists only to satisfy the `Agent` shape.
|
|
321
328
|
*/
|
|
322
329
|
spawnChild(name: string, spec: AgentSpec): Agent<unknown, Outcome<D>>;
|
|
323
330
|
/** Derive a child `AgentSpec` from the persona's root spec with an overridden profile —
|
|
@@ -1299,7 +1306,7 @@ interface AcquireOptions {
|
|
|
1299
1306
|
sleep?: (ms: number) => Promise<void>;
|
|
1300
1307
|
}
|
|
1301
1308
|
/** @experimental */
|
|
1302
|
-
declare function acquireSandbox(client:
|
|
1309
|
+
declare function acquireSandbox(client: SandboxClient, options: CreateSandboxOptions, acquire?: AcquireOptions): Promise<SandboxInstance>;
|
|
1303
1310
|
|
|
1304
1311
|
/**
|
|
1305
1312
|
* @experimental
|
|
@@ -1340,9 +1347,9 @@ interface SandboxCapabilities {
|
|
|
1340
1347
|
*
|
|
1341
1348
|
* @experimental
|
|
1342
1349
|
*/
|
|
1343
|
-
declare function probeSandboxCapabilities(client:
|
|
1350
|
+
declare function probeSandboxCapabilities(client: SandboxClient): Promise<SandboxCapabilities>;
|
|
1344
1351
|
/**
|
|
1345
|
-
* Narrowed view of the optional CRIU probe. The loop-side `
|
|
1352
|
+
* Narrowed view of the optional CRIU probe. The loop-side `SandboxClient`
|
|
1346
1353
|
* does not require `criuStatus`; this widens it optionally so the probe can be
|
|
1347
1354
|
* read without importing sandbox-backend specifics. @experimental
|
|
1348
1355
|
*/
|
|
@@ -1514,12 +1521,13 @@ interface SandboxLineage {
|
|
|
1514
1521
|
*
|
|
1515
1522
|
* @experimental
|
|
1516
1523
|
*/
|
|
1517
|
-
declare function createSandboxLineage(client:
|
|
1524
|
+
declare function createSandboxLineage(client: SandboxClient, capabilities: SandboxCapabilities, options?: {
|
|
1518
1525
|
maxConcurrency?: number;
|
|
1526
|
+
streaming?: 'sse' | 'poll';
|
|
1519
1527
|
}): SandboxLineage;
|
|
1520
1528
|
/**
|
|
1521
1529
|
* Loop-side widening of the box's optional checkpoint method. The
|
|
1522
|
-
* `
|
|
1530
|
+
* `SandboxClient`/`SandboxInstance` surface the kernel relies on does not
|
|
1523
1531
|
* require checkpointing; this reads it optionally so the lineage can probe-gate
|
|
1524
1532
|
* without importing sandbox-backend specifics. @experimental
|
|
1525
1533
|
*/
|
|
@@ -1550,6 +1558,104 @@ interface SessionCapableBox {
|
|
|
1550
1558
|
};
|
|
1551
1559
|
}
|
|
1552
1560
|
|
|
1561
|
+
/**
|
|
1562
|
+
* `openSandboxRun` — the ONE harness-agnostic seam for running an agent in a
|
|
1563
|
+
* sandbox over a persistent artifact: run it, stream it, RESUME the same session
|
|
1564
|
+
* across turns. Domain-agnostic: a coding agent, a research agent, a tax/legal
|
|
1565
|
+
* agent — all flow through this; the domain lives only in the `Deliverable<Out>`
|
|
1566
|
+
* the caller supplies, never in a per-domain copy of this function.
|
|
1567
|
+
*
|
|
1568
|
+
* It is a thin facade (NOT a new layer) over code that already exists and is
|
|
1569
|
+
* already hardened:
|
|
1570
|
+
* - `acquireSandbox` — cold-start / 502-503-504 / gateway-timeout recovery,
|
|
1571
|
+
* - `buildBackendOptions` — the harness IS `backend.type` (opencode / codex /
|
|
1572
|
+
* claude-code / kimi-code / hermes / pi); the only "which agent" knob,
|
|
1573
|
+
* - `createSandboxLineage` — `start` mints a session; `resume` continues the
|
|
1574
|
+
* SAME server-side session with a fail-loud `assertSessionLive`.
|
|
1575
|
+
*
|
|
1576
|
+
* The one genuinely-new piece is {@link Deliverable}: it widens the pure
|
|
1577
|
+
* `OutputAdapter.parse(events)` to ALSO admit a post-turn read off the box FS —
|
|
1578
|
+
* the structural gap that made the bench gates hand-roll `box.fs.read`, because a
|
|
1579
|
+
* large produced file (a git diff, a generated document) truncates in the chat
|
|
1580
|
+
* stream and a pure events-parser cannot reach the workspace. Per the SDK, a
|
|
1581
|
+
* RELATIVE `deliverable.path` resolves from the workspace root and an ABSOLUTE one
|
|
1582
|
+
* (e.g. `/tmp/solution.patch`) reads the container filesystem directly — both are
|
|
1583
|
+
* valid; pick the one the agent actually wrote to. Avoid `..` traversal segments.
|
|
1584
|
+
*
|
|
1585
|
+
* What this deliberately does NOT do (so it stays a facade, not slop): no custom
|
|
1586
|
+
* reconnect/replay (the SDK + platform own per-session buffering + `Last-Event-ID`);
|
|
1587
|
+
* no fork verb (platform CRIU is probe-gated and currently absent — fork lives in
|
|
1588
|
+
* `SandboxLineage.fork` behind the capability probe, surfaced only if it returns).
|
|
1589
|
+
* It is also distinct from `runLoop`: `runLoop` is the multi-round, driver-driven
|
|
1590
|
+
* kernel (fresh box per round, events deliverable); this is a SINGLE rollout +
|
|
1591
|
+
* artifact-or-events deliverable + resume over ONE persistent box.
|
|
1592
|
+
*/
|
|
1593
|
+
|
|
1594
|
+
/**
|
|
1595
|
+
* @experimental
|
|
1596
|
+
* How a typed deliverable `Out` is materialized from a finished turn.
|
|
1597
|
+
* - `events` — pure parse over the event array (identical to `OutputAdapter`).
|
|
1598
|
+
* - `artifact` — read a file off the box AFTER the turn drains, then map it (+ the
|
|
1599
|
+
* events). For diffs/codebases/documents that don't fit the chat
|
|
1600
|
+
* stream. `path` relative ⇒ workspace root; absolute ⇒ container FS.
|
|
1601
|
+
*/
|
|
1602
|
+
type Deliverable<Out> = {
|
|
1603
|
+
kind: 'events';
|
|
1604
|
+
fromEvents: (events: SandboxEvent[]) => Out;
|
|
1605
|
+
} | {
|
|
1606
|
+
kind: 'artifact';
|
|
1607
|
+
path: string;
|
|
1608
|
+
fromArtifact: (raw: string, events: SandboxEvent[]) => Out;
|
|
1609
|
+
};
|
|
1610
|
+
/**
|
|
1611
|
+
* @experimental
|
|
1612
|
+
* One finished turn over the artifact. A failed FS read is surfaced in `readError`
|
|
1613
|
+
* (never masked as an empty deliverable) so a caller distinguishes "agent produced
|
|
1614
|
+
* nothing" from a transport/FS fault.
|
|
1615
|
+
*/
|
|
1616
|
+
interface TurnResult<Out> {
|
|
1617
|
+
out: Out;
|
|
1618
|
+
events: SandboxEvent[];
|
|
1619
|
+
readError?: string;
|
|
1620
|
+
}
|
|
1621
|
+
/** @experimental A live run over ONE persistent artifact (box + session). Close it
|
|
1622
|
+
* when done — `close()` tears the box down. */
|
|
1623
|
+
interface SandboxRun<Out> {
|
|
1624
|
+
readonly box: SandboxInstance;
|
|
1625
|
+
readonly sessionId: string;
|
|
1626
|
+
/** First turn over the fresh box (mints the session). Throws if already started. */
|
|
1627
|
+
start(prompt: string): Promise<TurnResult<Out>>;
|
|
1628
|
+
/** Continue THE SAME session over THE SAME artifact — a resumed turn/rollout. */
|
|
1629
|
+
resume(prompt: string): Promise<TurnResult<Out>>;
|
|
1630
|
+
close(): Promise<void>;
|
|
1631
|
+
}
|
|
1632
|
+
/** @experimental */
|
|
1633
|
+
interface OpenSandboxRunOptions {
|
|
1634
|
+
/** Profile + sandbox env/overrides. `sandboxOverrides.backend.type` is the harness. */
|
|
1635
|
+
agentRun: AgentRunSpec<string>;
|
|
1636
|
+
signal: AbortSignal;
|
|
1637
|
+
/** Optional execution-scoped observers. Hook failures never fail the run. */
|
|
1638
|
+
hooks?: RuntimeHooks;
|
|
1639
|
+
/** Stable run id for trace joins. Defaults to a short runtime-minted id. */
|
|
1640
|
+
runId?: string;
|
|
1641
|
+
/** Optional benchmark/scenario id carried into emitted hook events. */
|
|
1642
|
+
scenarioId?: string;
|
|
1643
|
+
/** Test seam for deterministic hook timestamps. Defaults to `Date.now`. */
|
|
1644
|
+
now?: () => number;
|
|
1645
|
+
/** Bounds box-creation bursts inside lineage fanout. Default from lineage. */
|
|
1646
|
+
maxConcurrency?: number;
|
|
1647
|
+
/** Base backoff (ms) for retrying a transient artifact `fs.read` failure; the i-th
|
|
1648
|
+
* retry waits `readRetryDelayMs * i`. Default 1000. Set 0 to disable the wait (tests). */
|
|
1649
|
+
readRetryDelayMs?: number;
|
|
1650
|
+
}
|
|
1651
|
+
/**
|
|
1652
|
+
* @experimental
|
|
1653
|
+
* Open a sandbox run. Harness-agnostic: the harness lives in
|
|
1654
|
+
* `options.agentRun.sandboxOverrides.backend.type`, so opencode/codex/claude-code/
|
|
1655
|
+
* kimi-code all flow through this one entrypoint with identical env/auth wiring.
|
|
1656
|
+
*/
|
|
1657
|
+
declare function openSandboxRun<Out>(client: SandboxClient, options: OpenSandboxRunOptions, deliverable: Deliverable<Out>): Promise<SandboxRun<Out>>;
|
|
1658
|
+
|
|
1553
1659
|
/**
|
|
1554
1660
|
* @experimental
|
|
1555
1661
|
*
|
|
@@ -1635,7 +1741,7 @@ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
|
|
|
1635
1741
|
/**
|
|
1636
1742
|
* @experimental
|
|
1637
1743
|
*
|
|
1638
|
-
* The leaf runtime — the built-in `
|
|
1744
|
+
* The leaf runtime — the built-in `Executor` IMPLEMENTATIONS behind the ONE
|
|
1639
1745
|
* open interface frozen in `./types`, plus the open resolver/registry that maps
|
|
1640
1746
|
* an `AgentSpec` to one of them OR accepts a bring-your-own executor verbatim.
|
|
1641
1747
|
*
|
|
@@ -1649,7 +1755,7 @@ declare function createBudgetPool(root: Budget, now?: () => number): BudgetPool;
|
|
|
1649
1755
|
* excluded from the equal-k arms by construction (streaming).
|
|
1650
1756
|
* Every metered runtime reports through the SAME normalized `UsageEvent` channel
|
|
1651
1757
|
* so the conserved budget pool meters them identically. A user's own agent is
|
|
1652
|
-
* first-class the moment it implements `
|
|
1758
|
+
* first-class the moment it implements `Executor` — register it by name or
|
|
1653
1759
|
* pass it as `AgentSpec.executor`.
|
|
1654
1760
|
*
|
|
1655
1761
|
* Layering: `estimateCost`/`isModelPriced` are substrate primitives from
|
|
@@ -1675,7 +1781,7 @@ interface RouterSeam {
|
|
|
1675
1781
|
* checkpoint/fork.
|
|
1676
1782
|
*/
|
|
1677
1783
|
interface SandboxSeam {
|
|
1678
|
-
sandboxClient:
|
|
1784
|
+
sandboxClient: SandboxClient;
|
|
1679
1785
|
/** Forwarded into the composed `runLoop`'s `ctx` (trace emitter, run handle, etc.). */
|
|
1680
1786
|
loopCtx?: Partial<Omit<ExecCtx, 'sandboxClient' | 'signal'>>;
|
|
1681
1787
|
/** PR #150 `RunLoopOptions.lineage` passthrough — opaque; forwarded, not parsed. */
|
|
@@ -1694,40 +1800,38 @@ interface CliSeam {
|
|
|
1694
1800
|
cwd?: string;
|
|
1695
1801
|
}
|
|
1696
1802
|
/**
|
|
1697
|
-
* A
|
|
1698
|
-
*
|
|
1699
|
-
*
|
|
1700
|
-
*
|
|
1701
|
-
*
|
|
1702
|
-
*
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
* COMPOSES `runLoop` as a single-task leaf: one box, a refine driver bounded to
|
|
1712
|
-
* the seam's `maxIterations` (default 1), the spec's profile as the agent run.
|
|
1713
|
-
* Surfaces the loop's aggregated `tokenUsage` + `costUsd` as `UsageEvent`s after
|
|
1714
|
-
* it drains, and yields one `iteration` event per loop iteration. Forwards the
|
|
1715
|
-
* optional `lineage` passthrough WITHOUT importing sandbox-lineage / reinventing
|
|
1716
|
-
* checkpoint/fork.
|
|
1717
|
-
*
|
|
1718
|
-
* Streaming shape: the loop runs to completion inside the first `next()`, then
|
|
1719
|
-
* the recorded usage events are yielded; the terminal artifact is read from
|
|
1720
|
-
* `resultArtifact()` after the stream drains.
|
|
1721
|
-
*/
|
|
1722
|
-
declare const sandboxExecutor: LeafExecutorFactory<unknown>;
|
|
1803
|
+
* cli-bridge seam. A local OpenAI-compatible bridge that fronts harness CLIs
|
|
1804
|
+
* (claude-code / opencode / kimi / pi) behind one HTTP surface; `model` doubles
|
|
1805
|
+
* as the harness selector (e.g. `claude-code/sonnet`, `opencode/<provider>/<model>`).
|
|
1806
|
+
* `agentProfile` is the bridge-dialect profile (metadata.disallowedTools, mcp)
|
|
1807
|
+
* forwarded verbatim per request — how an arm disables native tools or injects
|
|
1808
|
+
* a provider search MCP.
|
|
1809
|
+
*/
|
|
1810
|
+
interface BridgeSeam {
|
|
1811
|
+
bridgeUrl: string;
|
|
1812
|
+
bridgeBearer: string;
|
|
1813
|
+
model: string;
|
|
1814
|
+
agentProfile?: Record<string, unknown>;
|
|
1815
|
+
timeoutMs?: number;
|
|
1816
|
+
}
|
|
1723
1817
|
/**
|
|
1724
|
-
*
|
|
1725
|
-
*
|
|
1726
|
-
*
|
|
1727
|
-
*
|
|
1728
|
-
*
|
|
1729
|
-
*/
|
|
1730
|
-
|
|
1818
|
+
* The single built-in executor entrypoint. The backend is DATA — the cost dial a
|
|
1819
|
+
* profile, an experiment config, or a replay journal can name — not an import
|
|
1820
|
+
* choice. Injects the matching seam and delegates to the built-in implementation;
|
|
1821
|
+
* the port stays OPEN: bring-your-own agents implement `Executor` directly and
|
|
1822
|
+
* never pass through here.
|
|
1823
|
+
*/
|
|
1824
|
+
type ExecutorConfig = ({
|
|
1825
|
+
backend: 'router';
|
|
1826
|
+
} & RouterSeam) | ({
|
|
1827
|
+
backend: 'bridge';
|
|
1828
|
+
} & BridgeSeam) | ({
|
|
1829
|
+
backend: 'cli';
|
|
1830
|
+
} & CliSeam) | ({
|
|
1831
|
+
backend: 'sandbox';
|
|
1832
|
+
harness?: BackendType;
|
|
1833
|
+
} & SandboxSeam);
|
|
1834
|
+
declare function createExecutor(config: ExecutorConfig): ExecutorFactory<unknown>;
|
|
1731
1835
|
/**
|
|
1732
1836
|
* The open resolver/registry. Pre-registers the three built-ins under their
|
|
1733
1837
|
* runtime tags (`'router'`, `'sandbox'`, `'cli'`) and accepts `register(name,
|
|
@@ -1749,7 +1853,7 @@ declare function createExecutorRegistry(): ExecutorRegistry;
|
|
|
1749
1853
|
* An `Agent.act` runs inside a `Scope`. It `spawn`s children dynamically and reacts to
|
|
1750
1854
|
* them via `next()`. The scope owns ONE in-memory nursery — the authoritative live set —
|
|
1751
1855
|
* and is the single place that drives a child's lifecycle: reserve budget atomically,
|
|
1752
|
-
* resolve a `
|
|
1856
|
+
* resolve a `Executor` through the open registry, run it (one-shot OR streaming),
|
|
1753
1857
|
* fold its normalized `UsageEvent`s into a conserved `Spend`, reconcile the reservation
|
|
1754
1858
|
* (refunding the unspent remainder), persist the result blob + journal records, and
|
|
1755
1859
|
* deliver the `Settled` through the `next()` cursor.
|
|
@@ -1857,4 +1961,4 @@ declare function createSupervisor<Task, Out>(): Supervisor<Task, Out>;
|
|
|
1857
1961
|
*/
|
|
1858
1962
|
declare function createRootHandle<Out>(): RootHandle<Out>;
|
|
1859
1963
|
|
|
1860
|
-
export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration,
|
|
1964
|
+
export { Agent, AgentRunSpec, AgentSpec, type AssertTraceDerivedFindings, type BridgeSeam, Budget, type BudgetPool, type BudgetReadout, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type Deliverable, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, ExecCtx, type ExecutorConfig, ExecutorFactory, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, NodeId, type OpenSandboxRunOptions, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterSeam, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, SandboxClient, type SandboxLineage, type SandboxLineageHandle, type SandboxRun, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, SpawnEvent, SpawnJournal, Spend, type SteerContext, SupervisedResult, Supervisor, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, type TurnResult, UsageEvent, type UsageSink, type Verify, type VerifySpec, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, acquireSandbox, assertTraceDerivedFindings, buildSteerContext, builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, definePersona, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, inlineSandboxClient, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, openSandboxRun, panel, pipeline, probeSandboxCapabilities, registerShape, renderCorpusToInstructions, replaySpawnTree, reportLoopUsage, runPersonified, settledToIteration, spendFromUsageEvents, trajectoryReport, verify, widen };
|