@tangle-network/agent-runtime 0.44.0 → 0.45.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +95 -203
  2. package/dist/agent.d.ts +5 -4
  3. package/dist/agent.js +5 -7
  4. package/dist/agent.js.map +1 -1
  5. package/dist/analyst-loop.d.ts +65 -4
  6. package/dist/analyst-loop.js +6 -1
  7. package/dist/audit.d.ts +93 -0
  8. package/dist/audit.js +312 -0
  9. package/dist/audit.js.map +1 -0
  10. package/dist/chunk-4B6U4CVQ.js +15 -0
  11. package/dist/chunk-4B6U4CVQ.js.map +1 -0
  12. package/dist/chunk-FK53TXOP.js +603 -0
  13. package/dist/chunk-FK53TXOP.js.map +1 -0
  14. package/dist/{chunk-SKUZZCHE.js → chunk-IJ6FGOPO.js} +5 -5
  15. package/dist/chunk-IJ6FGOPO.js.map +1 -0
  16. package/dist/{chunk-HVYOHJHK.js → chunk-IJGS6J7X.js} +2 -2
  17. package/dist/chunk-IJGS6J7X.js.map +1 -0
  18. package/dist/chunk-KEWO4KI6.js +3599 -0
  19. package/dist/chunk-KEWO4KI6.js.map +1 -0
  20. package/dist/{chunk-NRZOXCJK.js → chunk-KSMX62JF.js} +2 -2
  21. package/dist/{chunk-GFKVVRQ7.js → chunk-NYN5RTLP.js} +11 -10
  22. package/dist/chunk-NYN5RTLP.js.map +1 -0
  23. package/dist/chunk-PRX45WE2.js +264 -0
  24. package/dist/chunk-PRX45WE2.js.map +1 -0
  25. package/dist/{chunk-3HMHSN22.js → chunk-QR4UUC5P.js} +6 -6
  26. package/dist/chunk-QR4UUC5P.js.map +1 -0
  27. package/dist/chunk-WIR4HOOJ.js +27 -0
  28. package/dist/chunk-WIR4HOOJ.js.map +1 -0
  29. package/dist/{chunk-KDMRUD2P.js → chunk-Z2QXVBA6.js} +296 -8
  30. package/dist/chunk-Z2QXVBA6.js.map +1 -0
  31. package/dist/coder-CczgMqFx.d.ts +114 -0
  32. package/dist/dynamic-BvllHV6M.d.ts +221 -0
  33. package/dist/{improvement-adapter-BC4HhuAR.d.ts → improvement-adapter-CWegd3vw.d.ts} +1 -1
  34. package/dist/improvement.d.ts +2 -3
  35. package/dist/improvement.js +0 -5
  36. package/dist/improvement.js.map +1 -1
  37. package/dist/index.d.ts +123 -10
  38. package/dist/index.js +398 -10
  39. package/dist/index.js.map +1 -1
  40. package/dist/{kb-gate-D0ZIhFOU.d.ts → kb-gate-D9GBocLN.d.ts} +82 -5
  41. package/dist/{loop-runner-bin-BLMa8He3.d.ts → loop-runner-bin-CPrCoKqC.d.ts} +14 -10
  42. package/dist/loop-runner-bin.d.ts +9 -7
  43. package/dist/loop-runner-bin.js +6 -8
  44. package/dist/loops.d.ts +7 -393
  45. package/dist/loops.js +94 -25
  46. package/dist/mcp/bin.js +7 -7
  47. package/dist/mcp/bin.js.map +1 -1
  48. package/dist/mcp/index.d.ts +284 -11
  49. package/dist/mcp/index.js +341 -9
  50. package/dist/mcp/index.js.map +1 -1
  51. package/dist/{otel-export-wFDmmurL.d.ts → otel-export-Dy2DyUCU.d.ts} +1 -1
  52. package/dist/profiles.d.ts +385 -86
  53. package/dist/profiles.js +549 -4
  54. package/dist/profiles.js.map +1 -1
  55. package/dist/{run-loop-C4L1Sted.d.ts → run-loop--hSoIknW.d.ts} +35 -12
  56. package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
  57. package/dist/runtime.d.ts +1860 -0
  58. package/dist/runtime.js +114 -0
  59. package/dist/runtime.js.map +1 -0
  60. package/dist/substrate-CUgk7F7s.d.ts +77 -0
  61. package/dist/topology.d.ts +73 -0
  62. package/dist/topology.js +111 -0
  63. package/dist/topology.js.map +1 -0
  64. package/dist/types-1HbsFa7H.d.ts +438 -0
  65. package/dist/{types-p8dWBIXL.d.ts → types-BtRLF2U3.d.ts} +1 -1
  66. package/dist/{types-DbJzz2uf.d.ts → types-DdzkffAm.d.ts} +95 -1
  67. package/dist/workflow.d.ts +3 -2
  68. package/dist/workflow.js +4 -5
  69. package/dist/workflow.js.map +1 -1
  70. package/package.json +26 -6
  71. package/skills/agent-runtime-adoption/SKILL.md +29 -26
  72. package/dist/chunk-3HMHSN22.js.map +0 -1
  73. package/dist/chunk-GFKVVRQ7.js.map +0 -1
  74. package/dist/chunk-HVYOHJHK.js.map +0 -1
  75. package/dist/chunk-KDMRUD2P.js.map +0 -1
  76. package/dist/chunk-PY6NMZYX.js +0 -52
  77. package/dist/chunk-PY6NMZYX.js.map +0 -1
  78. package/dist/chunk-S7JXV32P.js +0 -947
  79. package/dist/chunk-S7JXV32P.js.map +0 -1
  80. package/dist/chunk-SKUZZCHE.js.map +0 -1
  81. package/dist/chunk-SQSCRJ7U.js +0 -65
  82. package/dist/chunk-SQSCRJ7U.js.map +0 -1
  83. package/dist/chunk-VOX6Z3II.js +0 -90
  84. package/dist/chunk-VOX6Z3II.js.map +0 -1
  85. package/dist/chunk-XBUG326M.js +0 -261
  86. package/dist/chunk-XBUG326M.js.map +0 -1
  87. package/dist/dynamic-wUgp6UKs.d.ts +0 -108
  88. package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
  89. /package/dist/{chunk-NRZOXCJK.js.map → chunk-KSMX62JF.js.map} +0 -0
@@ -1,115 +1,414 @@
1
- import { AgentProfile } from '@tangle-network/sandbox';
2
- import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-DbJzz2uf.js';
1
+ export { C as CoderOutput, b as CoderProfileOptions, a as CoderTask, M as MultiHarnessCoderFanoutOptions, c as coderProfile, d as createCoderValidator, m as multiHarnessCoderFanout } from './coder-CczgMqFx.js';
2
+ import { b as LoopSandboxClient, O as OutputAdapter, V as Validator, A as AgentRunSpec } from './types-DdzkffAm.js';
3
+ import { a as UiLens, U as UiFinding } from './substrate-CUgk7F7s.js';
4
+ export { b as UI_FINDING_SEVERITIES, c as UI_LENSES, d as UiFindingScreenshot, e as UiFindingSeverity } from './substrate-CUgk7F7s.js';
5
+ import { SandboxEvent, AgentProfile } from '@tangle-network/sandbox';
3
6
  import '@tangle-network/agent-eval';
7
+ import './runtime-hooks-C7JwKb9E.js';
4
8
 
5
9
  /**
6
10
  * @experimental
7
11
  *
8
- * `coderProfile` — opinionated preset for code-modification tasks.
12
+ * UI auditor task + output shapes — what one iteration of the audit loop
13
+ * does and what it returns.
9
14
  *
10
- * The agent is told to:
11
- * - work on a fresh branch inside the sandbox workspace
12
- * - keep the patch minimal (under `maxDiffLines`)
13
- * - avoid `forbiddenPaths`
14
- * - run `testCmd` and `typecheckCmd`
15
- * - emit a final JSON result the output adapter parses
15
+ * An iteration is one (lens × route × viewport) audit pass. The driver
16
+ * decides which iterations to plan (lens-cycling, route-cycling,
17
+ * refine-on-low-yield, etc.); the iteration itself captures screenshots
18
+ * and asks a vision judge to identify findings under that lens.
19
+ */
20
+
21
+ /** @experimental */
22
+ interface UiAuditViewport {
23
+ width: number;
24
+ height: number;
25
+ }
26
+ /** @experimental */
27
+ interface UiAuditCaptureRequest {
28
+ /**
29
+ * Logical route name (e.g. `home`, `checkout-step-2`). Used in screenshot
30
+ * filenames and finding metadata.
31
+ */
32
+ route: string;
33
+ /** Fully qualified URL the iteration audits. */
34
+ url: string;
35
+ /** Default `{ width: 1280, height: 800 }`. */
36
+ viewport?: UiAuditViewport;
37
+ /** Default `false`. */
38
+ fullPage?: boolean;
39
+ /** CSS selector to wait for before capturing. */
40
+ waitFor?: string;
41
+ /** Extra milliseconds to wait after navigation settles. Default `500`. */
42
+ waitMs?: number;
43
+ /** Optional CSS selector — capture only the matched element. */
44
+ elementSelector?: string;
45
+ /** Optional human-readable label appended to the screenshot filename. */
46
+ label?: string;
47
+ }
48
+ /**
49
+ * One iteration's task: audit a single (lens × route) pair, capturing the
50
+ * surfaces the lens needs.
51
+ *
52
+ * `captures` lists the screenshots to take BEFORE the judge is invoked.
53
+ * The judge sees all captures from this iteration plus the lens-specific
54
+ * brief.
55
+ *
56
+ * @experimental
57
+ */
58
+ interface UiAuditTask {
59
+ /** The audit lens that scopes which findings are valid this iteration. */
60
+ lens: UiLens;
61
+ /** Required captures. Order is preserved; index 0 is the primary frame. */
62
+ captures: readonly UiAuditCaptureRequest[];
63
+ /**
64
+ * Free-form context the consumer wants the judge to know about (product
65
+ * name, target audience, copy tone). Surfaced as a prompt prelude.
66
+ */
67
+ productContext?: string;
68
+ /**
69
+ * IDs of findings already on file across earlier iterations. The judge
70
+ * uses these to mark cross-references via `similarTo` instead of filing
71
+ * pile-on duplicates.
72
+ */
73
+ knownFindingIds?: readonly number[];
74
+ }
75
+ /** @experimental */
76
+ interface UiAuditCapture {
77
+ /** Workspace-relative path to the screenshot file. */
78
+ path: string;
79
+ viewport: string;
80
+ fullPage: boolean;
81
+ elementSelector?: string;
82
+ label?: string;
83
+ route: string;
84
+ url: string;
85
+ /** Wall-clock when the capture completed. */
86
+ capturedAt: string;
87
+ }
88
+ /**
89
+ * Output of one iteration. `findings` is the headline payload; `captures`
90
+ * is the screenshot manifest the writer needs to link evidence. `notes`
91
+ * carries judge commentary that didn't rise to a finding.
92
+ *
93
+ * @experimental
94
+ */
95
+ interface UiAuditOutput {
96
+ lens: UiLens;
97
+ findings: UiFinding[];
98
+ captures: UiAuditCapture[];
99
+ /** Optional judge commentary (debug / triage aid). */
100
+ notes?: string;
101
+ }
102
+
103
+ /**
104
+ * @experimental
105
+ *
106
+ * UI judge seam — consumer-supplied vision LLM hook the in-process
107
+ * auditor client invokes to identify findings from captured screenshots.
108
+ *
109
+ * The seam stays model-agnostic so consumers can plug in OpenAI vision,
110
+ * Anthropic vision, gemini, a local model, or a deterministic stub for
111
+ * tests. The auditor handles browser capture + Markdown emission; the
112
+ * judge owns the perception + judgment.
16
113
  *
17
- * The profile is stateless and agent-agnostic — `harness` selects the
18
- * sandbox-SDK backend (`claude-code`, `codex`, `opencode/*`). For
19
- * heterogeneous fanout, use `multiHarnessCoderFanout`.
114
+ * Implementor contract:
115
+ * - Treat `lens` as authoritative — only emit findings that belong to
116
+ * that lens. Findings with `lens !== input.lens` will fail the
117
+ * iteration validator.
118
+ * - Reference screenshots via the `path` strings provided in
119
+ * `input.captures`. Inventing a path will cause the validator to
120
+ * reject the iteration.
121
+ * - Be conservative — a finding the judge cannot actually see in the
122
+ * screenshots is a hallucination and pollutes the audit.
123
+ * - Treat any exception thrown by the judge as the iteration's failure —
124
+ * do not swallow LLM errors. Per agent-runtime's fail-loud doctrine,
125
+ * surfacing the error to the kernel beats producing a silent zero.
20
126
  */
21
127
 
22
128
  /** @experimental */
23
- interface CoderTask {
24
- /** What the agent must accomplish. Free-form prose. */
25
- goal: string;
26
- /** Absolute path inside the sandbox where the repo lives. */
27
- repoRoot: string;
28
- /** Default `main`. The branch the agent diffs against. */
29
- baseBranch?: string;
30
- /** Default `pnpm test --run`. */
31
- testCmd?: string;
32
- /** Default `pnpm typecheck`. */
33
- typecheckCmd?: string;
34
- /** Files the agent may inspect for context. Surfaced verbatim in the prompt. */
35
- contextFiles?: string[];
129
+ interface UiJudgeTokenUsage {
130
+ input: number;
131
+ output: number;
132
+ }
133
+ /** @experimental */
134
+ interface UiJudgeInput {
135
+ lens: UiLens;
136
+ captures: readonly UiAuditCapture[];
137
+ /** Free-form product context the consumer wants the judge to know. */
138
+ productContext?: string;
139
+ /** Findings already on file across earlier iterations — for similarTo linkage. */
140
+ knownFindingIds?: readonly number[];
141
+ /** The full prompt the loop kernel synthesized for this iteration. */
142
+ promptText: string;
143
+ /** Cooperative cancellation. */
144
+ signal: AbortSignal;
145
+ }
146
+ /** @experimental */
147
+ interface UiJudgeOutput {
148
+ findings: UiFinding[];
149
+ /** Optional triage commentary. */
150
+ notes?: string;
151
+ /** Optional usage; folded into the kernel cost ledger when present. */
152
+ tokenUsage?: UiJudgeTokenUsage;
153
+ /** Optional total cost in USD. */
154
+ costUsd?: number;
155
+ }
156
+ /** @experimental */
157
+ type UiJudge = (input: UiJudgeInput) => Promise<UiJudgeOutput>;
158
+
159
+ /**
160
+ * @experimental
161
+ *
162
+ * `createInProcessUiAuditClient` — a `LoopSandboxClient` that drives a
163
+ * Playwright browser in-process and delegates finding identification to a
164
+ * consumer-supplied {@link UiJudge}.
165
+ *
166
+ * Why this exists: `runLoop` is built around a sandbox-SDK seam — each
167
+ * iteration is `client.create() → box.streamPrompt() → box.delete()`.
168
+ * For UI audit, spinning up a real container running a coding harness
169
+ * per iteration is overkill: the work is one browser capture + one
170
+ * vision LLM call. This client satisfies the kernel contract while
171
+ * doing the audit in-process; no container, no sandbox-SDK backend.
172
+ *
173
+ * The client owns ONE browser for its lifetime and creates a fresh
174
+ * context per iteration (isolated cookies/storage). Playwright is
175
+ * dynamically imported so consumers who use a different `LoopSandboxClient`
176
+ * — e.g. a fleet executor that drives Playwright remotely — do not pay
177
+ * the peer dep cost.
178
+ *
179
+ * Concurrency: each iteration's prompt carries a self-describing task
180
+ * envelope (see `prompt.ts`), so concurrent fanout iterations do not race
181
+ * over per-client side state.
182
+ */
183
+
184
+ /** @experimental */
185
+ interface InProcessUiAuditClientOptions {
186
+ /**
187
+ * Absolute path under which screenshots are written. Each capture lands
188
+ * at `<workspaceDir>/screenshots/<filename>`; finding screenshot paths
189
+ * are workspace-relative (`screenshots/<filename>`).
190
+ */
191
+ workspaceDir: string;
192
+ /** The vision judge that turns captures into findings. */
193
+ judge: UiJudge;
194
+ /**
195
+ * Navigation policy.
196
+ *
197
+ * `'strict'` (default) waits for `networkidle` and fails the iteration
198
+ * if the page does not settle. `'spa'` waits for `domcontentloaded` —
199
+ * use for single-page apps that hold open long-poll/websocket
200
+ * connections and never settle.
201
+ */
202
+ navPolicy?: 'strict' | 'spa';
36
203
  /**
37
- * Paths the agent must not touch. Validator hard-fails on any match.
38
- * Use glob-free literal path prefixes for unambiguous enforcement.
204
+ * Browser launch override. Default: chromium headless via Playwright.
205
+ * Consumers pass a custom factory to target a remote browser, a
206
+ * different channel, or a fleet adapter.
39
207
  */
40
- forbiddenPaths?: string[];
41
- /** Default 400. Hard cap; validator hard-fails when exceeded. */
42
- maxDiffLines?: number;
208
+ launchBrowser?: () => Promise<BrowserHandle>;
43
209
  }
44
210
  /** @experimental */
45
- interface CoderOutput {
46
- /** Branch the agent wrote the patch on. */
47
- branch: string;
48
- /** Unified diff (`git diff <base>..HEAD`). */
49
- patch: string;
50
- testResult: {
51
- passed: boolean;
52
- output: string;
53
- };
54
- typecheckResult: {
55
- passed: boolean;
56
- output: string;
57
- };
58
- diffStats: {
59
- filesChanged: number;
60
- insertions: number;
61
- deletions: number;
62
- };
63
- /** Optional reviewer commentary surfaced by the agent. */
64
- reviewerNotes?: string;
211
+ interface BrowserHandle {
212
+ newContext(options?: {
213
+ viewport?: {
214
+ width: number;
215
+ height: number;
216
+ };
217
+ }): Promise<BrowserContextHandle>;
218
+ close(): Promise<void>;
65
219
  }
66
220
  /** @experimental */
67
- interface CoderProfileOptions {
68
- /** Sandbox-SDK backend.type. Default `'claude-code'`. */
69
- harness?: string;
70
- /** Default model id passed in `AgentProfile.model.default`. */
71
- model?: string;
72
- /** Custom system prompt replacement. Default = built-in coder preset. */
73
- systemPrompt?: string;
74
- /** Stable name for `AgentRunSpec.name`. Default = `coder-${harness}`. */
75
- name?: string;
221
+ interface BrowserContextHandle {
222
+ newPage(): Promise<PageHandle>;
223
+ close(): Promise<void>;
76
224
  }
225
+ /** @experimental */
226
+ interface PageHandle {
227
+ setViewportSize(size: {
228
+ width: number;
229
+ height: number;
230
+ }): Promise<void>;
231
+ goto(url: string, options?: {
232
+ waitUntil?: string;
233
+ timeout?: number;
234
+ }): Promise<unknown>;
235
+ waitForSelector(selector: string, options?: {
236
+ timeout?: number;
237
+ }): Promise<unknown>;
238
+ waitForTimeout(ms: number): Promise<void>;
239
+ screenshot(options: {
240
+ path: string;
241
+ fullPage?: boolean;
242
+ }): Promise<void>;
243
+ locator(selector: string): {
244
+ first(): {
245
+ screenshot(options: {
246
+ path: string;
247
+ }): Promise<void>;
248
+ };
249
+ };
250
+ }
251
+ /** @experimental */
252
+ declare function createInProcessUiAuditClient(options: InProcessUiAuditClientOptions): LoopSandboxClient & {
253
+ /**
254
+ * Close the underlying browser. Idempotent.
255
+ *
256
+ * Contract: callers MUST ensure no iterations are in flight when this is
257
+ * called. The kernel respects this — `runLoop` awaits every iteration
258
+ * before returning, so `await runLoop(...); await client.close()` is the
259
+ * intended pattern (see `examples/ui-audit`). If `close()` is invoked
260
+ * concurrently with a running iteration, the browser teardown will race
261
+ * against in-flight page operations; the iteration will surface an
262
+ * AggregateError carrying both the iteration error and the close error,
263
+ * but no work is lost silently.
264
+ */
265
+ close(): Promise<void>;
266
+ };
267
+
77
268
  /**
78
- * Build a coder preset.
269
+ * @experimental
79
270
  *
80
- * `validator` enforces test + typecheck + a 400-line default diff cap. For
81
- * per-task `forbiddenPaths` / `maxDiffLines` enforcement, pass `task` here —
82
- * the returned validator closes over its constraints. Without a task
83
- * the validator falls back to the default cap and skips path enforcement.
271
+ * Per-lens guidance the auditor inlines into its system prompt for an
272
+ * iteration. Each entry is a self-contained brief — the same content the
273
+ * standalone ui-issue-finder skill ships, embedded as a string constant so
274
+ * agent-runtime carries no runtime dep on that external workspace.
275
+ *
276
+ * Briefs are deliberately concrete: they enumerate the SIGNALS to look for
277
+ * and the cross-lens distinctions to respect, so the judge files fewer
278
+ * pile-on findings under generic labels.
279
+ */
280
+
281
+ /** @experimental */
282
+ declare const SHARED_AUDITOR_RULES: string;
283
+ /** @experimental */
284
+ declare const LENS_BRIEFS: Record<UiLens, string>;
285
+ /**
286
+ * Build a system prompt for a single auditor iteration.
84
287
  *
85
288
  * @experimental
86
289
  */
87
- declare function coderProfile(options?: CoderProfileOptions & {
88
- task?: CoderTask;
89
- }): {
90
- profile: AgentProfile;
91
- taskToPrompt: (task: CoderTask) => string;
92
- output: OutputAdapter<CoderOutput>;
93
- validator: Validator<CoderOutput>;
94
- agentRunSpec: AgentRunSpec<CoderTask>;
95
- };
290
+ declare function buildAuditorSystemPrompt(lens: UiLens): string;
291
+
292
+ /**
293
+ * @experimental
294
+ *
295
+ * Sandbox-event stream → UiAuditOutput decoder. The custom auditor
296
+ * `LoopSandboxClient` emits events of the form:
297
+ *
298
+ * { type: 'audit.capture', data: UiAuditCapture }
299
+ * { type: 'audit.finding', data: UiFinding }
300
+ * { type: 'audit.notes', data: { notes: string } }
301
+ * { type: 'audit.lens', data: { lens: UiLens } }
302
+ * { type: 'done', data: { tokenUsage: { ... }, totalCostUsd?: number } }
303
+ *
304
+ * Other event types are tolerated and ignored. The adapter is pure: it
305
+ * folds an already-collected event array into a UiAuditOutput.
306
+ */
307
+
96
308
  /** @experimental */
97
- interface MultiHarnessCoderFanoutOptions {
309
+ declare function parseAuditorEvents(events: SandboxEvent[]): UiAuditOutput;
310
+
311
+ /**
312
+ * @experimental
313
+ *
314
+ * `uiAuditorProfile` — preset for vision-driven UI audit iterations.
315
+ *
316
+ * Mirrors the shape of `coderProfile`: returns the `AgentRunSpec`, output
317
+ * adapter, validator, and prompt formatter the loop kernel needs. Unlike
318
+ * `coderProfile`, the agent's "harness" is not a sandbox-SDK code-runner
319
+ * — it's a vision-capable judge driving a browser. The loop kernel still
320
+ * iterates `client.create() → box.streamPrompt() → box.delete()`; the
321
+ * client/box pair are provided by `createInProcessUiAuditClient` (in
322
+ * `./in-process-client.ts`) or by a consumer-supplied `LoopSandboxClient`.
323
+ */
324
+
325
+ /** @experimental */
326
+ interface UiAuditorProfileOptions {
98
327
  /**
99
- * Sandbox-SDK backend.type identifiers, one per parallel agent. Default:
100
- * `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']`.
328
+ * Stable name surfaced in trace events. Defaults to `ui-auditor`.
101
329
  */
102
- harnesses?: string[];
103
- /** Optional per-harness model override. Indexed parallel to `harnesses`. */
104
- models?: (string | undefined)[];
330
+ name?: string;
331
+ /**
332
+ * Optional model identifier passed in `AgentProfile.model.default`.
333
+ * The consumer's `LoopSandboxClient` chooses how to interpret it.
334
+ */
335
+ model?: string;
336
+ /**
337
+ * Task bound to the validator. Without it the validator uses the lens
338
+ * embedded in the iteration output as its expectation — fine for one-off
339
+ * use; less strict than passing the task explicitly.
340
+ */
341
+ task?: UiAuditTask;
105
342
  }
106
343
  /** @experimental */
107
- declare function multiHarnessCoderFanout(options?: MultiHarnessCoderFanoutOptions): {
108
- agentRuns: AgentRunSpec<CoderTask>[];
109
- output: OutputAdapter<CoderOutput>;
110
- validator: Validator<CoderOutput>;
111
- driver: Driver<CoderTask, CoderOutput, 'pick-winner' | 'fail'>;
344
+ declare function uiAuditorProfile(options?: UiAuditorProfileOptions): {
345
+ profile: AgentProfile;
346
+ taskToPrompt: (task: UiAuditTask) => string;
347
+ output: OutputAdapter<UiAuditOutput>;
348
+ validator: Validator<UiAuditOutput>;
349
+ agentRunSpec: AgentRunSpec<UiAuditTask>;
112
350
  };
113
- declare function createCoderValidator(task: CoderTask): Validator<CoderOutput>;
114
351
 
115
- export { type CoderOutput, type CoderProfileOptions, type CoderTask, type MultiHarnessCoderFanoutOptions, coderProfile, createCoderValidator, multiHarnessCoderFanout };
352
+ /**
353
+ * @experimental
354
+ *
355
+ * Prompt formatter for the auditor profile. `formatAuditorPrompt` produces
356
+ * the user message handed to the iteration — describes the captures to be
357
+ * taken and the lens to apply. The system prompt comes from
358
+ * `buildAuditorSystemPrompt(lens)` (lens-prompts.ts).
359
+ *
360
+ * The formatter prepends a machine-readable envelope (`<<UI_AUDIT_TASK>>`
361
+ * … `<<UI_AUDIT_TASK_END>>`) carrying a JSON-serialised task. The
362
+ * in-process auditor client recovers the task from this envelope so the
363
+ * iteration is self-describing — robust to concurrent fanout, where any
364
+ * per-client side state (e.g. a "current task" register) would race.
365
+ *
366
+ * The formatter is pure and deterministic — re-run on the same task
367
+ * produces the same prompt. Tests and trace replays rely on this.
368
+ */
369
+
370
+ /** @experimental */
371
+ declare function encodeAuditTaskEnvelope(task: UiAuditTask): string;
372
+ /**
373
+ * Parse a task envelope back out of a prompt string. Returns undefined if
374
+ * the prompt does not contain a complete envelope OR if the payload is
375
+ * not valid JSON.
376
+ *
377
+ * @experimental
378
+ */
379
+ declare function decodeAuditTaskEnvelope(prompt: string): UiAuditTask | undefined;
380
+ /** @experimental */
381
+ declare function formatAuditorPrompt(task: UiAuditTask): string;
382
+
383
+ /**
384
+ * @experimental
385
+ *
386
+ * Auditor validator — scores a single iteration's findings for actionability
387
+ * and gates the iteration result. The kernel uses `valid` + `score` for
388
+ * winner selection across fanned-out iterations and to detect a degenerate
389
+ * iteration (lens-violating findings, no screenshot evidence, no findings
390
+ * at all on a route where we expected some).
391
+ *
392
+ * Hard fails (`valid = false`):
393
+ * - A finding is filed under a lens that does not match the iteration's
394
+ * lens. The whole iteration is bad — the judge isn't following the
395
+ * lens discipline and the resulting Markdown would mislead reviewers.
396
+ * - A finding has no screenshot reference.
397
+ * - A finding's screenshot references a path that wasn't captured in
398
+ * this iteration.
399
+ *
400
+ * Score (0..1, max two decimals stable):
401
+ * - 0.4 * specificityRatio — proportion of findings with a selector
402
+ * - 0.4 * evidenceRatio — proportion of findings whose screenshots resolve to captures
403
+ * - 0.2 * (1 - genericTitleRatio) — proportion of findings whose titles
404
+ * are concrete (not "improve UX", "fix layout", etc.)
405
+ *
406
+ * An iteration with zero findings scores 0.5 by convention — neither a
407
+ * confident pass nor a hard failure (the judge might just have nothing to
408
+ * say on this lens). The driver decides what to do with it.
409
+ */
410
+
411
+ /** @experimental */
412
+ declare function createUiAuditorValidator(task: UiAuditTask): Validator<UiAuditOutput>;
413
+
414
+ export { type BrowserContextHandle, type BrowserHandle, type InProcessUiAuditClientOptions, LENS_BRIEFS, type PageHandle, SHARED_AUDITOR_RULES, type UiAuditCapture, type UiAuditCaptureRequest, type UiAuditOutput, type UiAuditTask, type UiAuditViewport, type UiAuditorProfileOptions, UiFinding, type UiJudge, type UiJudgeInput, type UiJudgeOutput, type UiJudgeTokenUsage, UiLens, buildAuditorSystemPrompt, createInProcessUiAuditClient, createUiAuditorValidator, decodeAuditTaskEnvelope, encodeAuditTaskEnvelope, formatAuditorPrompt, parseAuditorEvents, uiAuditorProfile };