@tangle-network/agent-runtime 0.43.0 → 0.45.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -202
- package/dist/agent.d.ts +5 -4
- package/dist/agent.js +5 -7
- package/dist/agent.js.map +1 -1
- package/dist/analyst-loop.d.ts +65 -4
- package/dist/analyst-loop.js +6 -1
- package/dist/audit.d.ts +93 -0
- package/dist/audit.js +312 -0
- package/dist/audit.js.map +1 -0
- package/dist/chunk-4B6U4CVQ.js +15 -0
- package/dist/chunk-4B6U4CVQ.js.map +1 -0
- package/dist/chunk-FK53TXOP.js +603 -0
- package/dist/chunk-FK53TXOP.js.map +1 -0
- package/dist/{chunk-MJDGCRAT.js → chunk-IJ6FGOPO.js} +5 -5
- package/dist/chunk-IJ6FGOPO.js.map +1 -0
- package/dist/{chunk-HVYOHJHK.js → chunk-IJGS6J7X.js} +2 -2
- package/dist/chunk-IJGS6J7X.js.map +1 -0
- package/dist/chunk-KEWO4KI6.js +3599 -0
- package/dist/chunk-KEWO4KI6.js.map +1 -0
- package/dist/{chunk-NRZOXCJK.js → chunk-KSMX62JF.js} +2 -2
- package/dist/{chunk-C5HMTTNY.js → chunk-NYN5RTLP.js} +13 -12
- package/dist/chunk-NYN5RTLP.js.map +1 -0
- package/dist/chunk-PRX45WE2.js +264 -0
- package/dist/chunk-PRX45WE2.js.map +1 -0
- package/dist/{chunk-3HMHSN22.js → chunk-QR4UUC5P.js} +6 -6
- package/dist/chunk-QR4UUC5P.js.map +1 -0
- package/dist/chunk-WIR4HOOJ.js +27 -0
- package/dist/chunk-WIR4HOOJ.js.map +1 -0
- package/dist/{chunk-MNCB4SJ5.js → chunk-Z2QXVBA6.js} +296 -8
- package/dist/chunk-Z2QXVBA6.js.map +1 -0
- package/dist/coder-CczgMqFx.d.ts +114 -0
- package/dist/dynamic-BvllHV6M.d.ts +221 -0
- package/dist/{improvement-adapter-BC4HhuAR.d.ts → improvement-adapter-CWegd3vw.d.ts} +1 -1
- package/dist/improvement.d.ts +2 -3
- package/dist/improvement.js +0 -5
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +123 -10
- package/dist/index.js +407 -19
- package/dist/index.js.map +1 -1
- package/dist/{kb-gate-DTBum3vH.d.ts → kb-gate-D9GBocLN.d.ts} +82 -5
- package/dist/{loop-runner-bin-CVoCBmYk.d.ts → loop-runner-bin-CPrCoKqC.d.ts} +14 -10
- package/dist/loop-runner-bin.d.ts +9 -7
- package/dist/loop-runner-bin.js +6 -8
- package/dist/loops.d.ts +7 -371
- package/dist/loops.js +96 -19
- package/dist/mcp/bin.js +7 -7
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +284 -11
- package/dist/mcp/index.js +341 -9
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-BzvF1Ela.d.ts → otel-export-Dy2DyUCU.d.ts} +1 -1
- package/dist/profiles.d.ts +385 -86
- package/dist/profiles.js +549 -4
- package/dist/profiles.js.map +1 -1
- package/dist/run-loop--hSoIknW.d.ts +112 -0
- package/dist/runtime-hooks-C7JwKb9E.d.ts +70 -0
- package/dist/runtime.d.ts +1860 -0
- package/dist/runtime.js +114 -0
- package/dist/runtime.js.map +1 -0
- package/dist/substrate-CUgk7F7s.d.ts +77 -0
- package/dist/topology.d.ts +73 -0
- package/dist/topology.js +111 -0
- package/dist/topology.js.map +1 -0
- package/dist/types-1HbsFa7H.d.ts +438 -0
- package/dist/{types-p8dWBIXL.d.ts → types-BtRLF2U3.d.ts} +1 -1
- package/dist/{types-Bcp071Jg.d.ts → types-DdzkffAm.d.ts} +95 -1
- package/dist/workflow.d.ts +551 -0
- package/dist/workflow.js +1778 -0
- package/dist/workflow.js.map +1 -0
- package/package.json +53 -16
- package/skills/agent-runtime-adoption/SKILL.md +29 -26
- package/dist/chunk-3HMHSN22.js.map +0 -1
- package/dist/chunk-C5HMTTNY.js.map +0 -1
- package/dist/chunk-EKBSQYZE.js +0 -813
- package/dist/chunk-EKBSQYZE.js.map +0 -1
- package/dist/chunk-HVYOHJHK.js.map +0 -1
- package/dist/chunk-MJDGCRAT.js.map +0 -1
- package/dist/chunk-MNCB4SJ5.js.map +0 -1
- package/dist/chunk-PY6NMZYX.js +0 -52
- package/dist/chunk-PY6NMZYX.js.map +0 -1
- package/dist/chunk-SQSCRJ7U.js +0 -65
- package/dist/chunk-SQSCRJ7U.js.map +0 -1
- package/dist/chunk-VOX6Z3II.js +0 -90
- package/dist/chunk-VOX6Z3II.js.map +0 -1
- package/dist/chunk-XBUG326M.js +0 -261
- package/dist/chunk-XBUG326M.js.map +0 -1
- package/dist/dynamic-B_7GgCwu.d.ts +0 -108
- package/dist/optimize-prompt-D-urF2wW.d.ts +0 -129
- /package/dist/{chunk-NRZOXCJK.js.map → chunk-KSMX62JF.js.map} +0 -0
package/dist/profiles.d.ts
CHANGED
|
@@ -1,115 +1,414 @@
|
|
|
1
|
-
|
|
2
|
-
import {
|
|
1
|
+
export { C as CoderOutput, b as CoderProfileOptions, a as CoderTask, M as MultiHarnessCoderFanoutOptions, c as coderProfile, d as createCoderValidator, m as multiHarnessCoderFanout } from './coder-CczgMqFx.js';
|
|
2
|
+
import { b as LoopSandboxClient, O as OutputAdapter, V as Validator, A as AgentRunSpec } from './types-DdzkffAm.js';
|
|
3
|
+
import { a as UiLens, U as UiFinding } from './substrate-CUgk7F7s.js';
|
|
4
|
+
export { b as UI_FINDING_SEVERITIES, c as UI_LENSES, d as UiFindingScreenshot, e as UiFindingSeverity } from './substrate-CUgk7F7s.js';
|
|
5
|
+
import { SandboxEvent, AgentProfile } from '@tangle-network/sandbox';
|
|
3
6
|
import '@tangle-network/agent-eval';
|
|
7
|
+
import './runtime-hooks-C7JwKb9E.js';
|
|
4
8
|
|
|
5
9
|
/**
|
|
6
10
|
* @experimental
|
|
7
11
|
*
|
|
8
|
-
*
|
|
12
|
+
* UI auditor task + output shapes — what one iteration of the audit loop
|
|
13
|
+
* does and what it returns.
|
|
9
14
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
* An iteration is one (lens × route × viewport) audit pass. The driver
|
|
16
|
+
* decides which iterations to plan (lens-cycling, route-cycling,
|
|
17
|
+
* refine-on-low-yield, etc.); the iteration itself captures screenshots
|
|
18
|
+
* and asks a vision judge to identify findings under that lens.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/** @experimental */
|
|
22
|
+
interface UiAuditViewport {
|
|
23
|
+
width: number;
|
|
24
|
+
height: number;
|
|
25
|
+
}
|
|
26
|
+
/** @experimental */
|
|
27
|
+
interface UiAuditCaptureRequest {
|
|
28
|
+
/**
|
|
29
|
+
* Logical route name (e.g. `home`, `checkout-step-2`). Used in screenshot
|
|
30
|
+
* filenames and finding metadata.
|
|
31
|
+
*/
|
|
32
|
+
route: string;
|
|
33
|
+
/** Fully qualified URL the iteration audits. */
|
|
34
|
+
url: string;
|
|
35
|
+
/** Default `{ width: 1280, height: 800 }`. */
|
|
36
|
+
viewport?: UiAuditViewport;
|
|
37
|
+
/** Default `false`. */
|
|
38
|
+
fullPage?: boolean;
|
|
39
|
+
/** CSS selector to wait for before capturing. */
|
|
40
|
+
waitFor?: string;
|
|
41
|
+
/** Extra milliseconds to wait after navigation settles. Default `500`. */
|
|
42
|
+
waitMs?: number;
|
|
43
|
+
/** Optional CSS selector — capture only the matched element. */
|
|
44
|
+
elementSelector?: string;
|
|
45
|
+
/** Optional human-readable label appended to the screenshot filename. */
|
|
46
|
+
label?: string;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* One iteration's task: audit a single (lens × route) pair, capturing the
|
|
50
|
+
* surfaces the lens needs.
|
|
51
|
+
*
|
|
52
|
+
* `captures` lists the screenshots to take BEFORE the judge is invoked.
|
|
53
|
+
* The judge sees all captures from this iteration plus the lens-specific
|
|
54
|
+
* brief.
|
|
55
|
+
*
|
|
56
|
+
* @experimental
|
|
57
|
+
*/
|
|
58
|
+
interface UiAuditTask {
|
|
59
|
+
/** The audit lens that scopes which findings are valid this iteration. */
|
|
60
|
+
lens: UiLens;
|
|
61
|
+
/** Required captures. Order is preserved; index 0 is the primary frame. */
|
|
62
|
+
captures: readonly UiAuditCaptureRequest[];
|
|
63
|
+
/**
|
|
64
|
+
* Free-form context the consumer wants the judge to know about (product
|
|
65
|
+
* name, target audience, copy tone). Surfaced as a prompt prelude.
|
|
66
|
+
*/
|
|
67
|
+
productContext?: string;
|
|
68
|
+
/**
|
|
69
|
+
* IDs of findings already on file across earlier iterations. The judge
|
|
70
|
+
* uses these to mark cross-references via `similarTo` instead of filing
|
|
71
|
+
* pile-on duplicates.
|
|
72
|
+
*/
|
|
73
|
+
knownFindingIds?: readonly number[];
|
|
74
|
+
}
|
|
75
|
+
/** @experimental */
|
|
76
|
+
interface UiAuditCapture {
|
|
77
|
+
/** Workspace-relative path to the screenshot file. */
|
|
78
|
+
path: string;
|
|
79
|
+
viewport: string;
|
|
80
|
+
fullPage: boolean;
|
|
81
|
+
elementSelector?: string;
|
|
82
|
+
label?: string;
|
|
83
|
+
route: string;
|
|
84
|
+
url: string;
|
|
85
|
+
/** Wall-clock time at which the capture completed. */
|
|
86
|
+
capturedAt: string;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Output of one iteration. `findings` is the headline payload; `captures`
|
|
90
|
+
* is the screenshot manifest the writer needs to link evidence. `notes`
|
|
91
|
+
* carries judge commentary that didn't rise to a finding.
|
|
92
|
+
*
|
|
93
|
+
* @experimental
|
|
94
|
+
*/
|
|
95
|
+
interface UiAuditOutput {
|
|
96
|
+
lens: UiLens;
|
|
97
|
+
findings: UiFinding[];
|
|
98
|
+
captures: UiAuditCapture[];
|
|
99
|
+
/** Optional judge commentary (debug / triage aid). */
|
|
100
|
+
notes?: string;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* @experimental
|
|
105
|
+
*
|
|
106
|
+
* UI judge seam — consumer-supplied vision LLM hook the in-process
|
|
107
|
+
* auditor client invokes to identify findings from captured screenshots.
|
|
108
|
+
*
|
|
109
|
+
* The seam stays model-agnostic so consumers can plug in OpenAI vision,
|
|
110
|
+
* Anthropic vision, Gemini, a local model, or a deterministic stub for
|
|
111
|
+
* tests. The auditor handles browser capture + Markdown emission; the
|
|
112
|
+
* judge owns the perception + judgment.
|
|
16
113
|
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
114
|
+
* Implementor contract:
|
|
115
|
+
* - Treat `lens` as authoritative — only emit findings that belong to
|
|
116
|
+
* that lens. Findings with `lens !== input.lens` will fail the
|
|
117
|
+
* iteration validator.
|
|
118
|
+
* - Reference screenshots via the `path` strings provided in
|
|
119
|
+
* `input.captures`. Inventing a path will cause the validator to
|
|
120
|
+
* reject the iteration.
|
|
121
|
+
* - Be conservative — a finding the judge cannot actually see in the
|
|
122
|
+
* screenshots is a hallucination and pollutes the audit.
|
|
123
|
+
* - Treat any exception thrown by the judge as the iteration's failure —
|
|
124
|
+
* do not swallow LLM errors. Per agent-runtime's fail-loud doctrine,
|
|
125
|
+
* surfacing the error to the kernel beats producing a silent zero.
|
|
20
126
|
*/
|
|
21
127
|
|
|
22
128
|
/** @experimental */
|
|
23
|
-
interface
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
129
|
+
interface UiJudgeTokenUsage {
|
|
130
|
+
input: number;
|
|
131
|
+
output: number;
|
|
132
|
+
}
|
|
133
|
+
/** @experimental */
|
|
134
|
+
interface UiJudgeInput {
|
|
135
|
+
lens: UiLens;
|
|
136
|
+
captures: readonly UiAuditCapture[];
|
|
137
|
+
/** Free-form product context the consumer wants the judge to know. */
|
|
138
|
+
productContext?: string;
|
|
139
|
+
/** IDs of findings already on file across earlier iterations — for `similarTo` linkage. */
|
|
140
|
+
knownFindingIds?: readonly number[];
|
|
141
|
+
/** The full prompt the loop kernel synthesized for this iteration. */
|
|
142
|
+
promptText: string;
|
|
143
|
+
/** Cooperative cancellation. */
|
|
144
|
+
signal: AbortSignal;
|
|
145
|
+
}
|
|
146
|
+
/** @experimental */
|
|
147
|
+
interface UiJudgeOutput {
|
|
148
|
+
findings: UiFinding[];
|
|
149
|
+
/** Optional triage commentary. */
|
|
150
|
+
notes?: string;
|
|
151
|
+
/** Optional usage; folded into the kernel cost ledger when present. */
|
|
152
|
+
tokenUsage?: UiJudgeTokenUsage;
|
|
153
|
+
/** Optional total cost in USD. */
|
|
154
|
+
costUsd?: number;
|
|
155
|
+
}
|
|
156
|
+
/** @experimental */
|
|
157
|
+
type UiJudge = (input: UiJudgeInput) => Promise<UiJudgeOutput>;
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* @experimental
|
|
161
|
+
*
|
|
162
|
+
* `createInProcessUiAuditClient` — a `LoopSandboxClient` that drives a
|
|
163
|
+
* Playwright browser in-process and delegates finding identification to a
|
|
164
|
+
* consumer-supplied {@link UiJudge}.
|
|
165
|
+
*
|
|
166
|
+
* Why this exists: `runLoop` is built around a sandbox-SDK seam — each
|
|
167
|
+
* iteration is `client.create() → box.streamPrompt() → box.delete()`.
|
|
168
|
+
* For UI audit, spinning up a real container running a coding harness
|
|
169
|
+
* per iteration is overkill: the work is one browser capture + one
|
|
170
|
+
* vision LLM call. This client satisfies the kernel contract while
|
|
171
|
+
* doing the audit in-process; no container, no sandbox-SDK backend.
|
|
172
|
+
*
|
|
173
|
+
* The client owns ONE browser for its lifetime and creates a fresh
|
|
174
|
+
* context per iteration (isolated cookies/storage). Playwright is
|
|
175
|
+
* dynamically imported so consumers who use a different `LoopSandboxClient`
|
|
176
|
+
* — e.g. a fleet executor that drives Playwright remotely — do not pay
|
|
177
|
+
* the peer dep cost.
|
|
178
|
+
*
|
|
179
|
+
* Concurrency: each iteration's prompt carries a self-describing task
|
|
180
|
+
* envelope (see `prompt.ts`), so concurrent fanout iterations do not race
|
|
181
|
+
* over per-client side state.
|
|
182
|
+
*/
|
|
183
|
+
|
|
184
|
+
/** @experimental */
|
|
185
|
+
interface InProcessUiAuditClientOptions {
|
|
186
|
+
/**
|
|
187
|
+
* Absolute path under which screenshots are written. Each capture lands
|
|
188
|
+
* at `<workspaceDir>/screenshots/<filename>`; finding screenshot paths
|
|
189
|
+
* are workspace-relative (`screenshots/<filename>`).
|
|
190
|
+
*/
|
|
191
|
+
workspaceDir: string;
|
|
192
|
+
/** The vision judge that turns captures into findings. */
|
|
193
|
+
judge: UiJudge;
|
|
194
|
+
/**
|
|
195
|
+
* Navigation policy.
|
|
196
|
+
*
|
|
197
|
+
* `'strict'` (default) waits for `networkidle` and fails the iteration
|
|
198
|
+
* if the page does not settle. `'spa'` waits for `domcontentloaded` —
|
|
199
|
+
* use for single-page apps that hold open long-poll/websocket
|
|
200
|
+
* connections and never settle.
|
|
201
|
+
*/
|
|
202
|
+
navPolicy?: 'strict' | 'spa';
|
|
36
203
|
/**
|
|
37
|
-
*
|
|
38
|
-
*
|
|
204
|
+
* Browser launch override. Default: chromium headless via Playwright.
|
|
205
|
+
* Consumers pass a custom factory to target a remote browser, a
|
|
206
|
+
* different channel, or a fleet adapter.
|
|
39
207
|
*/
|
|
40
|
-
|
|
41
|
-
/** Default 400. Hard cap; validator hard-fails when exceeded. */
|
|
42
|
-
maxDiffLines?: number;
|
|
208
|
+
launchBrowser?: () => Promise<BrowserHandle>;
|
|
43
209
|
}
|
|
44
210
|
/** @experimental */
|
|
45
|
-
interface
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
};
|
|
54
|
-
typecheckResult: {
|
|
55
|
-
passed: boolean;
|
|
56
|
-
output: string;
|
|
57
|
-
};
|
|
58
|
-
diffStats: {
|
|
59
|
-
filesChanged: number;
|
|
60
|
-
insertions: number;
|
|
61
|
-
deletions: number;
|
|
62
|
-
};
|
|
63
|
-
/** Optional reviewer commentary surfaced by the agent. */
|
|
64
|
-
reviewerNotes?: string;
|
|
211
|
+
interface BrowserHandle {
|
|
212
|
+
newContext(options?: {
|
|
213
|
+
viewport?: {
|
|
214
|
+
width: number;
|
|
215
|
+
height: number;
|
|
216
|
+
};
|
|
217
|
+
}): Promise<BrowserContextHandle>;
|
|
218
|
+
close(): Promise<void>;
|
|
65
219
|
}
|
|
66
220
|
/** @experimental */
|
|
67
|
-
interface
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
/** Default model id passed in `AgentProfile.model.default`. */
|
|
71
|
-
model?: string;
|
|
72
|
-
/** Custom system prompt replacement. Default = built-in coder preset. */
|
|
73
|
-
systemPrompt?: string;
|
|
74
|
-
/** Stable name for `AgentRunSpec.name`. Default = `coder-${harness}`. */
|
|
75
|
-
name?: string;
|
|
221
|
+
interface BrowserContextHandle {
|
|
222
|
+
newPage(): Promise<PageHandle>;
|
|
223
|
+
close(): Promise<void>;
|
|
76
224
|
}
|
|
225
|
+
/** @experimental */
|
|
226
|
+
interface PageHandle {
|
|
227
|
+
setViewportSize(size: {
|
|
228
|
+
width: number;
|
|
229
|
+
height: number;
|
|
230
|
+
}): Promise<void>;
|
|
231
|
+
goto(url: string, options?: {
|
|
232
|
+
waitUntil?: string;
|
|
233
|
+
timeout?: number;
|
|
234
|
+
}): Promise<unknown>;
|
|
235
|
+
waitForSelector(selector: string, options?: {
|
|
236
|
+
timeout?: number;
|
|
237
|
+
}): Promise<unknown>;
|
|
238
|
+
waitForTimeout(ms: number): Promise<void>;
|
|
239
|
+
screenshot(options: {
|
|
240
|
+
path: string;
|
|
241
|
+
fullPage?: boolean;
|
|
242
|
+
}): Promise<void>;
|
|
243
|
+
locator(selector: string): {
|
|
244
|
+
first(): {
|
|
245
|
+
screenshot(options: {
|
|
246
|
+
path: string;
|
|
247
|
+
}): Promise<void>;
|
|
248
|
+
};
|
|
249
|
+
};
|
|
250
|
+
}
|
|
251
|
+
/** @experimental */
|
|
252
|
+
declare function createInProcessUiAuditClient(options: InProcessUiAuditClientOptions): LoopSandboxClient & {
|
|
253
|
+
/**
|
|
254
|
+
* Close the underlying browser. Idempotent.
|
|
255
|
+
*
|
|
256
|
+
* Contract: callers MUST ensure no iterations are in flight when this is
|
|
257
|
+
* called. The kernel respects this — `runLoop` awaits every iteration
|
|
258
|
+
* before returning, so `await runLoop(...); await client.close()` is the
|
|
259
|
+
* intended pattern (see `examples/ui-audit`). If `close()` is invoked
|
|
260
|
+
* concurrently with a running iteration, the browser teardown will race
|
|
261
|
+
* against in-flight page operations; the iteration will surface an
|
|
262
|
+
* AggregateError carrying both the iteration error and the close error,
|
|
263
|
+
* but no work is lost silently.
|
|
264
|
+
*/
|
|
265
|
+
close(): Promise<void>;
|
|
266
|
+
};
|
|
267
|
+
|
|
77
268
|
/**
|
|
78
|
-
*
|
|
269
|
+
* @experimental
|
|
79
270
|
*
|
|
80
|
-
*
|
|
81
|
-
*
|
|
82
|
-
*
|
|
83
|
-
*
|
|
271
|
+
* Per-lens guidance the auditor inlines into its system prompt for an
|
|
272
|
+
* iteration. Each entry is a self-contained brief — the same content the
|
|
273
|
+
* standalone ui-issue-finder skill ships, embedded as a string constant so
|
|
274
|
+
* agent-runtime carries no runtime dep on that external workspace.
|
|
275
|
+
*
|
|
276
|
+
* Briefs are deliberately concrete: they enumerate the SIGNALS to look for
|
|
277
|
+
* and the cross-lens distinctions to respect, so the judge files fewer
|
|
278
|
+
* pile-on findings under generic labels.
|
|
279
|
+
*/
|
|
280
|
+
|
|
281
|
+
/** @experimental */
|
|
282
|
+
declare const SHARED_AUDITOR_RULES: string;
|
|
283
|
+
/** @experimental */
|
|
284
|
+
declare const LENS_BRIEFS: Record<UiLens, string>;
|
|
285
|
+
/**
|
|
286
|
+
* Build a system prompt for a single auditor iteration.
|
|
84
287
|
*
|
|
85
288
|
* @experimental
|
|
86
289
|
*/
|
|
87
|
-
declare function
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
}
|
|
290
|
+
declare function buildAuditorSystemPrompt(lens: UiLens): string;
|
|
291
|
+
|
|
292
|
+
/**
|
|
293
|
+
* @experimental
|
|
294
|
+
*
|
|
295
|
+
* Sandbox-event stream → UiAuditOutput decoder. The custom auditor
|
|
296
|
+
* `LoopSandboxClient` emits events of the form:
|
|
297
|
+
*
|
|
298
|
+
* { type: 'audit.capture', data: UiAuditCapture }
|
|
299
|
+
* { type: 'audit.finding', data: UiFinding }
|
|
300
|
+
* { type: 'audit.notes', data: { notes: string } }
|
|
301
|
+
* { type: 'audit.lens', data: { lens: UiLens } }
|
|
302
|
+
* { type: 'done', data: { tokenUsage: { ... }, totalCostUsd?: number } }
|
|
303
|
+
*
|
|
304
|
+
* Other event types are tolerated and ignored. The adapter is pure: it
|
|
305
|
+
* folds an already-collected event array into a UiAuditOutput.
|
|
306
|
+
*/
|
|
307
|
+
|
|
96
308
|
/** @experimental */
|
|
97
|
-
|
|
309
|
+
declare function parseAuditorEvents(events: SandboxEvent[]): UiAuditOutput;
|
|
310
|
+
|
|
311
|
+
/**
|
|
312
|
+
* @experimental
|
|
313
|
+
*
|
|
314
|
+
* `uiAuditorProfile` — preset for vision-driven UI audit iterations.
|
|
315
|
+
*
|
|
316
|
+
* Mirrors the shape of `coderProfile`: returns the `AgentRunSpec`, output
|
|
317
|
+
* adapter, validator, and prompt formatter the loop kernel needs. Unlike
|
|
318
|
+
* `coderProfile`, the agent's "harness" is not a sandbox-SDK code-runner
|
|
319
|
+
* — it's a vision-capable judge driving a browser. The loop kernel still
|
|
320
|
+
* iterates `client.create() → box.streamPrompt() → box.delete()`; the
|
|
321
|
+
* client/box pair are provided by `createInProcessUiAuditClient` (in
|
|
322
|
+
* `./in-process-client.ts`) or by a consumer-supplied `LoopSandboxClient`.
|
|
323
|
+
*/
|
|
324
|
+
|
|
325
|
+
/** @experimental */
|
|
326
|
+
interface UiAuditorProfileOptions {
|
|
98
327
|
/**
|
|
99
|
-
*
|
|
100
|
-
* `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']`.
|
|
328
|
+
* Stable name surfaced in trace events. Defaults to `ui-auditor`.
|
|
101
329
|
*/
|
|
102
|
-
|
|
103
|
-
/**
|
|
104
|
-
|
|
330
|
+
name?: string;
|
|
331
|
+
/**
|
|
332
|
+
* Optional model identifier passed in `AgentProfile.model.default`.
|
|
333
|
+
* The consumer's `LoopSandboxClient` chooses how to interpret it.
|
|
334
|
+
*/
|
|
335
|
+
model?: string;
|
|
336
|
+
/**
|
|
337
|
+
* Task bound to the validator. Without it the validator uses the lens
|
|
338
|
+
* embedded in the iteration output as its expectation — fine for one-off
|
|
339
|
+
* use; less strict than passing the task explicitly.
|
|
340
|
+
*/
|
|
341
|
+
task?: UiAuditTask;
|
|
105
342
|
}
|
|
106
343
|
/** @experimental */
|
|
107
|
-
declare function
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
344
|
+
declare function uiAuditorProfile(options?: UiAuditorProfileOptions): {
|
|
345
|
+
profile: AgentProfile;
|
|
346
|
+
taskToPrompt: (task: UiAuditTask) => string;
|
|
347
|
+
output: OutputAdapter<UiAuditOutput>;
|
|
348
|
+
validator: Validator<UiAuditOutput>;
|
|
349
|
+
agentRunSpec: AgentRunSpec<UiAuditTask>;
|
|
112
350
|
};
|
|
113
|
-
declare function createCoderValidator(task: CoderTask): Validator<CoderOutput>;
|
|
114
351
|
|
|
115
|
-
|
|
352
|
+
/**
|
|
353
|
+
* @experimental
|
|
354
|
+
*
|
|
355
|
+
* Prompt formatter for the auditor profile. `formatAuditorPrompt` produces
|
|
356
|
+
* the user message handed to the iteration — describes the captures to be
|
|
357
|
+
* taken and the lens to apply. The system prompt comes from
|
|
358
|
+
* `buildAuditorSystemPrompt(lens)` (lens-prompts.ts).
|
|
359
|
+
*
|
|
360
|
+
* The formatter prepends a machine-readable envelope (`<<UI_AUDIT_TASK>>`
|
|
361
|
+
* … `<<UI_AUDIT_TASK_END>>`) carrying a JSON-serialised task. The
|
|
362
|
+
* in-process auditor client recovers the task from this envelope so the
|
|
363
|
+
* iteration is self-describing — robust to concurrent fanout, where any
|
|
364
|
+
* per-client side state (e.g. a "current task" register) would race.
|
|
365
|
+
*
|
|
366
|
+
* The formatter is pure and deterministic — re-running it on the same task
|
|
367
|
+
* produces the same prompt. Tests and trace replays rely on this.
|
|
368
|
+
*/
|
|
369
|
+
|
|
370
|
+
/** @experimental */
|
|
371
|
+
declare function encodeAuditTaskEnvelope(task: UiAuditTask): string;
|
|
372
|
+
/**
|
|
373
|
+
* Parse a task envelope back out of a prompt string. Returns undefined if
|
|
374
|
+
* the prompt does not contain a complete envelope OR if the payload is
|
|
375
|
+
* not valid JSON.
|
|
376
|
+
*
|
|
377
|
+
* @experimental
|
|
378
|
+
*/
|
|
379
|
+
declare function decodeAuditTaskEnvelope(prompt: string): UiAuditTask | undefined;
|
|
380
|
+
/** @experimental */
|
|
381
|
+
declare function formatAuditorPrompt(task: UiAuditTask): string;
|
|
382
|
+
|
|
383
|
+
/**
|
|
384
|
+
* @experimental
|
|
385
|
+
*
|
|
386
|
+
* Auditor validator — scores a single iteration's findings for actionability
|
|
387
|
+
* and gates the iteration result. The kernel uses `valid` + `score` for
|
|
388
|
+
* winner selection across fanned-out iterations and to detect a degenerate
|
|
389
|
+
* iteration (lens-violating findings, no screenshot evidence, no findings
|
|
390
|
+
* at all on a route where we expected some).
|
|
391
|
+
*
|
|
392
|
+
* Hard fails (`valid = false`):
|
|
393
|
+
* - A finding is filed under a lens that does not match the iteration's
|
|
394
|
+
* lens. The whole iteration is bad — the judge isn't following the
|
|
395
|
+
* lens discipline and the resulting Markdown would mislead reviewers.
|
|
396
|
+
* - A finding has no screenshot reference.
|
|
397
|
+
* - A finding's screenshot references a path that wasn't captured in
|
|
398
|
+
* this iteration.
|
|
399
|
+
*
|
|
400
|
+
* Score (0..1, max two decimals stable):
|
|
401
|
+
* - 0.4 * specificityRatio — proportion of findings with a selector
|
|
402
|
+
* - 0.4 * evidenceRatio — proportion of findings whose screenshots resolve to captures
|
|
403
|
+
* - 0.2 * (1 - genericTitleRatio) — proportion of findings whose titles
|
|
404
|
+
* are concrete (not "improve UX", "fix layout", etc.)
|
|
405
|
+
*
|
|
406
|
+
* An iteration with zero findings scores 0.5 by convention — neither a
|
|
407
|
+
* confident pass nor a hard failure (the judge might just have nothing to
|
|
408
|
+
* say on this lens). The driver decides what to do with it.
|
|
409
|
+
*/
|
|
410
|
+
|
|
411
|
+
/** @experimental */
|
|
412
|
+
declare function createUiAuditorValidator(task: UiAuditTask): Validator<UiAuditOutput>;
|
|
413
|
+
|
|
414
|
+
export { type BrowserContextHandle, type BrowserHandle, type InProcessUiAuditClientOptions, LENS_BRIEFS, type PageHandle, SHARED_AUDITOR_RULES, type UiAuditCapture, type UiAuditCaptureRequest, type UiAuditOutput, type UiAuditTask, type UiAuditViewport, type UiAuditorProfileOptions, UiFinding, type UiJudge, type UiJudgeInput, type UiJudgeOutput, type UiJudgeTokenUsage, UiLens, buildAuditorSystemPrompt, createInProcessUiAuditClient, createUiAuditorValidator, decodeAuditTaskEnvelope, encodeAuditTaskEnvelope, formatAuditorPrompt, parseAuditorEvents, uiAuditorProfile };
|