@rtrentjones/greenlight 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/LICENSE +21 -0
  2. package/assets/skills/deploy-verify-promote/SKILL.md +53 -0
  3. package/assets/skills/provider-cloudflare/SKILL.md +42 -0
  4. package/assets/skills/provider-github/SKILL.md +35 -0
  5. package/assets/skills/provider-hcp/SKILL.md +46 -0
  6. package/assets/skills/provider-oci/SKILL.md +58 -0
  7. package/assets/skills/provider-supabase/SKILL.md +40 -0
  8. package/assets/skills/provider-vercel/SKILL.md +39 -0
  9. package/dist/agent-web-I4LXW4SR.js +7 -0
  10. package/dist/bin.js +1951 -0
  11. package/dist/chunk-6N7MD6FR.js +75 -0
  12. package/dist/chunk-KFKYLGFX.js +271 -0
  13. package/dist/chunk-KP3Y6WRU.js +45 -0
  14. package/dist/chunk-QFKE5JKC.js +12 -0
  15. package/dist/chunk-UXHHLEYO.js +231 -0
  16. package/dist/chunk-WFZTRXBF.js +61 -0
  17. package/dist/chunk-XBDQJVAX.js +94 -0
  18. package/dist/eval-LLQPOEQX.js +9 -0
  19. package/dist/index.d.ts +2 -0
  20. package/dist/index.js +16 -0
  21. package/dist/mcp-KU7WKB5K.js +7 -0
  22. package/dist/playwright-CGTTHGIL.js +7 -0
  23. package/dist/test-7GMOU7I5.js +7 -0
  24. package/package.json +51 -0
  25. package/templates/_template-astro/README.md +18 -0
  26. package/templates/_template-astro/astro.config.mjs +9 -0
  27. package/templates/_template-astro/package.json +18 -0
  28. package/templates/_template-astro/src/pages/index.astro +18 -0
  29. package/templates/_template-astro/tsconfig.json +5 -0
  30. package/templates/_template-astro/wrangler.jsonc +12 -0
  31. package/templates/_template-mcp/README.md +28 -0
  32. package/templates/_template-mcp/oci/Dockerfile +11 -0
  33. package/templates/_template-mcp/oci/package.json +12 -0
  34. package/templates/_template-mcp/oci/server.ts +80 -0
  35. package/templates/_template-mcp/workers/README.md +32 -0
  36. package/templates/_template-next/README.md +5 -0
@@ -0,0 +1,75 @@
1
+ import {
2
+ msg,
3
+ report
4
+ } from "./chunk-QFKE5JKC.js";
5
+
6
+ // ../packages/verify/src/eval.ts
7
+ import { Client } from "@modelcontextprotocol/sdk/client/index.js";
8
+ import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
9
/**
 * Flattens a tool-call result into one string.
 *
 * Precedence: `structuredContent` (JSON-encoded) when present, otherwise the
 * `content` array (text items verbatim, any other item JSON-encoded, joined by
 * newlines), otherwise the whole result JSON-encoded.
 *
 * @param {object} res - Tool-call result.
 * @returns {string} Text representation of the result.
 */
function resultText(res) {
  const { structuredContent, content } = res;
  if (structuredContent !== undefined) {
    return JSON.stringify(structuredContent);
  }
  if (!Array.isArray(content)) {
    return JSON.stringify(res);
  }
  // A text item without a `text` field renders as an empty line.
  const renderItem = (item) => (item.type === "text" ? item.text ?? "" : JSON.stringify(item));
  return content.map((item) => renderItem(item)).join("\n");
}
17
/**
 * Extracts the verdict object from the judge model's reply text.
 *
 * The first `{` through the last `}` is parsed as JSON. A string-valued `pass`
 * is interpreted by value ("true" / anything else) because `Boolean("false")`
 * is `true` and would turn a failing verdict into a pass.
 *
 * @param {string} text - Raw text returned by the judge model.
 * @returns {{score: number, pass: boolean, reason: *}} Normalized verdict.
 * @throws {Error} When no JSON object is present, or JSON.parse fails.
 */
function parseJudgeVerdict(text) {
  const json = text.match(/\{[\s\S]*\}/);
  if (!json) throw new Error(`judge returned no JSON: ${text.slice(0, 120)}`);
  const parsed = JSON.parse(json[0]);
  const pass = typeof parsed.pass === "string"
    ? parsed.pass.trim().toLowerCase() === "true"
    : Boolean(parsed.pass);
  return { score: Number(parsed.score) || 0, pass, reason: parsed.reason };
}

/**
 * Builds a judge function that asks an Anthropic model to score a result
 * against a rubric.
 *
 * @param {string} model - Model id passed to `messages.create`.
 * @returns {(input: {rubric: string, result: string}) => Promise<{score: number, pass: boolean, reason: *}>}
 *   Async judge; rejects when ANTHROPIC_API_KEY is unset, when the SDK cannot
 *   be imported, or when the reply holds no parseable JSON.
 */
function llmJudge(model) {
  return async ({ rubric, result }) => {
    if (!process.env.ANTHROPIC_API_KEY) throw new Error("ANTHROPIC_API_KEY not set");
    // The specifier is held in a variable and imported only when the judge runs.
    const sdkName = "@anthropic-ai/sdk";
    const Anthropic = (await import(sdkName)).default;
    const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
    const resp = await client.messages.create({
      model,
      max_tokens: 512,
      system: 'You are a strict evaluation judge. Score how well RESULT satisfies RUBRIC on a 1\u20135 scale (5 = fully satisfies). Reply ONLY with JSON: {"score": <1-5>, "pass": <bool>, "reason": "<short>"}.',
      messages: [{ role: "user", content: `RUBRIC:\n${rubric}\n\nRESULT:\n${result}` }]
    });
    const text = resp.content.find((b) => b.type === "text")?.text ?? "";
    return parseJudgeVerdict(text);
  };
}
40
/**
 * Runs each eval case against an MCP server: calls the tool, has the judge
 * score the result against the case rubric, and records one check per case.
 *
 * A case passes only when the judge says `pass` AND the score reaches
 * `minScore` (default 4). Any error inside a case becomes a failing check.
 *
 * @param {string} baseUrl - MCP endpoint URL.
 * @param {object} spec - Eval spec; reads `cases` and optional `model`.
 * @param {Function} [judge] - Optional judge; defaults to `llmJudge(spec.model ?? "claude-sonnet-4-6")`.
 * @returns {Promise<object>} Report built by `report("eval", baseUrl, checks)`.
 */
async function verifyEval(baseUrl, spec, judge) {
  const score = judge ?? llmJudge(spec.model ?? "claude-sonnet-4-6");
  const checks = [];
  const client = new Client({ name: "greenlight-verify", version: "0.0.0" });
  try {
    // URL parsing lives inside the try so a malformed baseUrl is reported as a
    // failed handshake instead of rejecting the whole verification.
    const transport = new StreamableHTTPClientTransport(new URL(baseUrl));
    await client.connect(transport);
  } catch (e) {
    return report("eval", baseUrl, [{ name: "initialize handshake", pass: false, detail: msg(e) }]);
  }
  try {
    for (const c of spec.cases) {
      const min = c.minScore ?? 4;
      try {
        const res = await client.callTool({ name: c.tool, arguments: c.args ?? {} });
        const verdict = await score({ rubric: c.rubric, result: resultText(res) });
        const pass = verdict.pass && verdict.score >= min;
        checks.push({
          name: `eval: ${c.name}`,
          pass,
          detail: `score ${verdict.score}/5 (min ${min})${verdict.reason ? ` \u2014 ${verdict.reason}` : ""}`
        });
      } catch (e) {
        checks.push({ name: `eval: ${c.name}`, pass: false, detail: msg(e) });
      }
    }
  } finally {
    try {
      await client.close();
    } catch {
      // Best-effort shutdown: a close failure must not discard collected checks.
    }
  }
  return report("eval", baseUrl, checks);
}
71
+
72
+ export {
73
+ llmJudge,
74
+ verifyEval
75
+ };
@@ -0,0 +1,271 @@
1
+ import {
2
+ msg,
3
+ report
4
+ } from "./chunk-QFKE5JKC.js";
5
+
6
+ // ../packages/shared/src/defineConfig.ts
7
/**
 * Identity helper for authoring a manifest: returns the object it was given,
 * unchanged and unvalidated.
 *
 * @param {object} config - Manifest object.
 * @returns {object} The same `config` reference.
 */
function defineConfig(config) {
  const manifest = config;
  return manifest;
}
10
+
11
+ // ../packages/shared/src/load.ts
12
+ import { createJiti } from "jiti";
13
+
14
+ // ../packages/shared/src/schema.ts
15
+ import { z } from "zod";
16
// Allowed values for each manifest dimension.
const LaneEnum = z.enum(["astro", "next", "mcp"]);
const TargetEnum = z.enum(["workers", "vercel", "oci"]);
const DataEnum = z.enum(["none", "d1", "kv", "supabase"]);
const AuthEnum = z.enum(["none", "bearer", "oauth"]);
const AccessEnum = z.enum(["public", "private"]);
const EnvEnum = z.enum(["preview", "beta", "prod"]);

// Compatibility matrix keyed by lane: which targets and data stores each lane
// may be combined with.
const MATRIX = {
  astro: {
    targets: ["workers"],
    data: ["none", "d1", "kv"]
  },
  next: {
    targets: ["vercel"],
    data: ["none", "supabase"]
  },
  mcp: {
    targets: ["workers", "oci"],
    data: ["none"]
  }
};
27
// Schema for one tool entry. Field-level validation comes first; the refinement
// then checks the lane/target/data combination against MATRIX and requires
// private tools to declare an auth scheme.
const ToolSchema = z
  .object({
    name: z.string().regex(/^[a-z][a-z0-9-]*$/, "tool name must be kebab-case starting with a letter"),
    lane: LaneEnum,
    target: TargetEnum,
    data: DataEnum,
    auth: AuthEnum.default("none"),
    access: AccessEnum.default("public"),
    envs: z.array(EnvEnum).nonempty("a tool needs at least one env"),
    adopted: z.boolean().default(false),
    // Directory the tool builds/deploys from. Defaults to tools/<name>; a standalone
    // (poly-repo) tool sets '.' (the repo root).
    dir: z.string().optional(),
    // The tool's code lives in another repo — this entry is a registry pointer only,
    // not built/deployed here (greenlight-v1.md §15.5 poly-repo).
    external: z.boolean().default(false)
  })
  .superRefine((tool, ctx) => {
    const { targets, data } = MATRIX[tool.lane];
    // Every cross-field violation is a custom issue attached to one field.
    const reject = (field, message) => {
      ctx.addIssue({ code: z.ZodIssueCode.custom, path: [field], message });
    };
    if (!targets.includes(tool.target)) {
      reject(
        "target",
        `lane "${tool.lane}" does not support target "${tool.target}" (allowed: ${targets.join(", ")})`
      );
    }
    if (!data.includes(tool.data)) {
      reject(
        "data",
        `lane "${tool.lane}" does not support data "${tool.data}" (allowed: ${data.join(", ")})`
      );
    }
    if (tool.access === "private" && tool.auth === "none") {
      reject("auth", 'private tools must set auth to "bearer" or "oauth", never "none"');
    }
  });
66
// The blog accepts exactly one lane/target pair and a fixed set of data stores.
const BlogSchema = z.object({
  lane: z.literal("astro"),
  target: z.literal("workers"),
  data: z.enum(["none", "d1", "kv"])
});

// Where alerts are delivered.
const AlertsSchema = z.object({
  sink: z.enum(["github-issue", "email"])
});

// Top-level manifest schema.
const ConfigSchema = z.object({
  domain: z.string().min(1, "domain is required"),
  alerts: AlertsSchema,
  // Optional: a tool-only repo (a poly-repo consumer) has no blog.
  blog: BlogSchema.optional(),
  // An omitted tools list is treated as empty.
  tools: z.array(ToolSchema).default([])
});
81
+
82
+ // ../packages/shared/src/load.ts
83
/**
 * Imports a manifest module through jiti and validates it with ConfigSchema.
 *
 * The module's `default` export is used when the module has one; otherwise the
 * module object itself is validated.
 *
 * @param {string} path - Manifest module path handed to jiti.
 * @returns {Promise<object>} The parsed manifest (`safeParse(...).data`).
 * @throws {Error} Listing every validation issue, one bullet per line.
 */
async function loadConfig(path) {
  const loader = createJiti(import.meta.url);
  const loaded = await loader.import(path);
  const manifest = "default" in loaded ? loaded.default : loaded;
  const parsed = ConfigSchema.safeParse(manifest);
  if (parsed.success) {
    return parsed.data;
  }
  const bullets = [];
  for (const issue of parsed.error.issues) {
    // An empty path means the issue is on the manifest as a whole.
    const where = issue.path.join(".") || "(root)";
    bullets.push(` \u2022 ${where}: ${issue.message}`);
  }
  throw new Error(`Invalid Greenlight manifest at ${path}:\n${bullets.join("\n")}`);
}
95
+
96
+ // ../packages/shared/src/url.ts
97
/**
 * Builds the deterministic https URL for the site or for a named tool.
 *
 * Host labels, left to right: `beta` (beta env only), the tool name (when
 * given), then the domain. `/mcp` is appended when `mcp` is truthy.
 *
 * @param {{domain: string, name?: string, env: string, mcp?: boolean}} target
 * @returns {string} The resolved URL.
 * @throws {Error} For the "preview" env, whose URLs are not deterministic.
 */
function resolveUrl({ domain, name, env, mcp }) {
  if (env === "preview") {
    throw new Error(
      "preview URLs are per-target and not deterministic \u2014 get them from the adapter (deploy result / adapter.url), not resolveUrl()."
    );
  }
  const labels = [];
  if (env === "beta") {
    labels.push("beta");
  }
  if (name !== undefined) {
    labels.push(`${name}`);
  }
  labels.push(`${domain}`);
  const suffix = mcp ? "/mcp" : "";
  return `https://${labels.join(".")}${suffix}`;
}
106
+
107
+ // ../packages/verify/src/index.ts
108
+ import { setTimeout as sleep } from "timers/promises";
109
+
110
+ // ../packages/verify/src/api.ts
111
// Removes every trailing "/" from a string; the rest of the string is untouched.
const trimSlash = (s) => {
  let end = s.length;
  while (end > 0 && s[end - 1] === "/") {
    end -= 1;
  }
  return s.slice(0, end);
};
112
/**
 * Fetches one route (redirects are not followed) and compares the response
 * with the expectations declared on the check.
 *
 * @param {string} base - Base URL without a trailing slash.
 * @param {object} c - Route check; reads `path`, optional `status`, optional
 *   `contains` (body substring) and optional `header` ({name, value?}).
 * @returns {Promise<{name: string, pass: boolean, detail: (string|undefined)}>}
 *   Failing reasons joined by "; "; a fetch error yields a failing check.
 */
async function checkRoute(base, c) {
  const name = `GET ${c.path}`;
  const problems = [];
  try {
    const response = await fetch(base + c.path, { redirect: "manual" });
    if (c.status !== undefined && response.status !== c.status) {
      problems.push(`status ${response.status} != ${c.status}`);
    }
    // The body is read only when a substring expectation exists.
    if (c.contains !== undefined) {
      const text = await response.text();
      if (!text.includes(c.contains)) {
        problems.push(`body missing "${c.contains}"`);
      }
    }
    const expected = c.header;
    if (expected) {
      const actual = response.headers.get(expected.name);
      if (actual === null) {
        problems.push(`header ${expected.name} absent`);
      } else if (expected.value !== undefined && actual !== expected.value) {
        problems.push(`header ${expected.name}="${actual}" != "${expected.value}"`);
      }
    }
  } catch (e) {
    return { name, pass: false, detail: msg(e) };
  }
  return { name, pass: problems.length === 0, detail: problems.join("; ") || undefined };
}
136
/**
 * Tries each candidate path in order and validates the first one that answers
 * 200 by testing its body against `marker`.
 *
 * Candidates that fail (non-200 status or a fetch error) are skipped, but their
 * outcome is recorded so the final failure detail says why nothing matched —
 * an unreachable host no longer looks the same as a 404.
 *
 * @param {string} base - Base URL without a trailing slash.
 * @param {string[]} candidates - Paths to try, in order.
 * @param {string} label - Name of the document kind, used in check names.
 * @param {RegExp} marker - Pattern the 200 body must match.
 * @returns {Promise<{name: string, pass: boolean, detail: (string|undefined)}>}
 */
async function checkXml(base, candidates, label, marker) {
  const misses = [];
  for (const path of candidates) {
    try {
      const res = await fetch(base + path, { redirect: "manual" });
      if (res.status === 200) {
        const body = await res.text();
        const ok = marker.test(body);
        return {
          name: `${label} (${path})`,
          pass: ok,
          detail: ok ? void 0 : `200 but content did not look like ${label}`
        };
      }
      misses.push(`${path}: ${res.status}`);
    } catch (e) {
      // Keep trying the remaining candidates, but remember what went wrong.
      misses.push(`${path}: ${msg(e)}`);
    }
  }
  const why = misses.length > 0 ? ` (${misses.join("; ")})` : "";
  return {
    name: label,
    pass: false,
    detail: `none of ${candidates.join(", ")} returned 200${why}`
  };
}
154
/**
 * Fetches the home page, collects unique root-relative `href="/..."` targets
 * (double-quoted, no query or fragment, not protocol-relative) and requests
 * each one in turn. A target is broken when it answers >= 400 or cannot be
 * fetched.
 *
 * @param {string} base - Base URL without a trailing slash.
 * @param {number} [max=25] - Collection stops once this many links are gathered.
 * @returns {Promise<{name: string, pass: boolean, detail: (string|undefined)}>}
 */
async function checkInternalLinks(base, max = 25) {
  const label = "no broken internal links";
  try {
    const home = await fetch(`${base}/`, { redirect: "manual" });
    const markup = await home.text();
    const targets = new Set();
    for (const [, href] of markup.matchAll(/href="(\/[^"#?]*)"/g)) {
      // "//host/..." is protocol-relative, i.e. not an internal link.
      if (href && !href.startsWith("//")) {
        targets.add(href);
      }
      if (targets.size >= max) {
        break;
      }
    }
    const failures = [];
    for (const target of targets) {
      let status;
      try {
        ({ status } = await fetch(base + target, { redirect: "manual" }));
      } catch {
        failures.push(`${target} (unreachable)`);
        continue;
      }
      if (status >= 400) {
        failures.push(`${target} (${status})`);
      }
    }
    return {
      name: `${label} (${targets.size} checked)`,
      pass: failures.length === 0,
      detail: failures.length ? `broken: ${failures.join(", ")}` : undefined
    };
  } catch (e) {
    return { name: label, pass: false, detail: msg(e) };
  }
}
182
/**
 * Runs the HTTP-level checks of an "api" spec, in order: declared route checks,
 * then the optional RSS, sitemap and internal-link checks.
 *
 * @param {string} baseUrl - Site URL; trailing slashes are trimmed for requests.
 * @param {object} spec - Reads `checks`, `rssValid`, `sitemapValid` and
 *   `noBrokenInternalLinks`.
 * @returns {Promise<object>} Report built by `report("api", baseUrl, checks)`.
 */
async function verifyApi(baseUrl, spec) {
  const base = trimSlash(baseUrl);
  const results = [];
  for (const route of spec.checks ?? []) {
    results.push(await checkRoute(base, route));
  }
  if (spec.rssValid) {
    const rssPaths = ["/rss.xml", "/feed.xml", "/index.xml"];
    results.push(await checkXml(base, rssPaths, "rss", /<(rss|feed)[\s>]/i));
  }
  if (spec.sitemapValid) {
    const sitemapPaths = ["/sitemap.xml", "/sitemap-index.xml"];
    results.push(await checkXml(base, sitemapPaths, "sitemap", /<(urlset|sitemapindex)[\s>]/i));
  }
  if (spec.noBrokenInternalLinks) {
    results.push(await checkInternalLinks(base));
  }
  return report("api", baseUrl, results);
}
204
+
205
+ // ../packages/verify/src/index.ts
206
/**
 * Identity helper for authoring a verify spec: returns its argument unchanged.
 *
 * @param {object} spec - Verify spec.
 * @returns {object} The same `spec` reference.
 */
function defineVerify(spec) {
  const declared = spec;
  return declared;
}
209
/**
 * Polls `url` until a fetch resolves (any HTTP status counts as reachable) or
 * the time budget is spent.
 *
 * Each attempt's timeout (at most 5 s) and each pause between attempts (at most
 * 2 s) are clamped to the time remaining, so the call returns close to the
 * deadline instead of overshooting it by a full attempt plus a full pause.
 *
 * @param {string} url - URL to probe.
 * @param {number} timeoutMs - Total budget in ms; zero, negative or NaN skips
 *   waiting and returns true.
 * @returns {Promise<boolean>} true once reachable, false when the budget ran out.
 */
async function waitForReachable(url, timeoutMs) {
  // `!(x > 0)` also covers NaN, which would otherwise never reach the deadline.
  if (!(timeoutMs > 0)) return true;
  const deadline = Date.now() + timeoutMs;
  for (; ; ) {
    // AbortSignal.timeout needs a whole number of ms; always allow at least 1.
    const attemptMs = Math.max(1, Math.ceil(Math.min(5e3, deadline - Date.now())));
    try {
      await fetch(url, { signal: AbortSignal.timeout(attemptMs) });
      return true;
    } catch {
      const remaining = deadline - Date.now();
      if (remaining <= 0) return false;
      await sleep(Math.min(2e3, remaining));
    }
  }
}
222
/**
 * Runs one verify spec against `baseUrl`, dispatching on `spec.mode`.
 *
 * Only "api" is implemented in this chunk; the other modes are loaded on demand
 * through dynamic imports.
 *
 * @param {string} baseUrl - Deployed URL under test.
 * @param {object} spec - Verify spec; `mode` selects the verifier.
 * @param {object} [opts] - Optional `reachableTimeoutMs` (wait for the URL
 *   first) and `toolDir` (cwd for "test" mode; defaults to process.cwd()).
 * @returns {Promise<object>} The verifier's report. An unrecognized mode yields
 *   a failing report rather than `undefined`, so report consumers never
 *   dereference a missing report.
 */
async function verify(baseUrl, spec, opts) {
  // The reachability result is advisory: the checks run either way.
  if (opts?.reachableTimeoutMs) await waitForReachable(baseUrl, opts.reachableTimeoutMs);
  switch (spec.mode) {
    case "api":
      return verifyApi(baseUrl, spec);
    case "mcp": {
      const { verifyMcp: verifyMcp2 } = await import("./mcp-KU7WKB5K.js");
      return verifyMcp2(baseUrl, spec);
    }
    case "playwright": {
      const { verifyPlaywright: verifyPlaywright2 } = await import("./playwright-CGTTHGIL.js");
      return verifyPlaywright2(baseUrl, spec);
    }
    case "test": {
      const { verifyTest: verifyTest2 } = await import("./test-7GMOU7I5.js");
      return verifyTest2(spec, opts?.toolDir ?? process.cwd());
    }
    case "agent-web": {
      const { verifyAgentWeb: verifyAgentWeb2 } = await import("./agent-web-I4LXW4SR.js");
      return verifyAgentWeb2(baseUrl, spec);
    }
    case "eval": {
      const { verifyEval: verifyEval2 } = await import("./eval-LLQPOEQX.js");
      return verifyEval2(baseUrl, spec);
    }
    default:
      return report(String(spec.mode), baseUrl, [
        {
          name: "known verify mode",
          pass: false,
          detail: `unknown verify mode "${spec.mode}" (expected api, mcp, playwright, test, agent-web or eval)`
        }
      ]);
  }
}
249
/**
 * Runs several verify specs sequentially against the same URL.
 *
 * Only the first spec receives the caller's options untouched; every later spec
 * gets `reachableTimeoutMs: 0` so the reachability wait happens at most once.
 *
 * @param {string} baseUrl - Deployed URL under test.
 * @param {Iterable<object>} specs - Verify specs, run in order.
 * @param {object} [opts] - Options forwarded to `verify`.
 * @returns {Promise<object[]>} One report per spec, in order.
 */
async function verifyAll(baseUrl, specs, opts) {
  const reports = [];
  let isFirst = true;
  for (const spec of specs) {
    const effective = isFirst ? opts : { ...opts, reachableTimeoutMs: 0 };
    const outcome = await verify(baseUrl, spec, effective);
    reports.push(outcome);
    isFirst = false;
  }
  return reports;
}
259
/**
 * Tells whether a run succeeded: there is at least one report and none failed.
 * An empty list counts as a failure.
 *
 * @param {Array<{pass: *}>} reports - Reports to inspect.
 * @returns {boolean} true only when the list is non-empty and every `pass` is truthy.
 */
function allPass(reports) {
  if (reports.length === 0) {
    return false;
  }
  const anyFailed = reports.some((r) => !r.pass);
  return !anyFailed;
}
262
+
263
+ export {
264
+ ConfigSchema,
265
+ defineConfig,
266
+ loadConfig,
267
+ resolveUrl,
268
+ defineVerify,
269
+ verifyAll,
270
+ allPass
271
+ };
@@ -0,0 +1,45 @@
1
+ import {
2
+ msg,
3
+ report
4
+ } from "./chunk-QFKE5JKC.js";
5
+
6
+ // ../packages/verify/src/test.ts
7
+ import { spawnSync } from "child_process";
8
/**
 * Picks a one-line summary out of test-runner output.
 *
 * The first line matching the summary pattern wins; otherwise the last
 * non-blank line is used.
 *
 * @param {string} output - Combined runner output.
 * @returns {string|undefined} The trimmed line, or undefined when every line is blank.
 */
function summarize(output) {
  const rows = output.split("\n");
  const summaryPattern = /Tests?\s+\d+\s+(passed|failed)|Tests:|# (pass|fail)\b/;
  for (const row of rows) {
    if (summaryPattern.test(row)) {
      return row.trim();
    }
  }
  // No recognizable summary: fall back to the final line that has content.
  for (const row of [...rows].reverse()) {
    const trimmed = row.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return undefined;
}
18
/**
 * Runs the tool's test command synchronously through a shell and turns the
 * outcome into a single-check report.
 *
 * @param {object} spec - Reads `command` (default "pnpm test"), `cwd` and
 *   `timeoutMs` (default 600000).
 * @param {string} defaultCwd - Working directory used when `spec.cwd` is unset.
 * @returns {object} Report built by `report("test", "<command> (<cwd>)", [check])`.
 *   The check passes only on exit status 0; a spawn error or a thrown exception
 *   becomes a failing check.
 */
function verifyTest(spec, defaultCwd) {
  const command = spec.command ?? "pnpm test";
  const cwd = spec.cwd ?? defaultCwd;
  const where = `${command} (${cwd})`;
  const single = (pass, detail) => report("test", where, [{ name: command, pass, detail }]);
  try {
    const { error, status, stdout, stderr } = spawnSync(command, {
      cwd,
      shell: true,
      encoding: "utf8",
      timeout: spec.timeoutMs ?? 6e5,
      maxBuffer: 64 * 1024 * 1024
    });
    if (error) {
      return single(false, msg(error));
    }
    const summary = summarize(`${stdout ?? ""}${stderr ?? ""}`);
    if (status === 0) {
      return single(true, summary);
    }
    // A null status is rendered as the literal word "signal".
    const tail = summary ? ` \u2014 ${summary}` : "";
    return single(false, `exit ${status ?? "signal"}${tail}`);
  } catch (e) {
    return single(false, msg(e));
  }
}
42
+
43
+ export {
44
+ verifyTest
45
+ };
@@ -0,0 +1,12 @@
1
+ // ../packages/verify/src/types.ts
2
/**
 * Turns a caught value into display text: an Error's `message`, otherwise
 * `String(value)`.
 *
 * @param {*} e - Caught value.
 * @returns {string} Human-readable message.
 */
function msg(e) {
  if (e instanceof Error) {
    return e.message;
  }
  return String(e);
}
5
/**
 * Wraps a list of checks into a report object.
 *
 * @param {string} mode - Verify mode that produced the checks.
 * @param {string} url - Target the checks ran against.
 * @param {Array<{pass: *}>} checks - Individual check results.
 * @returns {{pass: boolean, mode: string, url: string, checks: Array}} `pass`
 *   is true only when there is at least one check and all of them passed.
 */
function report(mode, url, checks) {
  const anyFailed = checks.some((c) => !c.pass);
  const pass = checks.length !== 0 && !anyFailed;
  return { pass, mode, url, checks };
}
8
+
9
+ export {
10
+ msg,
11
+ report
12
+ };
@@ -0,0 +1,231 @@
1
+ import {
2
+ msg,
3
+ report
4
+ } from "./chunk-QFKE5JKC.js";
5
+
6
+ // ../packages/verify/src/agent-web.ts
7
// Tool definitions offered to the model: snapshot, click, type, navigate and
// finish. Built with small factories so every property object is a fresh value.
const TOOLS = (() => {
  const str = () => ({ type: "string" });
  const bool = () => ({ type: "boolean" });
  // `required` is left off entirely when a tool takes no mandatory input.
  const schema = (properties, required) =>
    required === undefined ? { type: "object", properties } : { type: "object", properties, required };
  const tool = (name, description, input_schema) => ({ name, description, input_schema });
  return [
    tool(
      "browser_snapshot",
      "Get the current URL and the accessibility tree of the page.",
      schema({})
    ),
    tool(
      "browser_click",
      "Click an element identified by its ARIA role and accessible name.",
      schema({ role: str(), name: str() }, ["role", "name"])
    ),
    tool(
      "browser_type",
      "Type text into a field by role + accessible name; submit=true presses Enter.",
      schema({ role: str(), name: str(), text: str(), submit: bool() }, ["role", "name", "text"])
    ),
    tool(
      "browser_navigate",
      'Navigate to a path on the same site (e.g. "/login").',
      schema({ path: str() }, ["path"])
    ),
    tool(
      "browser_finish",
      "Finish the task. success=false if you could not complete it.",
      schema({ success: bool(), summary: str() }, ["success"])
    )
  ];
})();
55
// System prompt for the QA agent, one instruction per entry, joined by spaces.
const SYSTEM = [
  "You are a QA agent validating a deployed web app.",
  "Use the browser tools to accomplish the given task, then call browser_finish.",
  "Identify elements by the ARIA role + accessible name shown in the snapshot.",
  "Start by calling browser_snapshot.",
  "Be efficient \u2014 no more steps than needed."
].join(" ");
56
/**
 * Executes one browser tool call requested by the model and returns a short
 * text result. Failures never throw: they come back as `error: <message>` so
 * the model can react to them.
 *
 * @param {object} page - Page object driving the browser.
 * @param {string} base - Site base URL without a trailing slash.
 * @param {string} name - Tool name chosen by the model.
 * @param {object} input - Tool input supplied by the model.
 * @returns {Promise<string>} Result text for the tool_result block.
 */
async function execTool(page, base, name, input) {
  try {
    switch (name) {
      case "browser_snapshot": {
        const tree = await page.locator("body").ariaSnapshot();
        // Output is capped at 6000 characters.
        return `URL: ${page.url()}\n${tree}`.slice(0, 6e3);
      }
      case "browser_click": {
        await page.getByRole(input.role, { name: String(input.name) }).first().click({ timeout: 5e3 });
        return `clicked ${input.role} "${input.name}"`;
      }
      case "browser_type": {
        const el = page.getByRole(input.role, { name: String(input.name) }).first();
        await el.fill(String(input.text), { timeout: 5e3 });
        if (input.submit) await el.press("Enter");
        return `typed into ${input.role} "${input.name}"${input.submit ? " + submitted" : ""}`;
      }
      case "browser_navigate": {
        // The path is model-supplied. Without a leading "/", plain concatenation
        // would let it rewrite the host ("https://site" + "@evil.test" or
        // ".evil.test") or produce an invalid URL ("https://site" + "login").
        // Forcing the slash keeps navigation on the site under test.
        const requested = String(input.path);
        const path = requested.startsWith("/") ? requested : `/${requested}`;
        await page.goto(base + path, { waitUntil: "domcontentloaded" });
        return `navigated to ${input.path}`;
      }
      case "browser_finish":
        return "finished";
      default:
        return `unknown tool ${name}`;
    }
  } catch (e) {
    return `error: ${msg(e)}`;
  }
}
87
/**
 * Evaluates scenario assertions against the current page state.
 *
 * Each assertion object may carry `urlContains`, `textContains` and/or
 * `selector`; every field present yields one check, in that order. The body
 * text is read once, and only when some assertion has a truthy `textContains`.
 *
 * @param {object} page - Page object to inspect.
 * @param {object[]} asserts - Assertions to evaluate.
 * @returns {Promise<object[]>} One check per assertion field.
 */
async function evalAsserts(page, asserts) {
  // A failed read leaves the text empty, so substring checks fail normally.
  const readBodyText = async () => {
    try {
      return await page.locator("body").innerText({ timeout: 3e3 });
    } catch {
      return "";
    }
  };
  // A selector that cannot be counted is treated as matching nothing.
  const countMatches = async (selector) => {
    try {
      return await page.locator(selector).count();
    } catch {
      return 0;
    }
  };
  const bodyText = asserts.some((a) => a.textContains) ? await readBodyText() : "";
  const results = [];
  for (const a of asserts) {
    if (a.urlContains !== undefined) {
      const current = page.url();
      const ok = current.includes(a.urlContains);
      results.push({
        name: `url contains "${a.urlContains}"`,
        pass: ok,
        detail: ok ? undefined : current
      });
    }
    if (a.textContains !== undefined) {
      results.push({
        name: `text contains "${a.textContains}"`,
        pass: bodyText.includes(a.textContains)
      });
    }
    if (a.selector !== undefined) {
      const matches = await countMatches(a.selector);
      results.push({ name: `selector ${a.selector}`, pass: matches > 0 });
    }
  }
  return results;
}
121
/**
 * Drives one agent scenario: opens the start page, lets the model call browser
 * tools until it calls `browser_finish` (or the step budget runs out), then
 * evaluates the scenario's assertions.
 *
 * @param {object} client - Client exposing `messages.create`.
 * @param {object} page - Page object for this scenario.
 * @param {string} base - Site base URL without a trailing slash.
 * @param {object} spec - Reads `maxSteps` (default 12) and `model`.
 * @param {object} scenario - Reads `name`, `task`, optional `start` and `asserts`.
 * @returns {Promise<object[]>} Checks prefixed with `[scenario name]`; a lone
 *   passing "agent succeeded" check when nothing failed and no assertions exist.
 */
async function runScenario(client, page, base, spec, scenario) {
  const tag = `[${scenario.name}]`;
  await page.goto(base + (scenario.start ?? "/"), { waitUntil: "domcontentloaded" });
  const messages = [{ role: "user", content: `Task: ${scenario.task}` }];
  const maxSteps = spec.maxSteps ?? 12;
  let finish = null;
  // Set when the model answers without any tool call, which ends the loop
  // before the step budget is used up.
  let stoppedWithoutTools = false;
  for (let step = 0; step < maxSteps && !finish; step++) {
    const resp = await client.messages.create({
      model: spec.model ?? "claude-sonnet-4-6",
      max_tokens: 1024,
      system: SYSTEM,
      tools: TOOLS,
      messages
    });
    const blocks = resp.content;
    messages.push({ role: "assistant", content: blocks });
    const toolUses = blocks.filter((b) => b.type === "tool_use");
    if (toolUses.length === 0) {
      stoppedWithoutTools = true;
      break;
    }
    const results = [];
    for (const tu of toolUses) {
      const out = await execTool(page, base, tu.name, tu.input);
      if (tu.name === "browser_finish") {
        finish = tu.input;
      }
      results.push({ type: "tool_result", tool_use_id: tu.id, content: out });
    }
    messages.push({ role: "user", content: results });
  }
  const checks = [];
  if (!finish) {
    checks.push({
      name: `${tag} completed`,
      pass: false,
      // Report the real cause: blaming maxSteps when the model simply stopped
      // sends the reader off to raise a limit that was never hit.
      detail: stoppedWithoutTools
        ? "agent stopped without calling browser_finish"
        : "agent did not finish in maxSteps"
    });
  } else if (finish.success === false) {
    checks.push({ name: `${tag} agent succeeded`, pass: false, detail: finish.summary });
  }
  for (const c of await evalAsserts(page, scenario.asserts ?? [])) {
    checks.push({ ...c, name: `${tag} ${c.name}` });
  }
  if (checks.length === 0) checks.push({ name: `${tag} agent succeeded`, pass: true });
  return checks;
}
165
/**
 * Runs every agent-web scenario in its own page and gathers the checks.
 *
 * Preconditions are tested in order — API key, playwright import, Anthropic SDK
 * import, browser launch — and the first one that fails is returned as a
 * single failing check.
 *
 * @param {string} baseUrl - Site URL; trailing slashes are trimmed for navigation.
 * @param {object} spec - Reads `scenarios` and `headed`; also passed to `runScenario`.
 * @returns {Promise<object>} Report built by `report("agent-web", baseUrl, checks)`.
 */
async function verifyAgentWeb(baseUrl, spec) {
  const base = baseUrl.replace(/\/+$/, "");
  // Every precondition failure has the same shape: one failing check.
  const abort = (name, detail) => report("agent-web", baseUrl, [{ name, pass: false, detail }]);

  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    return abort("ANTHROPIC_API_KEY set", "set ANTHROPIC_API_KEY to run agent-web");
  }

  let chromium;
  try {
    ({ chromium } = await import("playwright"));
  } catch {
    return abort("playwright available", "pnpm add playwright && playwright install chromium");
  }

  let Anthropic;
  try {
    // The specifier is held in a variable and imported only when needed.
    const sdkName = "@anthropic-ai/sdk";
    ({ default: Anthropic } = await import(sdkName));
  } catch {
    return abort("@anthropic-ai/sdk available", "pnpm add @anthropic-ai/sdk");
  }
  const client = new Anthropic({ apiKey });

  let browser;
  try {
    browser = await chromium.launch({ headless: !spec.headed });
  } catch (e) {
    return abort("launch browser", `${msg(e)} (try \`playwright install chromium\`)`);
  }

  const checks = [];
  try {
    for (const scenario of spec.scenarios) {
      const page = await browser.newPage();
      try {
        const scenarioChecks = await runScenario(client, page, base, spec, scenario);
        checks.push(...scenarioChecks);
      } catch (e) {
        checks.push({ name: `[${scenario.name}]`, pass: false, detail: msg(e) });
      } finally {
        await page.close();
      }
    }
  } finally {
    await browser.close();
  }
  return report("agent-web", baseUrl, checks);
}
228
+
229
+ export {
230
+ verifyAgentWeb
231
+ };
@@ -0,0 +1,61 @@
1
+ import {
2
+ msg,
3
+ report
4
+ } from "./chunk-QFKE5JKC.js";
5
+
6
+ // ../packages/verify/src/playwright.ts
7
/**
 * Opens each path of `spec.renders` in Chromium and checks that navigation
 * succeeded and that the body produces a non-empty accessibility snapshot.
 *
 * A missing playwright install or a failed browser launch is returned as a
 * single failing check.
 *
 * @param {string} baseUrl - Site URL; trailing slashes are trimmed for navigation.
 * @param {object} spec - Reads `renders`, the list of paths to open.
 * @returns {Promise<object>} Report built by `report("playwright", baseUrl, checks)`.
 */
async function verifyPlaywright(baseUrl, spec) {
  const abort = (name, detail) => report("playwright", baseUrl, [{ name, pass: false, detail }]);

  let chromium;
  try {
    ({ chromium } = await import("playwright"));
  } catch {
    return abort(
      "playwright available",
      "playwright not installed \u2014 run `pnpm add playwright && pnpm exec playwright install chromium`"
    );
  }

  const base = baseUrl.replace(/\/+$/, "");
  let browser;
  try {
    browser = await chromium.launch();
  } catch (e) {
    return abort("launch browser", `${msg(e)} (try \`playwright install chromium\`)`);
  }

  const checks = [];
  try {
    for (const path of spec.renders) {
      const page = await browser.newPage();
      const name = `renders ${path}`;
      try {
        const response = await page.goto(base + path, { waitUntil: "domcontentloaded" });
        // A null response counts as not ok.
        const ok = response?.ok() ?? false;
        const tree = await page.locator("body").ariaSnapshot();
        const hasContent = tree.trim().length > 0;
        let detail;
        if (!ok) {
          detail = `status ${response?.status() ?? "none"}`;
        } else if (!hasContent) {
          detail = "empty accessibility tree";
        }
        checks.push({ name, pass: ok && hasContent, detail });
      } catch (e) {
        checks.push({ name, pass: false, detail: msg(e) });
      } finally {
        await page.close();
      }
    }
  } finally {
    await browser.close();
  }
  return report("playwright", baseUrl, checks);
}
58
+
59
+ export {
60
+ verifyPlaywright
61
+ };