@rtrentjones/greenlight 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/skills/provider-neon/SKILL.md +5 -1
- package/dist/{agent-web-3FTO2TLJ.js → agent-web-BG5ZIVAB.js} +1 -1
- package/dist/bin.js +50 -25
- package/dist/{chunk-XWTOJHLV.js → chunk-3A6F2JNP.js} +35 -7
- package/dist/{chunk-OBWWE7GE.js → chunk-FZH2YQPJ.js} +50 -2
- package/dist/{chunk-KVOI4UL2.js → chunk-IYEIZYI5.js} +18 -3
- package/dist/{eval-44S2BATV.js → eval-YZXJSUKH.js} +3 -1
- package/dist/index.js +3 -3
- package/package.json +5 -5
|
@@ -37,7 +37,11 @@ plain `.sql`); the app's own build runs its migrate against the wired **`DIRECT_
|
|
|
37
37
|
prod branch, preview build → preview branch; a failed migrate fails the build = a natural gate). The
|
|
38
38
|
**native Neon↔Vercel integration** owns ephemeral per-PR preview branches (don't put those in
|
|
39
39
|
Terraform). Greenlight's only role is the **dangerous-SQL gate**: run `greenlight migrations scan`
|
|
40
|
-
(auto-detects `supabase/migrations | migrations | drizzle/migrations | …`)
|
|
40
|
+
(auto-detects `supabase/migrations | migrations | drizzle/migrations | …`) **before** the migrate.
|
|
41
|
+
For a Vercel-git tool the migrate runs in the build, so wire the scan there — first in the build/migrate
|
|
42
|
+
script: `"migrate": "greenlight migrations scan && node scripts/migrate.mjs"`, `"build": "pnpm run
|
|
43
|
+
migrate && next build"` (add `@rtrentjones/greenlight` as a devDep so the bin resolves in the build).
|
|
44
|
+
`doctor` recognizes the scan wired in the build script **or** a workflow.
|
|
41
45
|
See [migrations.md](https://github.com/RTrentJones/greenlight/blob/main/docs/migrations.md).
|
|
42
46
|
|
|
43
47
|
## Sharing one DB + multi-account
|
package/dist/bin.js
CHANGED
|
@@ -7,13 +7,14 @@ import {
|
|
|
7
7
|
loadConfig,
|
|
8
8
|
resolveUrl,
|
|
9
9
|
scanSqlFiles,
|
|
10
|
+
toExportResult,
|
|
10
11
|
verifyAll
|
|
11
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-FZH2YQPJ.js";
|
|
12
13
|
import "./chunk-HX7VA25D.js";
|
|
13
14
|
import "./chunk-N3IKUCSF.js";
|
|
14
15
|
import "./chunk-KP3Y6WRU.js";
|
|
15
|
-
import "./chunk-
|
|
16
|
-
import "./chunk-
|
|
16
|
+
import "./chunk-IYEIZYI5.js";
|
|
17
|
+
import "./chunk-3A6F2JNP.js";
|
|
17
18
|
import "./chunk-QFKE5JKC.js";
|
|
18
19
|
|
|
19
20
|
// src/commands/add.ts
|
|
@@ -601,7 +602,7 @@ function tokensForTool(tool) {
|
|
|
601
602
|
}
|
|
602
603
|
|
|
603
604
|
// src/version.ts
|
|
604
|
-
var MODULE_REF = "v0.
|
|
605
|
+
var MODULE_REF = "v0.6.0";
|
|
605
606
|
var MODULE_SOURCE_BASE = "git::https://github.com/RTrentJones/greenlight.git//infra/modules";
|
|
606
607
|
function moduleSource(module, ref = MODULE_REF) {
|
|
607
608
|
return `${MODULE_SOURCE_BASE}/${module}?ref=${ref}`;
|
|
@@ -2475,7 +2476,7 @@ function conformanceChecks(t, root) {
|
|
|
2475
2476
|
const migBase = join6(root, toolDir);
|
|
2476
2477
|
const migDir = resolveMigrationsDir(void 0, migBase);
|
|
2477
2478
|
if (existsSync9(join6(migBase, migDir))) {
|
|
2478
|
-
const
|
|
2479
|
+
const inWorkflow = [join6(migBase, ".github/workflows"), join6(root, ".github/workflows")].some(
|
|
2479
2480
|
(d) => {
|
|
2480
2481
|
try {
|
|
2481
2482
|
return readdirSync4(d).filter((f) => /\.ya?ml$/.test(f)).some((f) => readFileSync8(join6(d, f), "utf8").includes("migrations scan"));
|
|
@@ -2484,10 +2485,19 @@ function conformanceChecks(t, root) {
|
|
|
2484
2485
|
}
|
|
2485
2486
|
}
|
|
2486
2487
|
);
|
|
2488
|
+
const inScripts = (() => {
|
|
2489
|
+
try {
|
|
2490
|
+
const pkg = JSON.parse(readFileSync8(join6(migBase, "package.json"), "utf8"));
|
|
2491
|
+
return Object.values(pkg.scripts ?? {}).some((s) => s.includes("migrations scan"));
|
|
2492
|
+
} catch {
|
|
2493
|
+
return false;
|
|
2494
|
+
}
|
|
2495
|
+
})();
|
|
2496
|
+
const wired = inWorkflow || inScripts;
|
|
2487
2497
|
out.push({
|
|
2488
2498
|
name: `${t.name}: migrations gate`,
|
|
2489
2499
|
status: wired ? "ok" : "warn",
|
|
2490
|
-
detail: wired ? `${migDir} scanned
|
|
2500
|
+
detail: wired ? `${migDir} scanned before apply (${inWorkflow ? "CI workflow" : "build script"})` : `${migDir} present but no workflow or build script runs \`greenlight migrations scan\` \u2014 wire the dangerous-SQL gate before the apply step`
|
|
2491
2501
|
});
|
|
2492
2502
|
}
|
|
2493
2503
|
}
|
|
@@ -2885,7 +2895,7 @@ import { setTimeout as sleep } from "timers/promises";
|
|
|
2885
2895
|
|
|
2886
2896
|
// src/commands/verify.ts
|
|
2887
2897
|
import { spawnSync } from "child_process";
|
|
2888
|
-
import { resolve as resolve9 } from "path";
|
|
2898
|
+
import { basename, resolve as resolve9 } from "path";
|
|
2889
2899
|
function defaultSpec(lane) {
|
|
2890
2900
|
switch (lane) {
|
|
2891
2901
|
case "astro":
|
|
@@ -2909,21 +2919,34 @@ function defaultSpec(lane) {
|
|
|
2909
2919
|
};
|
|
2910
2920
|
}
|
|
2911
2921
|
}
|
|
2912
|
-
function printReport(report) {
|
|
2913
|
-
|
|
2922
|
+
function printReport(report, log = console.log) {
|
|
2923
|
+
log(`verify ${report.mode} ${report.url}
|
|
2914
2924
|
`);
|
|
2915
2925
|
for (const c of report.checks) {
|
|
2916
|
-
|
|
2926
|
+
log(` ${c.pass ? "\u2714" : "\u2718"} ${c.name}${c.detail ? ` \u2014 ${c.detail}` : ""}`);
|
|
2917
2927
|
}
|
|
2918
|
-
|
|
2928
|
+
log(`
|
|
2919
2929
|
${report.pass ? "\u2714 PASS" : "\u2718 FAIL"}`);
|
|
2920
2930
|
if (!report.pass && report.logs) {
|
|
2921
|
-
|
|
2931
|
+
log(`
|
|
2922
2932
|
--- recent logs (${report.mode}) ---
|
|
2923
2933
|
${report.logs}
|
|
2924
2934
|
--- end logs ---`);
|
|
2925
2935
|
}
|
|
2926
2936
|
}
|
|
2937
|
+
function emitReports(reports, json, ctx) {
|
|
2938
|
+
const log = json ? console.error : console.log;
|
|
2939
|
+
for (const report of reports) printReport(report, log);
|
|
2940
|
+
const pass = allPass(reports);
|
|
2941
|
+
if (reports.length > 1) log(`
|
|
2942
|
+
${pass ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports.length} specs)`);
|
|
2943
|
+
if (json) process.stdout.write(`${JSON.stringify(toExportResult(reports, ctx))}
|
|
2944
|
+
`);
|
|
2945
|
+
process.exit(pass ? 0 : 1);
|
|
2946
|
+
}
|
|
2947
|
+
function gitSha() {
|
|
2948
|
+
return process.env.VERCEL_GIT_COMMIT_SHA ?? process.env.GITHUB_SHA ?? null;
|
|
2949
|
+
}
|
|
2927
2950
|
var LOG_TAIL_LINES = 50;
|
|
2928
2951
|
function redactSecrets(text, env = process.env) {
|
|
2929
2952
|
let out = text;
|
|
@@ -2964,6 +2987,9 @@ function flag6(args, name) {
|
|
|
2964
2987
|
const i = args.indexOf(name);
|
|
2965
2988
|
return i >= 0 ? args[i + 1] : void 0;
|
|
2966
2989
|
}
|
|
2990
|
+
function jsonFlag(args) {
|
|
2991
|
+
return args.includes("--json") || process.env.GREENLIGHT_VERIFY_JSON === "1";
|
|
2992
|
+
}
|
|
2967
2993
|
async function verifyCommand(args) {
|
|
2968
2994
|
const specPath = flag6(args, "--spec");
|
|
2969
2995
|
if (specPath) {
|
|
@@ -2978,12 +3004,12 @@ async function verifyCommand(args) {
|
|
|
2978
3004
|
toolDir: process.cwd()
|
|
2979
3005
|
});
|
|
2980
3006
|
attachFailureLogs(reports2, specs2, process.cwd());
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
3007
|
+
const tool = flag6(args, "--tool") ?? basename(specPath).replace(/\.config\.[tj]s$/, "");
|
|
3008
|
+
emitReports(reports2, jsonFlag(args), {
|
|
3009
|
+
tool,
|
|
3010
|
+
env: flag6(args, "--env") ?? "preview",
|
|
3011
|
+
gitSha: gitSha()
|
|
3012
|
+
});
|
|
2987
3013
|
}
|
|
2988
3014
|
const name = args[0];
|
|
2989
3015
|
if (!name || name.startsWith("-")) {
|
|
@@ -3016,12 +3042,11 @@ ${pass2 ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports2.length} specs)`);
|
|
|
3016
3042
|
const toolDir = resolve9(process.cwd(), entry.dir ?? ".");
|
|
3017
3043
|
const reports = await verifyAll(url, specs, { reachableTimeoutMs, toolDir });
|
|
3018
3044
|
attachFailureLogs(reports, specs, toolDir);
|
|
3019
|
-
|
|
3020
|
-
|
|
3021
|
-
|
|
3022
|
-
|
|
3023
|
-
|
|
3024
|
-
process.exit(pass ? 0 : 1);
|
|
3045
|
+
emitReports(reports, jsonFlag(args), {
|
|
3046
|
+
tool: entry.name ?? name,
|
|
3047
|
+
env: override ? "preview" : flag6(args, "--env") ?? "preview",
|
|
3048
|
+
gitSha: gitSha()
|
|
3049
|
+
});
|
|
3025
3050
|
}
|
|
3026
3051
|
|
|
3027
3052
|
// src/commands/preview.ts
|
|
@@ -3378,7 +3403,7 @@ var HELP = `greenlight <command>
|
|
|
3378
3403
|
config load & validate the manifest, then print it
|
|
3379
3404
|
deploy <name> --env <env> build + deploy an entry via its target adapter
|
|
3380
3405
|
preview <name> [--port <n>] build + serve locally + verify (one command)
|
|
3381
|
-
verify <name> [--env <env> | --url <url>]
|
|
3406
|
+
verify <name> [--env <env> | --url <url>] [--json] run the verify harness (--json: standards-shaped result to stdout)
|
|
3382
3407
|
promote <name> [--perform] [--push] gated develop -> main fast-forward
|
|
3383
3408
|
status <name> last ship/deploy/verify run for a tool (via gh)
|
|
3384
3409
|
secrets gather <name> [--repo o/r] [--env e] guided, link-first token prompts -> GitHub secrets (no disk/logs)
|
|
@@ -14,6 +14,10 @@ function resultText(res) {
|
|
|
14
14
|
}
|
|
15
15
|
return JSON.stringify(res);
|
|
16
16
|
}
|
|
17
|
+
var clamp01 = (n) => {
|
|
18
|
+
const v = typeof n === "number" ? n : Number(n);
|
|
19
|
+
return Number.isFinite(v) ? Math.min(1, Math.max(0, v)) : 0;
|
|
20
|
+
};
|
|
17
21
|
function llmJudge(model) {
|
|
18
22
|
return async ({ rubric, result }) => {
|
|
19
23
|
if (!process.env.ANTHROPIC_API_KEY) throw new Error("ANTHROPIC_API_KEY not set");
|
|
@@ -27,7 +31,7 @@ function llmJudge(model) {
|
|
|
27
31
|
const resp = await client.messages.create({
|
|
28
32
|
model,
|
|
29
33
|
max_tokens: 512,
|
|
30
|
-
system: 'You are a strict evaluation judge. Score how well RESULT satisfies RUBRIC on a 1
|
|
34
|
+
system: 'You are a strict evaluation judge. Score how well RESULT satisfies RUBRIC on a 0..1 scale (1 = fully satisfies). Reply ONLY with JSON: {"score": <0..1>, "pass": <bool>, "rationale": "<one sentence>"}.',
|
|
31
35
|
messages: [{ role: "user", content: `RUBRIC:
|
|
32
36
|
${rubric}
|
|
33
37
|
|
|
@@ -38,12 +42,23 @@ ${result}` }]
|
|
|
38
42
|
const json = text.match(/\{[\s\S]*\}/);
|
|
39
43
|
if (!json) throw new Error(`judge returned no JSON: ${text.slice(0, 120)}`);
|
|
40
44
|
const parsed = JSON.parse(json[0]);
|
|
41
|
-
return {
|
|
45
|
+
return {
|
|
46
|
+
score: clamp01(parsed.score),
|
|
47
|
+
pass: Boolean(parsed.pass),
|
|
48
|
+
rationale: parsed.rationale ?? parsed.reason,
|
|
49
|
+
// `parsed.reason` (read on the line above) is a deprecated alias for `rationale`, kept for one release
|
|
50
|
+
tokensIn: resp.usage?.input_tokens,
|
|
51
|
+
tokensOut: resp.usage?.output_tokens
|
|
52
|
+
};
|
|
42
53
|
};
|
|
43
54
|
}
|
|
44
55
|
async function verifyEval(baseUrl, spec, judge) {
|
|
45
|
-
const
|
|
56
|
+
const model = spec.model ?? "claude-sonnet-4-6";
|
|
57
|
+
const score = judge ?? llmJudge(model);
|
|
46
58
|
const checks = [];
|
|
59
|
+
const started = Date.now();
|
|
60
|
+
let tokensIn = 0;
|
|
61
|
+
let tokensOut = 0;
|
|
47
62
|
const client = new Client({ name: "greenlight-verify", version: "0.0.0" });
|
|
48
63
|
const transport = new StreamableHTTPClientTransport(new URL(baseUrl));
|
|
49
64
|
try {
|
|
@@ -53,15 +68,22 @@ async function verifyEval(baseUrl, spec, judge) {
|
|
|
53
68
|
}
|
|
54
69
|
try {
|
|
55
70
|
for (const c of spec.cases) {
|
|
56
|
-
const min = c.minScore ??
|
|
71
|
+
const min = c.minScore ?? 0.8;
|
|
57
72
|
try {
|
|
58
73
|
const res = await client.callTool({ name: c.tool, arguments: c.args ?? {} });
|
|
59
|
-
const
|
|
74
|
+
const result = resultText(res);
|
|
75
|
+
const verdict = await score({ rubric: c.rubric, result });
|
|
60
76
|
const pass = verdict.pass && verdict.score >= min;
|
|
77
|
+
const rationale = verdict.rationale ?? verdict.reason;
|
|
78
|
+
tokensIn += verdict.tokensIn ?? 0;
|
|
79
|
+
tokensOut += verdict.tokensOut ?? 0;
|
|
61
80
|
checks.push({
|
|
62
81
|
name: `eval: ${c.name}`,
|
|
63
82
|
pass,
|
|
64
|
-
|
|
83
|
+
score: verdict.score,
|
|
84
|
+
explanation: rationale,
|
|
85
|
+
output: result,
|
|
86
|
+
detail: `score ${verdict.score.toFixed(2)} (min ${min})${rationale ? ` \u2014 ${rationale}` : ""}`
|
|
65
87
|
});
|
|
66
88
|
} catch (e) {
|
|
67
89
|
checks.push({ name: `eval: ${c.name}`, pass: false, detail: msg(e) });
|
|
@@ -70,10 +92,16 @@ async function verifyEval(baseUrl, spec, judge) {
|
|
|
70
92
|
} finally {
|
|
71
93
|
await client.close();
|
|
72
94
|
}
|
|
73
|
-
return
|
|
95
|
+
return {
|
|
96
|
+
...report("eval", baseUrl, checks),
|
|
97
|
+
model,
|
|
98
|
+
durationMs: Date.now() - started,
|
|
99
|
+
...tokensIn || tokensOut ? { tokensIn, tokensOut } : {}
|
|
100
|
+
};
|
|
74
101
|
}
|
|
75
102
|
|
|
76
103
|
export {
|
|
104
|
+
clamp01,
|
|
77
105
|
llmJudge,
|
|
78
106
|
verifyEval
|
|
79
107
|
};
|
|
@@ -421,6 +421,53 @@ async function verifyApi(baseUrl, spec) {
|
|
|
421
421
|
);
|
|
422
422
|
}
|
|
423
423
|
|
|
424
|
+
// ../packages/verify/src/export.ts
|
|
425
|
+
// Clamp a score into [0, 1]. Non-finite input (NaN, +/-Infinity, non-numeric values) maps to 0 so the JSON export never serializes "eval.score" as null; mirrors the eval judge's clamp01.
var clamp01 = (n) => { const v = typeof n === "number" ? n : Number(n); return Number.isFinite(v) ? Math.min(1, Math.max(0, v)) : 0; };
|
|
426
|
+
function sumDefined(xs) {
|
|
427
|
+
const present = xs.filter((x) => typeof x === "number");
|
|
428
|
+
return present.length ? present.reduce((a, b) => a + b, 0) : void 0;
|
|
429
|
+
}
|
|
430
|
+
function toExportResult(reports, ctx) {
|
|
431
|
+
const checks = [];
|
|
432
|
+
for (const r of reports) {
|
|
433
|
+
for (const c of r.checks) {
|
|
434
|
+
checks.push({
|
|
435
|
+
name: c.name,
|
|
436
|
+
passed: c.pass,
|
|
437
|
+
input: c.input ?? null,
|
|
438
|
+
expected: c.expected ?? null,
|
|
439
|
+
output: c.output ?? null,
|
|
440
|
+
"eval.score": c.score != null ? clamp01(c.score) : c.pass ? 1 : 0,
|
|
441
|
+
"eval.explanation": c.explanation ?? null
|
|
442
|
+
});
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
const passed = reports.length > 0 && reports.every((r) => r.pass);
|
|
446
|
+
const passRate = checks.length === 0 ? 0 : checks.filter((c) => c.passed).length / checks.length;
|
|
447
|
+
const model = reports.find((r) => r.model)?.model;
|
|
448
|
+
const tokensIn = sumDefined(reports.map((r) => r.tokensIn));
|
|
449
|
+
const tokensOut = sumDefined(reports.map((r) => r.tokensOut));
|
|
450
|
+
const cost = sumDefined(reports.map((r) => r.costUsd));
|
|
451
|
+
const durationMs = sumDefined(reports.map((r) => r.durationMs));
|
|
452
|
+
const attributes = {};
|
|
453
|
+
if (model) attributes["gen_ai.request.model"] = model;
|
|
454
|
+
if (tokensIn != null) attributes["gen_ai.usage.input_tokens"] = tokensIn;
|
|
455
|
+
if (tokensOut != null) attributes["gen_ai.usage.output_tokens"] = tokensOut;
|
|
456
|
+
if (cost != null) attributes["gen_ai.response.cost"] = cost;
|
|
457
|
+
return {
|
|
458
|
+
schemaVersion: "1",
|
|
459
|
+
tool: ctx.tool,
|
|
460
|
+
mode: reports.map((r) => r.mode).join("+") || "verify",
|
|
461
|
+
env: ctx.env,
|
|
462
|
+
git_sha: ctx.gitSha ?? null,
|
|
463
|
+
passed,
|
|
464
|
+
pass_rate: passRate,
|
|
465
|
+
duration_ms: durationMs ?? null,
|
|
466
|
+
...Object.keys(attributes).length ? { attributes } : {},
|
|
467
|
+
checks
|
|
468
|
+
};
|
|
469
|
+
}
|
|
470
|
+
|
|
424
471
|
// ../packages/verify/src/index.ts
|
|
425
472
|
function defineVerify(spec) {
|
|
426
473
|
return spec;
|
|
@@ -456,11 +503,11 @@ async function verify(baseUrl, spec, opts) {
|
|
|
456
503
|
return verifyTest2(spec, opts?.toolDir ?? process.cwd());
|
|
457
504
|
}
|
|
458
505
|
case "agent-web": {
|
|
459
|
-
const { verifyAgentWeb: verifyAgentWeb2 } = await import("./agent-web-
|
|
506
|
+
const { verifyAgentWeb: verifyAgentWeb2 } = await import("./agent-web-BG5ZIVAB.js");
|
|
460
507
|
return verifyAgentWeb2(baseUrl, spec);
|
|
461
508
|
}
|
|
462
509
|
case "eval": {
|
|
463
|
-
const { verifyEval: verifyEval2 } = await import("./eval-
|
|
510
|
+
const { verifyEval: verifyEval2 } = await import("./eval-YZXJSUKH.js");
|
|
464
511
|
return verifyEval2(baseUrl, spec);
|
|
465
512
|
}
|
|
466
513
|
}
|
|
@@ -487,6 +534,7 @@ export {
|
|
|
487
534
|
loadConfig,
|
|
488
535
|
resolveUrl,
|
|
489
536
|
scanSqlFiles,
|
|
537
|
+
toExportResult,
|
|
490
538
|
defineVerify,
|
|
491
539
|
verifyAll,
|
|
492
540
|
allPass
|
|
@@ -124,6 +124,8 @@ async function runScenario(client, page, base, spec, scenario) {
|
|
|
124
124
|
const messages = [{ role: "user", content: `Task: ${scenario.task}` }];
|
|
125
125
|
const maxSteps = spec.maxSteps ?? 12;
|
|
126
126
|
let finish = null;
|
|
127
|
+
let tokensIn = 0;
|
|
128
|
+
let tokensOut = 0;
|
|
127
129
|
for (let step = 0; step < maxSteps && !finish; step++) {
|
|
128
130
|
const resp = await client.messages.create({
|
|
129
131
|
model: spec.model ?? "claude-sonnet-4-6",
|
|
@@ -132,6 +134,8 @@ async function runScenario(client, page, base, spec, scenario) {
|
|
|
132
134
|
tools: TOOLS,
|
|
133
135
|
messages
|
|
134
136
|
});
|
|
137
|
+
tokensIn += resp.usage?.input_tokens ?? 0;
|
|
138
|
+
tokensOut += resp.usage?.output_tokens ?? 0;
|
|
135
139
|
const blocks = resp.content;
|
|
136
140
|
messages.push({ role: "assistant", content: blocks });
|
|
137
141
|
const toolUses = blocks.filter((b) => b.type === "tool_use");
|
|
@@ -160,7 +164,7 @@ async function runScenario(client, page, base, spec, scenario) {
|
|
|
160
164
|
checks.push({ ...c, name: `${tag} ${c.name}` });
|
|
161
165
|
}
|
|
162
166
|
if (checks.length === 0) checks.push({ name: `${tag} agent succeeded`, pass: true });
|
|
163
|
-
return checks;
|
|
167
|
+
return { checks, tokensIn, tokensOut };
|
|
164
168
|
}
|
|
165
169
|
async function verifyAgentWeb(baseUrl, spec) {
|
|
166
170
|
const base = baseUrl.replace(/\/+$/, "");
|
|
@@ -213,11 +217,17 @@ async function verifyAgentWeb(baseUrl, spec) {
|
|
|
213
217
|
]);
|
|
214
218
|
}
|
|
215
219
|
const checks = [];
|
|
220
|
+
const started = Date.now();
|
|
221
|
+
let tokensIn = 0;
|
|
222
|
+
let tokensOut = 0;
|
|
216
223
|
try {
|
|
217
224
|
for (const scenario of spec.scenarios) {
|
|
218
225
|
const page = await browser.newPage();
|
|
219
226
|
try {
|
|
220
|
-
|
|
227
|
+
const r = await runScenario(client, page, base, spec, scenario);
|
|
228
|
+
checks.push(...r.checks);
|
|
229
|
+
tokensIn += r.tokensIn;
|
|
230
|
+
tokensOut += r.tokensOut;
|
|
221
231
|
} catch (e) {
|
|
222
232
|
checks.push({ name: `[${scenario.name}]`, pass: false, detail: msg(e) });
|
|
223
233
|
} finally {
|
|
@@ -227,7 +237,12 @@ async function verifyAgentWeb(baseUrl, spec) {
|
|
|
227
237
|
} finally {
|
|
228
238
|
await browser.close();
|
|
229
239
|
}
|
|
230
|
-
return
|
|
240
|
+
return {
|
|
241
|
+
...report("agent-web", baseUrl, checks),
|
|
242
|
+
model: spec.model ?? "claude-sonnet-4-6",
|
|
243
|
+
durationMs: Date.now() - started,
|
|
244
|
+
...tokensIn || tokensOut ? { tokensIn, tokensOut } : {}
|
|
245
|
+
};
|
|
231
246
|
}
|
|
232
247
|
|
|
233
248
|
export {
|
package/dist/index.js
CHANGED
|
@@ -2,12 +2,12 @@ import {
|
|
|
2
2
|
defineConfig,
|
|
3
3
|
defineVerify,
|
|
4
4
|
loadConfig
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-FZH2YQPJ.js";
|
|
6
6
|
import "./chunk-HX7VA25D.js";
|
|
7
7
|
import "./chunk-N3IKUCSF.js";
|
|
8
8
|
import "./chunk-KP3Y6WRU.js";
|
|
9
|
-
import "./chunk-
|
|
10
|
-
import "./chunk-
|
|
9
|
+
import "./chunk-IYEIZYI5.js";
|
|
10
|
+
import "./chunk-3A6F2JNP.js";
|
|
11
11
|
import "./chunk-QFKE5JKC.js";
|
|
12
12
|
export {
|
|
13
13
|
defineConfig,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@rtrentjones/greenlight",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Greenlight CLI — setup and lifecycle for the harness.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -31,10 +31,10 @@
|
|
|
31
31
|
"@anthropic-ai/sdk": "^0.69.0"
|
|
32
32
|
},
|
|
33
33
|
"devDependencies": {
|
|
34
|
-
"@rtrentjones/greenlight-adapters": "0.
|
|
35
|
-
"@rtrentjones/greenlight-
|
|
36
|
-
"@rtrentjones/greenlight-
|
|
37
|
-
"@rtrentjones/greenlight-
|
|
34
|
+
"@rtrentjones/greenlight-adapters": "0.6.0",
|
|
35
|
+
"@rtrentjones/greenlight-shared": "0.6.0",
|
|
36
|
+
"@rtrentjones/greenlight-verify": "0.6.0",
|
|
37
|
+
"@rtrentjones/greenlight-loop": "0.6.0"
|
|
38
38
|
},
|
|
39
39
|
"scripts": {
|
|
40
40
|
"build": "node scripts/copy-assets.mjs && tsup",
|