@rtrentjones/greenlight 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,7 +37,11 @@ plain `.sql`); the app's own build runs its migrate against the wired **`DIRECT_
37
37
  prod branch, preview build → preview branch; a failed migrate fails the build = a natural gate). The
38
38
  **native Neon↔Vercel integration** owns ephemeral per-PR preview branches (don't put those in
39
39
  Terraform). Greenlight's only role is the **dangerous-SQL gate**: run `greenlight migrations scan`
40
- (auto-detects `supabase/migrations | migrations | drizzle/migrations | …`) in CI before the migrate.
40
+ (auto-detects `supabase/migrations | migrations | drizzle/migrations | …`) **before** the migrate.
41
+ For a Vercel-git tool the migrate runs in the build, so wire the scan there — first in the build/migrate
42
+ script: `"migrate": "greenlight migrations scan && node scripts/migrate.mjs"`, `"build": "pnpm run
43
+ migrate && next build"` (add `@rtrentjones/greenlight` as a devDep so the bin resolves in the build).
44
+ `doctor` recognizes the scan wired in the build script **or** a workflow.
41
45
  See [migrations.md](https://github.com/RTrentJones/greenlight/blob/main/docs/migrations.md).
42
46
 
43
47
  ## Sharing one DB + multi-account
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  verifyAgentWeb
3
- } from "./chunk-KVOI4UL2.js";
3
+ } from "./chunk-IYEIZYI5.js";
4
4
  import "./chunk-QFKE5JKC.js";
5
5
  export {
6
6
  verifyAgentWeb
package/dist/bin.js CHANGED
@@ -7,13 +7,14 @@ import {
7
7
  loadConfig,
8
8
  resolveUrl,
9
9
  scanSqlFiles,
10
+ toExportResult,
10
11
  verifyAll
11
- } from "./chunk-OBWWE7GE.js";
12
+ } from "./chunk-FZH2YQPJ.js";
12
13
  import "./chunk-HX7VA25D.js";
13
14
  import "./chunk-N3IKUCSF.js";
14
15
  import "./chunk-KP3Y6WRU.js";
15
- import "./chunk-KVOI4UL2.js";
16
- import "./chunk-XWTOJHLV.js";
16
+ import "./chunk-IYEIZYI5.js";
17
+ import "./chunk-3A6F2JNP.js";
17
18
  import "./chunk-QFKE5JKC.js";
18
19
 
19
20
  // src/commands/add.ts
@@ -601,7 +602,7 @@ function tokensForTool(tool) {
601
602
  }
602
603
 
603
604
  // src/version.ts
604
- var MODULE_REF = "v0.5.0";
605
+ var MODULE_REF = "v0.6.0";
605
606
  var MODULE_SOURCE_BASE = "git::https://github.com/RTrentJones/greenlight.git//infra/modules";
606
607
  function moduleSource(module, ref = MODULE_REF) {
607
608
  return `${MODULE_SOURCE_BASE}/${module}?ref=${ref}`;
@@ -2475,7 +2476,7 @@ function conformanceChecks(t, root) {
2475
2476
  const migBase = join6(root, toolDir);
2476
2477
  const migDir = resolveMigrationsDir(void 0, migBase);
2477
2478
  if (existsSync9(join6(migBase, migDir))) {
2478
- const wired = [join6(migBase, ".github/workflows"), join6(root, ".github/workflows")].some(
2479
+ const inWorkflow = [join6(migBase, ".github/workflows"), join6(root, ".github/workflows")].some(
2479
2480
  (d) => {
2480
2481
  try {
2481
2482
  return readdirSync4(d).filter((f) => /\.ya?ml$/.test(f)).some((f) => readFileSync8(join6(d, f), "utf8").includes("migrations scan"));
@@ -2484,10 +2485,19 @@ function conformanceChecks(t, root) {
2484
2485
  }
2485
2486
  }
2486
2487
  );
2488
+ const inScripts = (() => {
2489
+ try {
2490
+ const pkg = JSON.parse(readFileSync8(join6(migBase, "package.json"), "utf8"));
2491
+ return Object.values(pkg.scripts ?? {}).some((s) => s.includes("migrations scan"));
2492
+ } catch {
2493
+ return false;
2494
+ }
2495
+ })();
2496
+ const wired = inWorkflow || inScripts;
2487
2497
  out.push({
2488
2498
  name: `${t.name}: migrations gate`,
2489
2499
  status: wired ? "ok" : "warn",
2490
- detail: wired ? `${migDir} scanned in CI` : `${migDir} present but no workflow runs \`greenlight migrations scan\` \u2014 wire the dangerous-SQL gate before the apply step`
2500
+ detail: wired ? `${migDir} scanned before apply (${inWorkflow ? "CI workflow" : "build script"})` : `${migDir} present but no workflow or build script runs \`greenlight migrations scan\` \u2014 wire the dangerous-SQL gate before the apply step`
2491
2501
  });
2492
2502
  }
2493
2503
  }
@@ -2885,7 +2895,7 @@ import { setTimeout as sleep } from "timers/promises";
2885
2895
 
2886
2896
  // src/commands/verify.ts
2887
2897
  import { spawnSync } from "child_process";
2888
- import { resolve as resolve9 } from "path";
2898
+ import { basename, resolve as resolve9 } from "path";
2889
2899
  function defaultSpec(lane) {
2890
2900
  switch (lane) {
2891
2901
  case "astro":
@@ -2909,21 +2919,34 @@ function defaultSpec(lane) {
2909
2919
  };
2910
2920
  }
2911
2921
  }
2912
- function printReport(report) {
2913
- console.log(`verify ${report.mode} ${report.url}
2922
+ function printReport(report, log = console.log) {
2923
+ log(`verify ${report.mode} ${report.url}
2914
2924
  `);
2915
2925
  for (const c of report.checks) {
2916
- console.log(` ${c.pass ? "\u2714" : "\u2718"} ${c.name}${c.detail ? ` \u2014 ${c.detail}` : ""}`);
2926
+ log(` ${c.pass ? "\u2714" : "\u2718"} ${c.name}${c.detail ? ` \u2014 ${c.detail}` : ""}`);
2917
2927
  }
2918
- console.log(`
2928
+ log(`
2919
2929
  ${report.pass ? "\u2714 PASS" : "\u2718 FAIL"}`);
2920
2930
  if (!report.pass && report.logs) {
2921
- console.log(`
2931
+ log(`
2922
2932
  --- recent logs (${report.mode}) ---
2923
2933
  ${report.logs}
2924
2934
  --- end logs ---`);
2925
2935
  }
2926
2936
  }
2937
+ function emitReports(reports, json, ctx) {
2938
+ const log = json ? console.error : console.log;
2939
+ for (const report of reports) printReport(report, log);
2940
+ const pass = allPass(reports);
2941
+ if (reports.length > 1) log(`
2942
+ ${pass ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports.length} specs)`);
2943
+ if (json) process.stdout.write(`${JSON.stringify(toExportResult(reports, ctx))}
2944
+ `);
2945
+ process.exit(pass ? 0 : 1);
2946
+ }
2947
+ function gitSha() {
2948
+ return process.env.VERCEL_GIT_COMMIT_SHA ?? process.env.GITHUB_SHA ?? null;
2949
+ }
2927
2950
  var LOG_TAIL_LINES = 50;
2928
2951
  function redactSecrets(text, env = process.env) {
2929
2952
  let out = text;
@@ -2964,6 +2987,9 @@ function flag6(args, name) {
2964
2987
  const i = args.indexOf(name);
2965
2988
  return i >= 0 ? args[i + 1] : void 0;
2966
2989
  }
2990
+ function jsonFlag(args) {
2991
+ return args.includes("--json") || process.env.GREENLIGHT_VERIFY_JSON === "1";
2992
+ }
2967
2993
  async function verifyCommand(args) {
2968
2994
  const specPath = flag6(args, "--spec");
2969
2995
  if (specPath) {
@@ -2978,12 +3004,12 @@ async function verifyCommand(args) {
2978
3004
  toolDir: process.cwd()
2979
3005
  });
2980
3006
  attachFailureLogs(reports2, specs2, process.cwd());
2981
- for (const report of reports2) printReport(report);
2982
- const pass2 = allPass(reports2);
2983
- if (reports2.length > 1)
2984
- console.log(`
2985
- ${pass2 ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports2.length} specs)`);
2986
- process.exit(pass2 ? 0 : 1);
3007
+ const tool = flag6(args, "--tool") ?? basename(specPath).replace(/\.config\.[tj]s$/, "");
3008
+ emitReports(reports2, jsonFlag(args), {
3009
+ tool,
3010
+ env: flag6(args, "--env") ?? "preview",
3011
+ gitSha: gitSha()
3012
+ });
2987
3013
  }
2988
3014
  const name = args[0];
2989
3015
  if (!name || name.startsWith("-")) {
@@ -3016,12 +3042,11 @@ ${pass2 ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports2.length} specs)`);
3016
3042
  const toolDir = resolve9(process.cwd(), entry.dir ?? ".");
3017
3043
  const reports = await verifyAll(url, specs, { reachableTimeoutMs, toolDir });
3018
3044
  attachFailureLogs(reports, specs, toolDir);
3019
- for (const report of reports) printReport(report);
3020
- const pass = allPass(reports);
3021
- if (reports.length > 1)
3022
- console.log(`
3023
- ${pass ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports.length} specs)`);
3024
- process.exit(pass ? 0 : 1);
3045
+ emitReports(reports, jsonFlag(args), {
3046
+ tool: entry.name ?? name,
3047
+ env: override ? "preview" : flag6(args, "--env") ?? "preview",
3048
+ gitSha: gitSha()
3049
+ });
3025
3050
  }
3026
3051
 
3027
3052
  // src/commands/preview.ts
@@ -3378,7 +3403,7 @@ var HELP = `greenlight <command>
3378
3403
  config load & validate the manifest, then print it
3379
3404
  deploy <name> --env <env> build + deploy an entry via its target adapter
3380
3405
  preview <name> [--port <n>] build + serve locally + verify (one command)
3381
- verify <name> [--env <env> | --url <url>] run the verify harness against the URL
3406
+ verify <name> [--env <env> | --url <url>] [--json] run the verify harness (--json: standards-shaped result to stdout)
3382
3407
  promote <name> [--perform] [--push] gated develop -> main fast-forward
3383
3408
  status <name> last ship/deploy/verify run for a tool (via gh)
3384
3409
  secrets gather <name> [--repo o/r] [--env e] guided, link-first token prompts -> GitHub secrets (no disk/logs)
@@ -14,6 +14,10 @@ function resultText(res) {
14
14
  }
15
15
  return JSON.stringify(res);
16
16
  }
17
+ var clamp01 = (n) => {
18
+ const v = typeof n === "number" ? n : Number(n);
19
+ return Number.isFinite(v) ? Math.min(1, Math.max(0, v)) : 0;
20
+ };
17
21
  function llmJudge(model) {
18
22
  return async ({ rubric, result }) => {
19
23
  if (!process.env.ANTHROPIC_API_KEY) throw new Error("ANTHROPIC_API_KEY not set");
@@ -27,7 +31,7 @@ function llmJudge(model) {
27
31
  const resp = await client.messages.create({
28
32
  model,
29
33
  max_tokens: 512,
30
- system: 'You are a strict evaluation judge. Score how well RESULT satisfies RUBRIC on a 1\u20135 scale (5 = fully satisfies). Reply ONLY with JSON: {"score": <1-5>, "pass": <bool>, "reason": "<short>"}.',
34
+ system: 'You are a strict evaluation judge. Score how well RESULT satisfies RUBRIC on a 0..1 scale (1 = fully satisfies). Reply ONLY with JSON: {"score": <0..1>, "pass": <bool>, "rationale": "<one sentence>"}.',
31
35
  messages: [{ role: "user", content: `RUBRIC:
32
36
  ${rubric}
33
37
 
@@ -38,12 +42,23 @@ ${result}` }]
38
42
  const json = text.match(/\{[\s\S]*\}/);
39
43
  if (!json) throw new Error(`judge returned no JSON: ${text.slice(0, 120)}`);
40
44
  const parsed = JSON.parse(json[0]);
41
- return { score: Number(parsed.score) || 0, pass: Boolean(parsed.pass), reason: parsed.reason };
45
+ return {
46
+ score: clamp01(parsed.score),
47
+ pass: Boolean(parsed.pass),
48
+ rationale: parsed.rationale ?? parsed.reason,
49
+ // `reason` = deprecated alias, one release
50
+ tokensIn: resp.usage?.input_tokens,
51
+ tokensOut: resp.usage?.output_tokens
52
+ };
42
53
  };
43
54
  }
44
55
  async function verifyEval(baseUrl, spec, judge) {
45
- const score = judge ?? llmJudge(spec.model ?? "claude-sonnet-4-6");
56
+ const model = spec.model ?? "claude-sonnet-4-6";
57
+ const score = judge ?? llmJudge(model);
46
58
  const checks = [];
59
+ const started = Date.now();
60
+ let tokensIn = 0;
61
+ let tokensOut = 0;
47
62
  const client = new Client({ name: "greenlight-verify", version: "0.0.0" });
48
63
  const transport = new StreamableHTTPClientTransport(new URL(baseUrl));
49
64
  try {
@@ -53,15 +68,22 @@ async function verifyEval(baseUrl, spec, judge) {
53
68
  }
54
69
  try {
55
70
  for (const c of spec.cases) {
56
- const min = c.minScore ?? 4;
71
+ const min = c.minScore ?? 0.8;
57
72
  try {
58
73
  const res = await client.callTool({ name: c.tool, arguments: c.args ?? {} });
59
- const verdict = await score({ rubric: c.rubric, result: resultText(res) });
74
+ const result = resultText(res);
75
+ const verdict = await score({ rubric: c.rubric, result });
60
76
  const pass = verdict.pass && verdict.score >= min;
77
+ const rationale = verdict.rationale ?? verdict.reason;
78
+ tokensIn += verdict.tokensIn ?? 0;
79
+ tokensOut += verdict.tokensOut ?? 0;
61
80
  checks.push({
62
81
  name: `eval: ${c.name}`,
63
82
  pass,
64
- detail: `score ${verdict.score}/5 (min ${min})${verdict.reason ? ` \u2014 ${verdict.reason}` : ""}`
83
+ score: verdict.score,
84
+ explanation: rationale,
85
+ output: result,
86
+ detail: `score ${verdict.score.toFixed(2)} (min ${min})${rationale ? ` \u2014 ${rationale}` : ""}`
65
87
  });
66
88
  } catch (e) {
67
89
  checks.push({ name: `eval: ${c.name}`, pass: false, detail: msg(e) });
@@ -70,10 +92,16 @@ async function verifyEval(baseUrl, spec, judge) {
70
92
  } finally {
71
93
  await client.close();
72
94
  }
73
- return report("eval", baseUrl, checks);
95
+ return {
96
+ ...report("eval", baseUrl, checks),
97
+ model,
98
+ durationMs: Date.now() - started,
99
+ ...tokensIn || tokensOut ? { tokensIn, tokensOut } : {}
100
+ };
74
101
  }
75
102
 
76
103
  export {
104
+ clamp01,
77
105
  llmJudge,
78
106
  verifyEval
79
107
  };
@@ -421,6 +421,53 @@ async function verifyApi(baseUrl, spec) {
421
421
  );
422
422
  }
423
423
 
424
+ // ../packages/verify/src/export.ts
425
+ var clamp01 = (n) => Math.min(1, Math.max(0, n));
426
+ function sumDefined(xs) {
427
+ const present = xs.filter((x) => typeof x === "number");
428
+ return present.length ? present.reduce((a, b) => a + b, 0) : void 0;
429
+ }
430
+ function toExportResult(reports, ctx) {
431
+ const checks = [];
432
+ for (const r of reports) {
433
+ for (const c of r.checks) {
434
+ checks.push({
435
+ name: c.name,
436
+ passed: c.pass,
437
+ input: c.input ?? null,
438
+ expected: c.expected ?? null,
439
+ output: c.output ?? null,
440
+ "eval.score": c.score != null ? clamp01(c.score) : c.pass ? 1 : 0,
441
+ "eval.explanation": c.explanation ?? null
442
+ });
443
+ }
444
+ }
445
+ const passed = reports.length > 0 && reports.every((r) => r.pass);
446
+ const passRate = checks.length === 0 ? 0 : checks.filter((c) => c.passed).length / checks.length;
447
+ const model = reports.find((r) => r.model)?.model;
448
+ const tokensIn = sumDefined(reports.map((r) => r.tokensIn));
449
+ const tokensOut = sumDefined(reports.map((r) => r.tokensOut));
450
+ const cost = sumDefined(reports.map((r) => r.costUsd));
451
+ const durationMs = sumDefined(reports.map((r) => r.durationMs));
452
+ const attributes = {};
453
+ if (model) attributes["gen_ai.request.model"] = model;
454
+ if (tokensIn != null) attributes["gen_ai.usage.input_tokens"] = tokensIn;
455
+ if (tokensOut != null) attributes["gen_ai.usage.output_tokens"] = tokensOut;
456
+ if (cost != null) attributes["gen_ai.response.cost"] = cost;
457
+ return {
458
+ schemaVersion: "1",
459
+ tool: ctx.tool,
460
+ mode: reports.map((r) => r.mode).join("+") || "verify",
461
+ env: ctx.env,
462
+ git_sha: ctx.gitSha ?? null,
463
+ passed,
464
+ pass_rate: passRate,
465
+ duration_ms: durationMs ?? null,
466
+ ...Object.keys(attributes).length ? { attributes } : {},
467
+ checks
468
+ };
469
+ }
470
+
424
471
  // ../packages/verify/src/index.ts
425
472
  function defineVerify(spec) {
426
473
  return spec;
@@ -456,11 +503,11 @@ async function verify(baseUrl, spec, opts) {
456
503
  return verifyTest2(spec, opts?.toolDir ?? process.cwd());
457
504
  }
458
505
  case "agent-web": {
459
- const { verifyAgentWeb: verifyAgentWeb2 } = await import("./agent-web-3FTO2TLJ.js");
506
+ const { verifyAgentWeb: verifyAgentWeb2 } = await import("./agent-web-BG5ZIVAB.js");
460
507
  return verifyAgentWeb2(baseUrl, spec);
461
508
  }
462
509
  case "eval": {
463
- const { verifyEval: verifyEval2 } = await import("./eval-44S2BATV.js");
510
+ const { verifyEval: verifyEval2 } = await import("./eval-YZXJSUKH.js");
464
511
  return verifyEval2(baseUrl, spec);
465
512
  }
466
513
  }
@@ -487,6 +534,7 @@ export {
487
534
  loadConfig,
488
535
  resolveUrl,
489
536
  scanSqlFiles,
537
+ toExportResult,
490
538
  defineVerify,
491
539
  verifyAll,
492
540
  allPass
@@ -124,6 +124,8 @@ async function runScenario(client, page, base, spec, scenario) {
124
124
  const messages = [{ role: "user", content: `Task: ${scenario.task}` }];
125
125
  const maxSteps = spec.maxSteps ?? 12;
126
126
  let finish = null;
127
+ let tokensIn = 0;
128
+ let tokensOut = 0;
127
129
  for (let step = 0; step < maxSteps && !finish; step++) {
128
130
  const resp = await client.messages.create({
129
131
  model: spec.model ?? "claude-sonnet-4-6",
@@ -132,6 +134,8 @@ async function runScenario(client, page, base, spec, scenario) {
132
134
  tools: TOOLS,
133
135
  messages
134
136
  });
137
+ tokensIn += resp.usage?.input_tokens ?? 0;
138
+ tokensOut += resp.usage?.output_tokens ?? 0;
135
139
  const blocks = resp.content;
136
140
  messages.push({ role: "assistant", content: blocks });
137
141
  const toolUses = blocks.filter((b) => b.type === "tool_use");
@@ -160,7 +164,7 @@ async function runScenario(client, page, base, spec, scenario) {
160
164
  checks.push({ ...c, name: `${tag} ${c.name}` });
161
165
  }
162
166
  if (checks.length === 0) checks.push({ name: `${tag} agent succeeded`, pass: true });
163
- return checks;
167
+ return { checks, tokensIn, tokensOut };
164
168
  }
165
169
  async function verifyAgentWeb(baseUrl, spec) {
166
170
  const base = baseUrl.replace(/\/+$/, "");
@@ -213,11 +217,17 @@ async function verifyAgentWeb(baseUrl, spec) {
213
217
  ]);
214
218
  }
215
219
  const checks = [];
220
+ const started = Date.now();
221
+ let tokensIn = 0;
222
+ let tokensOut = 0;
216
223
  try {
217
224
  for (const scenario of spec.scenarios) {
218
225
  const page = await browser.newPage();
219
226
  try {
220
- checks.push(...await runScenario(client, page, base, spec, scenario));
227
+ const r = await runScenario(client, page, base, spec, scenario);
228
+ checks.push(...r.checks);
229
+ tokensIn += r.tokensIn;
230
+ tokensOut += r.tokensOut;
221
231
  } catch (e) {
222
232
  checks.push({ name: `[${scenario.name}]`, pass: false, detail: msg(e) });
223
233
  } finally {
@@ -227,7 +237,12 @@ async function verifyAgentWeb(baseUrl, spec) {
227
237
  } finally {
228
238
  await browser.close();
229
239
  }
230
- return report("agent-web", baseUrl, checks);
240
+ return {
241
+ ...report("agent-web", baseUrl, checks),
242
+ model: spec.model ?? "claude-sonnet-4-6",
243
+ durationMs: Date.now() - started,
244
+ ...tokensIn || tokensOut ? { tokensIn, tokensOut } : {}
245
+ };
231
246
  }
232
247
 
233
248
  export {
@@ -1,9 +1,11 @@
1
1
  import {
2
+ clamp01,
2
3
  llmJudge,
3
4
  verifyEval
4
- } from "./chunk-XWTOJHLV.js";
5
+ } from "./chunk-3A6F2JNP.js";
5
6
  import "./chunk-QFKE5JKC.js";
6
7
  export {
8
+ clamp01,
7
9
  llmJudge,
8
10
  verifyEval
9
11
  };
package/dist/index.js CHANGED
@@ -2,12 +2,12 @@ import {
2
2
  defineConfig,
3
3
  defineVerify,
4
4
  loadConfig
5
- } from "./chunk-OBWWE7GE.js";
5
+ } from "./chunk-FZH2YQPJ.js";
6
6
  import "./chunk-HX7VA25D.js";
7
7
  import "./chunk-N3IKUCSF.js";
8
8
  import "./chunk-KP3Y6WRU.js";
9
- import "./chunk-KVOI4UL2.js";
10
- import "./chunk-XWTOJHLV.js";
9
+ import "./chunk-IYEIZYI5.js";
10
+ import "./chunk-3A6F2JNP.js";
11
11
  import "./chunk-QFKE5JKC.js";
12
12
  export {
13
13
  defineConfig,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rtrentjones/greenlight",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "Greenlight CLI — setup and lifecycle for the harness.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -31,10 +31,10 @@
31
31
  "@anthropic-ai/sdk": "^0.69.0"
32
32
  },
33
33
  "devDependencies": {
34
- "@rtrentjones/greenlight-adapters": "0.5.0",
35
- "@rtrentjones/greenlight-loop": "0.5.0",
36
- "@rtrentjones/greenlight-shared": "0.5.0",
37
- "@rtrentjones/greenlight-verify": "0.5.0"
34
+ "@rtrentjones/greenlight-adapters": "0.6.0",
35
+ "@rtrentjones/greenlight-shared": "0.6.0",
36
+ "@rtrentjones/greenlight-verify": "0.6.0",
37
+ "@rtrentjones/greenlight-loop": "0.6.0"
38
38
  },
39
39
  "scripts": {
40
40
  "build": "node scripts/copy-assets.mjs && tsup",