@rtrentjones/greenlight 0.5.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  verifyAgentWeb
3
- } from "./chunk-KVOI4UL2.js";
3
+ } from "./chunk-IYEIZYI5.js";
4
4
  import "./chunk-QFKE5JKC.js";
5
5
  export {
6
6
  verifyAgentWeb
package/dist/bin.js CHANGED
@@ -7,13 +7,14 @@ import {
7
7
  loadConfig,
8
8
  resolveUrl,
9
9
  scanSqlFiles,
10
+ toExportResult,
10
11
  verifyAll
11
- } from "./chunk-OBWWE7GE.js";
12
+ } from "./chunk-MDG3MT34.js";
12
13
  import "./chunk-HX7VA25D.js";
13
14
  import "./chunk-N3IKUCSF.js";
14
15
  import "./chunk-KP3Y6WRU.js";
15
- import "./chunk-KVOI4UL2.js";
16
- import "./chunk-XWTOJHLV.js";
16
+ import "./chunk-IYEIZYI5.js";
17
+ import "./chunk-3A6F2JNP.js";
17
18
  import "./chunk-QFKE5JKC.js";
18
19
 
19
20
  // src/commands/add.ts
@@ -601,7 +602,7 @@ function tokensForTool(tool) {
601
602
  }
602
603
 
603
604
  // src/version.ts
604
- var MODULE_REF = "v0.5.1";
605
+ var MODULE_REF = "v0.6.1";
605
606
  var MODULE_SOURCE_BASE = "git::https://github.com/RTrentJones/greenlight.git//infra/modules";
606
607
  function moduleSource(module, ref = MODULE_REF) {
607
608
  return `${MODULE_SOURCE_BASE}/${module}?ref=${ref}`;
@@ -1120,6 +1121,14 @@ function setGitHubSecret(repo, env, key, value) {
1120
1121
  throw new Error(`failed to set ${key}${detail ? `: ${detail}` : " (check `gh auth status`)"}`);
1121
1122
  }
1122
1123
  }
1124
+ function appSecretsToGather(entry, packs) {
1125
+ const packKeys = new Set(
1126
+ packs.flatMap(
1127
+ (p) => p.tokens.map((t) => secretKeyFor(t, entry.name ?? "", entry.tokenOverrides))
1128
+ )
1129
+ );
1130
+ return (entry.tokens ?? []).filter((k) => !packKeys.has(k));
1131
+ }
1123
1132
  async function gatherSecrets(name, repo, env, prefill) {
1124
1133
  const { config } = await loadManifest();
1125
1134
  const entry = resolveEntry(config, name);
@@ -1178,6 +1187,29 @@ async function gatherSecrets(name, repo, env, prefill) {
1178
1187
  pushed++;
1179
1188
  }
1180
1189
  }
1190
+ const appSecrets = appSecretsToGather(entry, packs);
1191
+ if (appSecrets.length) {
1192
+ console.log(`\u2500\u2500 ${name} (app secrets)`);
1193
+ for (const key of appSecrets) {
1194
+ const pre = prefill?.get(key);
1195
+ if (pre) {
1196
+ setGitHubSecret(repo, env, key, pre);
1197
+ console.log(` \u2714 ${existing?.has(key) ? "overrode" : "pushed"} ${key} \u2190 prefill`);
1198
+ pushed++;
1199
+ continue;
1200
+ }
1201
+ const state = existing ? existing.has(key) ? " [already set]" : " [not set]" : "";
1202
+ const value = await prompt.ask(` ${key}${state}
1203
+ value: `);
1204
+ if (!value) {
1205
+ console.log(existing?.has(key) ? " \xB7 kept existing" : " \xB7 skipped");
1206
+ continue;
1207
+ }
1208
+ setGitHubSecret(repo, env, key, value);
1209
+ console.log(` \u2714 ${existing?.has(key) ? "overrode" : "pushed"} ${key} \u2192 ${repo}`);
1210
+ pushed++;
1211
+ }
1212
+ }
1181
1213
  } finally {
1182
1214
  prompt.close();
1183
1215
  }
@@ -2894,7 +2926,7 @@ import { setTimeout as sleep } from "timers/promises";
2894
2926
 
2895
2927
  // src/commands/verify.ts
2896
2928
  import { spawnSync } from "child_process";
2897
- import { resolve as resolve9 } from "path";
2929
+ import { basename, resolve as resolve9 } from "path";
2898
2930
  function defaultSpec(lane) {
2899
2931
  switch (lane) {
2900
2932
  case "astro":
@@ -2918,21 +2950,34 @@ function defaultSpec(lane) {
2918
2950
  };
2919
2951
  }
2920
2952
  }
2921
- function printReport(report) {
2922
- console.log(`verify ${report.mode} ${report.url}
2953
+ function printReport(report, log = console.log) {
2954
+ log(`verify ${report.mode} ${report.url}
2923
2955
  `);
2924
2956
  for (const c of report.checks) {
2925
- console.log(` ${c.pass ? "\u2714" : "\u2718"} ${c.name}${c.detail ? ` \u2014 ${c.detail}` : ""}`);
2957
+ log(` ${c.pass ? "\u2714" : "\u2718"} ${c.name}${c.detail ? ` \u2014 ${c.detail}` : ""}`);
2926
2958
  }
2927
- console.log(`
2959
+ log(`
2928
2960
  ${report.pass ? "\u2714 PASS" : "\u2718 FAIL"}`);
2929
2961
  if (!report.pass && report.logs) {
2930
- console.log(`
2962
+ log(`
2931
2963
  --- recent logs (${report.mode}) ---
2932
2964
  ${report.logs}
2933
2965
  --- end logs ---`);
2934
2966
  }
2935
2967
  }
2968
+ function emitReports(reports, json, ctx) {
2969
+ const log = json ? console.error : console.log;
2970
+ for (const report of reports) printReport(report, log);
2971
+ const pass = allPass(reports);
2972
+ if (reports.length > 1) log(`
2973
+ ${pass ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports.length} specs)`);
2974
+ if (json) process.stdout.write(`${JSON.stringify(toExportResult(reports, ctx))}
2975
+ `);
2976
+ process.exit(pass ? 0 : 1);
2977
+ }
2978
+ function gitSha() {
2979
+ return process.env.VERCEL_GIT_COMMIT_SHA ?? process.env.GITHUB_SHA ?? null;
2980
+ }
2936
2981
  var LOG_TAIL_LINES = 50;
2937
2982
  function redactSecrets(text, env = process.env) {
2938
2983
  let out = text;
@@ -2973,6 +3018,9 @@ function flag6(args, name) {
2973
3018
  const i = args.indexOf(name);
2974
3019
  return i >= 0 ? args[i + 1] : void 0;
2975
3020
  }
3021
+ function jsonFlag(args) {
3022
+ return args.includes("--json") || process.env.GREENLIGHT_VERIFY_JSON === "1";
3023
+ }
2976
3024
  async function verifyCommand(args) {
2977
3025
  const specPath = flag6(args, "--spec");
2978
3026
  if (specPath) {
@@ -2987,12 +3035,12 @@ async function verifyCommand(args) {
2987
3035
  toolDir: process.cwd()
2988
3036
  });
2989
3037
  attachFailureLogs(reports2, specs2, process.cwd());
2990
- for (const report of reports2) printReport(report);
2991
- const pass2 = allPass(reports2);
2992
- if (reports2.length > 1)
2993
- console.log(`
2994
- ${pass2 ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports2.length} specs)`);
2995
- process.exit(pass2 ? 0 : 1);
3038
+ const tool = flag6(args, "--tool") ?? basename(specPath).replace(/\.config\.[tj]s$/, "");
3039
+ emitReports(reports2, jsonFlag(args), {
3040
+ tool,
3041
+ env: flag6(args, "--env") ?? "preview",
3042
+ gitSha: gitSha()
3043
+ });
2996
3044
  }
2997
3045
  const name = args[0];
2998
3046
  if (!name || name.startsWith("-")) {
@@ -3025,12 +3073,11 @@ ${pass2 ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports2.length} specs)`);
3025
3073
  const toolDir = resolve9(process.cwd(), entry.dir ?? ".");
3026
3074
  const reports = await verifyAll(url, specs, { reachableTimeoutMs, toolDir });
3027
3075
  attachFailureLogs(reports, specs, toolDir);
3028
- for (const report of reports) printReport(report);
3029
- const pass = allPass(reports);
3030
- if (reports.length > 1)
3031
- console.log(`
3032
- ${pass ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports.length} specs)`);
3033
- process.exit(pass ? 0 : 1);
3076
+ emitReports(reports, jsonFlag(args), {
3077
+ tool: entry.name ?? name,
3078
+ env: override ? "preview" : flag6(args, "--env") ?? "preview",
3079
+ gitSha: gitSha()
3080
+ });
3034
3081
  }
3035
3082
 
3036
3083
  // src/commands/preview.ts
@@ -3387,7 +3434,7 @@ var HELP = `greenlight <command>
3387
3434
  config load & validate the manifest, then print it
3388
3435
  deploy <name> --env <env> build + deploy an entry via its target adapter
3389
3436
  preview <name> [--port <n>] build + serve locally + verify (one command)
3390
- verify <name> [--env <env> | --url <url>] run the verify harness against the URL
3437
+ verify <name> [--env <env> | --url <url>] [--json] run the verify harness (--json: standards-shaped result to stdout)
3391
3438
  promote <name> [--perform] [--push] gated develop -> main fast-forward
3392
3439
  status <name> last ship/deploy/verify run for a tool (via gh)
3393
3440
  secrets gather <name> [--repo o/r] [--env e] guided, link-first token prompts -> GitHub secrets (no disk/logs)
@@ -14,6 +14,10 @@ function resultText(res) {
14
14
  }
15
15
  return JSON.stringify(res);
16
16
  }
17
+ var clamp01 = (n) => {
18
+ const v = typeof n === "number" ? n : Number(n);
19
+ return Number.isFinite(v) ? Math.min(1, Math.max(0, v)) : 0;
20
+ };
17
21
  function llmJudge(model) {
18
22
  return async ({ rubric, result }) => {
19
23
  if (!process.env.ANTHROPIC_API_KEY) throw new Error("ANTHROPIC_API_KEY not set");
@@ -27,7 +31,7 @@ function llmJudge(model) {
27
31
  const resp = await client.messages.create({
28
32
  model,
29
33
  max_tokens: 512,
30
- system: 'You are a strict evaluation judge. Score how well RESULT satisfies RUBRIC on a 1\u20135 scale (5 = fully satisfies). Reply ONLY with JSON: {"score": <1-5>, "pass": <bool>, "reason": "<short>"}.',
34
+ system: 'You are a strict evaluation judge. Score how well RESULT satisfies RUBRIC on a 0..1 scale (1 = fully satisfies). Reply ONLY with JSON: {"score": <0..1>, "pass": <bool>, "rationale": "<one sentence>"}.',
31
35
  messages: [{ role: "user", content: `RUBRIC:
32
36
  ${rubric}
33
37
 
@@ -38,12 +42,23 @@ ${result}` }]
38
42
  const json = text.match(/\{[\s\S]*\}/);
39
43
  if (!json) throw new Error(`judge returned no JSON: ${text.slice(0, 120)}`);
40
44
  const parsed = JSON.parse(json[0]);
41
- return { score: Number(parsed.score) || 0, pass: Boolean(parsed.pass), reason: parsed.reason };
45
+ return {
46
+ score: clamp01(parsed.score),
47
+ pass: Boolean(parsed.pass),
48
+ rationale: parsed.rationale ?? parsed.reason,
49
+ // `reason` = deprecated alias, one release
50
+ tokensIn: resp.usage?.input_tokens,
51
+ tokensOut: resp.usage?.output_tokens
52
+ };
42
53
  };
43
54
  }
44
55
  async function verifyEval(baseUrl, spec, judge) {
45
- const score = judge ?? llmJudge(spec.model ?? "claude-sonnet-4-6");
56
+ const model = spec.model ?? "claude-sonnet-4-6";
57
+ const score = judge ?? llmJudge(model);
46
58
  const checks = [];
59
+ const started = Date.now();
60
+ let tokensIn = 0;
61
+ let tokensOut = 0;
47
62
  const client = new Client({ name: "greenlight-verify", version: "0.0.0" });
48
63
  const transport = new StreamableHTTPClientTransport(new URL(baseUrl));
49
64
  try {
@@ -53,15 +68,22 @@ async function verifyEval(baseUrl, spec, judge) {
53
68
  }
54
69
  try {
55
70
  for (const c of spec.cases) {
56
- const min = c.minScore ?? 4;
71
+ const min = c.minScore ?? 0.8;
57
72
  try {
58
73
  const res = await client.callTool({ name: c.tool, arguments: c.args ?? {} });
59
- const verdict = await score({ rubric: c.rubric, result: resultText(res) });
74
+ const result = resultText(res);
75
+ const verdict = await score({ rubric: c.rubric, result });
60
76
  const pass = verdict.pass && verdict.score >= min;
77
+ const rationale = verdict.rationale ?? verdict.reason;
78
+ tokensIn += verdict.tokensIn ?? 0;
79
+ tokensOut += verdict.tokensOut ?? 0;
61
80
  checks.push({
62
81
  name: `eval: ${c.name}`,
63
82
  pass,
64
- detail: `score ${verdict.score}/5 (min ${min})${verdict.reason ? ` \u2014 ${verdict.reason}` : ""}`
83
+ score: verdict.score,
84
+ explanation: rationale,
85
+ output: result,
86
+ detail: `score ${verdict.score.toFixed(2)} (min ${min})${rationale ? ` \u2014 ${rationale}` : ""}`
65
87
  });
66
88
  } catch (e) {
67
89
  checks.push({ name: `eval: ${c.name}`, pass: false, detail: msg(e) });
@@ -70,10 +92,16 @@ async function verifyEval(baseUrl, spec, judge) {
70
92
  } finally {
71
93
  await client.close();
72
94
  }
73
- return report("eval", baseUrl, checks);
95
+ return {
96
+ ...report("eval", baseUrl, checks),
97
+ model,
98
+ durationMs: Date.now() - started,
99
+ ...tokensIn || tokensOut ? { tokensIn, tokensOut } : {}
100
+ };
74
101
  }
75
102
 
76
103
  export {
104
+ clamp01,
77
105
  llmJudge,
78
106
  verifyEval
79
107
  };
@@ -124,6 +124,8 @@ async function runScenario(client, page, base, spec, scenario) {
124
124
  const messages = [{ role: "user", content: `Task: ${scenario.task}` }];
125
125
  const maxSteps = spec.maxSteps ?? 12;
126
126
  let finish = null;
127
+ let tokensIn = 0;
128
+ let tokensOut = 0;
127
129
  for (let step = 0; step < maxSteps && !finish; step++) {
128
130
  const resp = await client.messages.create({
129
131
  model: spec.model ?? "claude-sonnet-4-6",
@@ -132,6 +134,8 @@ async function runScenario(client, page, base, spec, scenario) {
132
134
  tools: TOOLS,
133
135
  messages
134
136
  });
137
+ tokensIn += resp.usage?.input_tokens ?? 0;
138
+ tokensOut += resp.usage?.output_tokens ?? 0;
135
139
  const blocks = resp.content;
136
140
  messages.push({ role: "assistant", content: blocks });
137
141
  const toolUses = blocks.filter((b) => b.type === "tool_use");
@@ -160,7 +164,7 @@ async function runScenario(client, page, base, spec, scenario) {
160
164
  checks.push({ ...c, name: `${tag} ${c.name}` });
161
165
  }
162
166
  if (checks.length === 0) checks.push({ name: `${tag} agent succeeded`, pass: true });
163
- return checks;
167
+ return { checks, tokensIn, tokensOut };
164
168
  }
165
169
  async function verifyAgentWeb(baseUrl, spec) {
166
170
  const base = baseUrl.replace(/\/+$/, "");
@@ -213,11 +217,17 @@ async function verifyAgentWeb(baseUrl, spec) {
213
217
  ]);
214
218
  }
215
219
  const checks = [];
220
+ const started = Date.now();
221
+ let tokensIn = 0;
222
+ let tokensOut = 0;
216
223
  try {
217
224
  for (const scenario of spec.scenarios) {
218
225
  const page = await browser.newPage();
219
226
  try {
220
- checks.push(...await runScenario(client, page, base, spec, scenario));
227
+ const r = await runScenario(client, page, base, spec, scenario);
228
+ checks.push(...r.checks);
229
+ tokensIn += r.tokensIn;
230
+ tokensOut += r.tokensOut;
221
231
  } catch (e) {
222
232
  checks.push({ name: `[${scenario.name}]`, pass: false, detail: msg(e) });
223
233
  } finally {
@@ -227,7 +237,12 @@ async function verifyAgentWeb(baseUrl, spec) {
227
237
  } finally {
228
238
  await browser.close();
229
239
  }
230
- return report("agent-web", baseUrl, checks);
240
+ return {
241
+ ...report("agent-web", baseUrl, checks),
242
+ model: spec.model ?? "claude-sonnet-4-6",
243
+ durationMs: Date.now() - started,
244
+ ...tokensIn || tokensOut ? { tokensIn, tokensOut } : {}
245
+ };
231
246
  }
232
247
 
233
248
  export {
@@ -421,6 +421,53 @@ async function verifyApi(baseUrl, spec) {
421
421
  );
422
422
  }
423
423
 
424
+ // ../packages/verify/src/export.ts
425
+ var clamp01 = (n) => Math.min(1, Math.max(0, n));
426
+ function sumDefined(xs) {
427
+ const present = xs.filter((x) => typeof x === "number");
428
+ return present.length ? present.reduce((a, b) => a + b, 0) : void 0;
429
+ }
430
+ function toExportResult(reports, ctx) {
431
+ const checks = [];
432
+ for (const r of reports) {
433
+ for (const c of r.checks) {
434
+ checks.push({
435
+ name: c.name,
436
+ passed: c.pass,
437
+ input: c.input ?? null,
438
+ expected: c.expected ?? null,
439
+ output: c.output ?? null,
440
+ "eval.score": c.score != null ? clamp01(c.score) : c.pass ? 1 : 0,
441
+ "eval.explanation": c.explanation ?? null
442
+ });
443
+ }
444
+ }
445
+ const passed = reports.length > 0 && reports.every((r) => r.pass);
446
+ const passRate = checks.length === 0 ? 0 : checks.filter((c) => c.passed).length / checks.length;
447
+ const model = reports.find((r) => r.model)?.model;
448
+ const tokensIn = sumDefined(reports.map((r) => r.tokensIn));
449
+ const tokensOut = sumDefined(reports.map((r) => r.tokensOut));
450
+ const cost = sumDefined(reports.map((r) => r.costUsd));
451
+ const durationMs = sumDefined(reports.map((r) => r.durationMs));
452
+ const attributes = {};
453
+ if (model) attributes["gen_ai.request.model"] = model;
454
+ if (tokensIn != null) attributes["gen_ai.usage.input_tokens"] = tokensIn;
455
+ if (tokensOut != null) attributes["gen_ai.usage.output_tokens"] = tokensOut;
456
+ if (cost != null) attributes["gen_ai.response.cost"] = cost;
457
+ return {
458
+ schemaVersion: "1",
459
+ tool: ctx.tool,
460
+ mode: reports.map((r) => r.mode).join("+") || "verify",
461
+ env: ctx.env,
462
+ git_sha: ctx.gitSha ?? null,
463
+ passed,
464
+ pass_rate: passRate,
465
+ duration_ms: durationMs ?? null,
466
+ ...Object.keys(attributes).length ? { attributes } : {},
467
+ checks
468
+ };
469
+ }
470
+
424
471
  // ../packages/verify/src/index.ts
425
472
  function defineVerify(spec) {
426
473
  return spec;
@@ -456,11 +503,11 @@ async function verify(baseUrl, spec, opts) {
456
503
  return verifyTest2(spec, opts?.toolDir ?? process.cwd());
457
504
  }
458
505
  case "agent-web": {
459
- const { verifyAgentWeb: verifyAgentWeb2 } = await import("./agent-web-3FTO2TLJ.js");
506
+ const { verifyAgentWeb: verifyAgentWeb2 } = await import("./agent-web-BG5ZIVAB.js");
460
507
  return verifyAgentWeb2(baseUrl, spec);
461
508
  }
462
509
  case "eval": {
463
- const { verifyEval: verifyEval2 } = await import("./eval-44S2BATV.js");
510
+ const { verifyEval: verifyEval2 } = await import("./eval-YZXJSUKH.js");
464
511
  return verifyEval2(baseUrl, spec);
465
512
  }
466
513
  }
@@ -487,6 +534,7 @@ export {
487
534
  loadConfig,
488
535
  resolveUrl,
489
536
  scanSqlFiles,
537
+ toExportResult,
490
538
  defineVerify,
491
539
  verifyAll,
492
540
  allPass
@@ -1,9 +1,11 @@
1
1
  import {
2
+ clamp01,
2
3
  llmJudge,
3
4
  verifyEval
4
- } from "./chunk-XWTOJHLV.js";
5
+ } from "./chunk-3A6F2JNP.js";
5
6
  import "./chunk-QFKE5JKC.js";
6
7
  export {
8
+ clamp01,
7
9
  llmJudge,
8
10
  verifyEval
9
11
  };
package/dist/index.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- export { GreenlightConfig, defineConfig, loadConfig } from '@rtrentjones/greenlight-shared';
1
+ export { GreenlightConfig, ResolveUrlOptions, defineConfig, loadConfig, resolveUrl } from '@rtrentjones/greenlight-shared';
2
2
  export { VerifySpec, defineVerify } from '@rtrentjones/greenlight-verify';
package/dist/index.js CHANGED
@@ -1,16 +1,18 @@
1
1
  import {
2
2
  defineConfig,
3
3
  defineVerify,
4
- loadConfig
5
- } from "./chunk-OBWWE7GE.js";
4
+ loadConfig,
5
+ resolveUrl
6
+ } from "./chunk-MDG3MT34.js";
6
7
  import "./chunk-HX7VA25D.js";
7
8
  import "./chunk-N3IKUCSF.js";
8
9
  import "./chunk-KP3Y6WRU.js";
9
- import "./chunk-KVOI4UL2.js";
10
- import "./chunk-XWTOJHLV.js";
10
+ import "./chunk-IYEIZYI5.js";
11
+ import "./chunk-3A6F2JNP.js";
11
12
  import "./chunk-QFKE5JKC.js";
12
13
  export {
13
14
  defineConfig,
14
15
  defineVerify,
15
- loadConfig
16
+ loadConfig,
17
+ resolveUrl
16
18
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rtrentjones/greenlight",
3
- "version": "0.5.1",
3
+ "version": "0.6.1",
4
4
  "description": "Greenlight CLI — setup and lifecycle for the harness.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -31,10 +31,10 @@
31
31
  "@anthropic-ai/sdk": "^0.69.0"
32
32
  },
33
33
  "devDependencies": {
34
- "@rtrentjones/greenlight-adapters": "0.5.1",
35
- "@rtrentjones/greenlight-loop": "0.5.1",
36
- "@rtrentjones/greenlight-verify": "0.5.1",
37
- "@rtrentjones/greenlight-shared": "0.5.1"
34
+ "@rtrentjones/greenlight-adapters": "0.6.1",
35
+ "@rtrentjones/greenlight-loop": "0.6.1",
36
+ "@rtrentjones/greenlight-shared": "0.6.1",
37
+ "@rtrentjones/greenlight-verify": "0.6.1"
38
38
  },
39
39
  "scripts": {
40
40
  "build": "node scripts/copy-assets.mjs && tsup",