npm - @rtrentjones/greenlight - Version diff - 0.5.1 → 0.6.0 - Mend

@rtrentjones/greenlight 0.5.1 → 0.6.0

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (8)

package/dist/{agent-web-3FTO2TLJ.js → agent-web-BG5ZIVAB.js} +1 -1
package/dist/bin.js +39 -23
package/dist/{chunk-XWTOJHLV.js → chunk-3A6F2JNP.js} +35 -7
package/dist/{chunk-OBWWE7GE.js → chunk-FZH2YQPJ.js} +50 -2
package/dist/{chunk-KVOI4UL2.js → chunk-IYEIZYI5.js} +18 -3
package/dist/{eval-44S2BATV.js → eval-YZXJSUKH.js} +3 -1
package/dist/index.js +3 -3
package/package.json +5 -5

package/dist/{agent-web-3FTO2TLJ.js → agent-web-BG5ZIVAB.js} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   verifyAgentWeb
-} from "./chunk-KVOI4UL2.js";
+} from "./chunk-IYEIZYI5.js";
 import "./chunk-QFKE5JKC.js";
 export {
   verifyAgentWeb

package/dist/bin.js CHANGED Viewed

@@ -7,13 +7,14 @@ import {
   loadConfig,
   resolveUrl,
   scanSqlFiles,
+  toExportResult,
   verifyAll
-} from "./chunk-OBWWE7GE.js";
+} from "./chunk-FZH2YQPJ.js";
 import "./chunk-HX7VA25D.js";
 import "./chunk-N3IKUCSF.js";
 import "./chunk-KP3Y6WRU.js";
-import "./chunk-KVOI4UL2.js";
-import "./chunk-XWTOJHLV.js";
+import "./chunk-IYEIZYI5.js";
+import "./chunk-3A6F2JNP.js";
 import "./chunk-QFKE5JKC.js";
 // src/commands/add.ts
@@ -601,7 +602,7 @@ function tokensForTool(tool) {
 }
 // src/version.ts
-var MODULE_REF = "v0.5.1";
+var MODULE_REF = "v0.6.0";
 var MODULE_SOURCE_BASE = "git::https://github.com/RTrentJones/greenlight.git//infra/modules";
 function moduleSource(module, ref = MODULE_REF) {
   return `${MODULE_SOURCE_BASE}/${module}?ref=${ref}`;
@@ -2894,7 +2895,7 @@ import { setTimeout as sleep } from "timers/promises";
 // src/commands/verify.ts
 import { spawnSync } from "child_process";
-import { resolve as resolve9 } from "path";
+import { basename, resolve as resolve9 } from "path";
 function defaultSpec(lane) {
   switch (lane) {
     case "astro":
@@ -2918,21 +2919,34 @@ function defaultSpec(lane) {
       };
   }
 }
-function printReport(report) {
-  console.log(`verify ${report.mode} ${report.url}
+function printReport(report, log = console.log) {
+  log(`verify ${report.mode} ${report.url}
 `);
   for (const c of report.checks) {
-    console.log(`  ${c.pass ? "\u2714" : "\u2718"} ${c.name}${c.detail ? ` \u2014 ${c.detail}` : ""}`);
+    log(`  ${c.pass ? "\u2714" : "\u2718"} ${c.name}${c.detail ? ` \u2014 ${c.detail}` : ""}`);
   }
-  console.log(`
+  log(`
 ${report.pass ? "\u2714 PASS" : "\u2718 FAIL"}`);
   if (!report.pass && report.logs) {
-    console.log(`
+    log(`
 --- recent logs (${report.mode}) ---
 ${report.logs}
 --- end logs ---`);
   }
 }
+function emitReports(reports, json, ctx) {
+  const log = json ? console.error : console.log;
+  for (const report of reports) printReport(report, log);
+  const pass = allPass(reports);
+  if (reports.length > 1) log(`
+${pass ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports.length} specs)`);
+  if (json) process.stdout.write(`${JSON.stringify(toExportResult(reports, ctx))}
+`);
+  process.exit(pass ? 0 : 1);
+}
+function gitSha() {
+  return process.env.VERCEL_GIT_COMMIT_SHA ?? process.env.GITHUB_SHA ?? null;
+}
 var LOG_TAIL_LINES = 50;
 function redactSecrets(text, env = process.env) {
   let out = text;
@@ -2973,6 +2987,9 @@ function flag6(args, name) {
   const i = args.indexOf(name);
   return i >= 0 ? args[i + 1] : void 0;
 }
+function jsonFlag(args) {
+  return args.includes("--json") || process.env.GREENLIGHT_VERIFY_JSON === "1";
+}
 async function verifyCommand(args) {
   const specPath = flag6(args, "--spec");
   if (specPath) {
@@ -2987,12 +3004,12 @@ async function verifyCommand(args) {
       toolDir: process.cwd()
     });
     attachFailureLogs(reports2, specs2, process.cwd());
-    for (const report of reports2) printReport(report);
-    const pass2 = allPass(reports2);
-    if (reports2.length > 1)
-      console.log(`
-${pass2 ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports2.length} specs)`);
-    process.exit(pass2 ? 0 : 1);
+    const tool = flag6(args, "--tool") ?? basename(specPath).replace(/\.config\.[tj]s$/, "");
+    emitReports(reports2, jsonFlag(args), {
+      tool,
+      env: flag6(args, "--env") ?? "preview",
+      gitSha: gitSha()
+    });
   }
   const name = args[0];
   if (!name || name.startsWith("-")) {
@@ -3025,12 +3042,11 @@ ${pass2 ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports2.length} specs)`);
   const toolDir = resolve9(process.cwd(), entry.dir ?? ".");
   const reports = await verifyAll(url, specs, { reachableTimeoutMs, toolDir });
   attachFailureLogs(reports, specs, toolDir);
-  for (const report of reports) printReport(report);
-  const pass = allPass(reports);
-  if (reports.length > 1)
-    console.log(`
-${pass ? "\u2714 ALL PASS" : "\u2718 FAIL"} (${reports.length} specs)`);
-  process.exit(pass ? 0 : 1);
+  emitReports(reports, jsonFlag(args), {
+    tool: entry.name ?? name,
+    env: override ? "preview" : flag6(args, "--env") ?? "preview",
+    gitSha: gitSha()
+  });
 }
 // src/commands/preview.ts
@@ -3387,7 +3403,7 @@ var HELP = `greenlight <command>
   config                                        load & validate the manifest, then print it
   deploy <name> --env <env>                     build + deploy an entry via its target adapter
   preview <name> [--port <n>]                   build + serve locally + verify (one command)
-  verify <name> [--env <env> | --url <url>]     run the verify harness against the URL
+  verify <name> [--env <env> | --url <url>] [--json]  run the verify harness (--json: standards-shaped result to stdout)
   promote <name> [--perform] [--push]           gated develop -> main fast-forward
   status <name>                                 last ship/deploy/verify run for a tool (via gh)
   secrets gather <name> [--repo o/r] [--env e]  guided, link-first token prompts -> GitHub secrets (no disk/logs)

package/dist/{chunk-XWTOJHLV.js → chunk-3A6F2JNP.js} RENAMED Viewed

@@ -14,6 +14,10 @@ function resultText(res) {
   }
   return JSON.stringify(res);
 }
+var clamp01 = (n) => {
+  const v = typeof n === "number" ? n : Number(n);
+  return Number.isFinite(v) ? Math.min(1, Math.max(0, v)) : 0;
+};
 function llmJudge(model) {
   return async ({ rubric, result }) => {
     if (!process.env.ANTHROPIC_API_KEY) throw new Error("ANTHROPIC_API_KEY not set");
@@ -27,7 +31,7 @@ function llmJudge(model) {
     const resp = await client.messages.create({
       model,
       max_tokens: 512,
-      system: 'You are a strict evaluation judge. Score how well RESULT satisfies RUBRIC on a 1\u20135 scale (5 = fully satisfies). Reply ONLY with JSON: {"score": <1-5>, "pass": <bool>, "reason": "<short>"}.',
+      system: 'You are a strict evaluation judge. Score how well RESULT satisfies RUBRIC on a 0..1 scale (1 = fully satisfies). Reply ONLY with JSON: {"score": <0..1>, "pass": <bool>, "rationale": "<one sentence>"}.',
       messages: [{ role: "user", content: `RUBRIC:
 ${rubric}
@@ -38,12 +42,23 @@ ${result}` }]
     const json = text.match(/\{[\s\S]*\}/);
     if (!json) throw new Error(`judge returned no JSON: ${text.slice(0, 120)}`);
     const parsed = JSON.parse(json[0]);
-    return { score: Number(parsed.score) || 0, pass: Boolean(parsed.pass), reason: parsed.reason };
+    return {
+      score: clamp01(parsed.score),
+      pass: Boolean(parsed.pass),
+      rationale: parsed.rationale ?? parsed.reason,
+      // `reason` = deprecated alias, one release
+      tokensIn: resp.usage?.input_tokens,
+      tokensOut: resp.usage?.output_tokens
+    };
   };
 }
 async function verifyEval(baseUrl, spec, judge) {
-  const score = judge ?? llmJudge(spec.model ?? "claude-sonnet-4-6");
+  const model = spec.model ?? "claude-sonnet-4-6";
+  const score = judge ?? llmJudge(model);
   const checks = [];
+  const started = Date.now();
+  let tokensIn = 0;
+  let tokensOut = 0;
   const client = new Client({ name: "greenlight-verify", version: "0.0.0" });
   const transport = new StreamableHTTPClientTransport(new URL(baseUrl));
   try {
@@ -53,15 +68,22 @@ async function verifyEval(baseUrl, spec, judge) {
   }
   try {
     for (const c of spec.cases) {
-      const min = c.minScore ?? 4;
+      const min = c.minScore ?? 0.8;
       try {
         const res = await client.callTool({ name: c.tool, arguments: c.args ?? {} });
-        const verdict = await score({ rubric: c.rubric, result: resultText(res) });
+        const result = resultText(res);
+        const verdict = await score({ rubric: c.rubric, result });
         const pass = verdict.pass && verdict.score >= min;
+        const rationale = verdict.rationale ?? verdict.reason;
+        tokensIn += verdict.tokensIn ?? 0;
+        tokensOut += verdict.tokensOut ?? 0;
         checks.push({
           name: `eval: ${c.name}`,
           pass,
-          detail: `score ${verdict.score}/5 (min ${min})${verdict.reason ? ` \u2014 ${verdict.reason}` : ""}`
+          score: verdict.score,
+          explanation: rationale,
+          output: result,
+          detail: `score ${verdict.score.toFixed(2)} (min ${min})${rationale ? ` \u2014 ${rationale}` : ""}`
         });
       } catch (e) {
         checks.push({ name: `eval: ${c.name}`, pass: false, detail: msg(e) });
@@ -70,10 +92,16 @@ async function verifyEval(baseUrl, spec, judge) {
   } finally {
     await client.close();
   }
-  return report("eval", baseUrl, checks);
+  return {
+    ...report("eval", baseUrl, checks),
+    model,
+    durationMs: Date.now() - started,
+    ...tokensIn || tokensOut ? { tokensIn, tokensOut } : {}
+  };
 }
 export {
+  clamp01,
   llmJudge,
   verifyEval
 };

package/dist/{chunk-OBWWE7GE.js → chunk-FZH2YQPJ.js} RENAMED Viewed

@@ -421,6 +421,53 @@ async function verifyApi(baseUrl, spec) {
   );
 }
+// ../packages/verify/src/export.ts
+var clamp01 = (n) => Math.min(1, Math.max(0, n));
+function sumDefined(xs) {
+  const present = xs.filter((x) => typeof x === "number");
+  return present.length ? present.reduce((a, b) => a + b, 0) : void 0;
+}
+function toExportResult(reports, ctx) {
+  const checks = [];
+  for (const r of reports) {
+    for (const c of r.checks) {
+      checks.push({
+        name: c.name,
+        passed: c.pass,
+        input: c.input ?? null,
+        expected: c.expected ?? null,
+        output: c.output ?? null,
+        "eval.score": c.score != null ? clamp01(c.score) : c.pass ? 1 : 0,
+        "eval.explanation": c.explanation ?? null
+      });
+    }
+  }
+  const passed = reports.length > 0 && reports.every((r) => r.pass);
+  const passRate = checks.length === 0 ? 0 : checks.filter((c) => c.passed).length / checks.length;
+  const model = reports.find((r) => r.model)?.model;
+  const tokensIn = sumDefined(reports.map((r) => r.tokensIn));
+  const tokensOut = sumDefined(reports.map((r) => r.tokensOut));
+  const cost = sumDefined(reports.map((r) => r.costUsd));
+  const durationMs = sumDefined(reports.map((r) => r.durationMs));
+  const attributes = {};
+  if (model) attributes["gen_ai.request.model"] = model;
+  if (tokensIn != null) attributes["gen_ai.usage.input_tokens"] = tokensIn;
+  if (tokensOut != null) attributes["gen_ai.usage.output_tokens"] = tokensOut;
+  if (cost != null) attributes["gen_ai.response.cost"] = cost;
+  return {
+    schemaVersion: "1",
+    tool: ctx.tool,
+    mode: reports.map((r) => r.mode).join("+") || "verify",
+    env: ctx.env,
+    git_sha: ctx.gitSha ?? null,
+    passed,
+    pass_rate: passRate,
+    duration_ms: durationMs ?? null,
+    ...Object.keys(attributes).length ? { attributes } : {},
+    checks
+  };
+}
 // ../packages/verify/src/index.ts
 function defineVerify(spec) {
   return spec;
@@ -456,11 +503,11 @@ async function verify(baseUrl, spec, opts) {
       return verifyTest2(spec, opts?.toolDir ?? process.cwd());
     }
     case "agent-web": {
-      const { verifyAgentWeb: verifyAgentWeb2 } = await import("./agent-web-3FTO2TLJ.js");
+      const { verifyAgentWeb: verifyAgentWeb2 } = await import("./agent-web-BG5ZIVAB.js");
       return verifyAgentWeb2(baseUrl, spec);
     }
     case "eval": {
-      const { verifyEval: verifyEval2 } = await import("./eval-44S2BATV.js");
+      const { verifyEval: verifyEval2 } = await import("./eval-YZXJSUKH.js");
       return verifyEval2(baseUrl, spec);
     }
   }
@@ -487,6 +534,7 @@ export {
   loadConfig,
   resolveUrl,
   scanSqlFiles,
+  toExportResult,
   defineVerify,
   verifyAll,
   allPass

package/dist/{chunk-KVOI4UL2.js → chunk-IYEIZYI5.js} RENAMED Viewed

@@ -124,6 +124,8 @@ async function runScenario(client, page, base, spec, scenario) {
   const messages = [{ role: "user", content: `Task: ${scenario.task}` }];
   const maxSteps = spec.maxSteps ?? 12;
   let finish = null;
+  let tokensIn = 0;
+  let tokensOut = 0;
   for (let step = 0; step < maxSteps && !finish; step++) {
     const resp = await client.messages.create({
       model: spec.model ?? "claude-sonnet-4-6",
@@ -132,6 +134,8 @@ async function runScenario(client, page, base, spec, scenario) {
       tools: TOOLS,
       messages
     });
+    tokensIn += resp.usage?.input_tokens ?? 0;
+    tokensOut += resp.usage?.output_tokens ?? 0;
     const blocks = resp.content;
     messages.push({ role: "assistant", content: blocks });
     const toolUses = blocks.filter((b) => b.type === "tool_use");
@@ -160,7 +164,7 @@ async function runScenario(client, page, base, spec, scenario) {
     checks.push({ ...c, name: `${tag} ${c.name}` });
   }
   if (checks.length === 0) checks.push({ name: `${tag} agent succeeded`, pass: true });
-  return checks;
+  return { checks, tokensIn, tokensOut };
 }
 async function verifyAgentWeb(baseUrl, spec) {
   const base = baseUrl.replace(/\/+$/, "");
@@ -213,11 +217,17 @@ async function verifyAgentWeb(baseUrl, spec) {
     ]);
   }
   const checks = [];
+  const started = Date.now();
+  let tokensIn = 0;
+  let tokensOut = 0;
   try {
     for (const scenario of spec.scenarios) {
       const page = await browser.newPage();
       try {
-        checks.push(...await runScenario(client, page, base, spec, scenario));
+        const r = await runScenario(client, page, base, spec, scenario);
+        checks.push(...r.checks);
+        tokensIn += r.tokensIn;
+        tokensOut += r.tokensOut;
       } catch (e) {
         checks.push({ name: `[${scenario.name}]`, pass: false, detail: msg(e) });
       } finally {
@@ -227,7 +237,12 @@ async function verifyAgentWeb(baseUrl, spec) {
   } finally {
     await browser.close();
   }
-  return report("agent-web", baseUrl, checks);
+  return {
+    ...report("agent-web", baseUrl, checks),
+    model: spec.model ?? "claude-sonnet-4-6",
+    durationMs: Date.now() - started,
+    ...tokensIn || tokensOut ? { tokensIn, tokensOut } : {}
+  };
 }
 export {

package/dist/{eval-44S2BATV.js → eval-YZXJSUKH.js} RENAMED Viewed

@@ -1,9 +1,11 @@
 import {
+  clamp01,
   llmJudge,
   verifyEval
-} from "./chunk-XWTOJHLV.js";
+} from "./chunk-3A6F2JNP.js";
 import "./chunk-QFKE5JKC.js";
 export {
+  clamp01,
   llmJudge,
   verifyEval
 };

package/dist/index.js CHANGED Viewed

@@ -2,12 +2,12 @@ import {
   defineConfig,
   defineVerify,
   loadConfig
-} from "./chunk-OBWWE7GE.js";
+} from "./chunk-FZH2YQPJ.js";
 import "./chunk-HX7VA25D.js";
 import "./chunk-N3IKUCSF.js";
 import "./chunk-KP3Y6WRU.js";
-import "./chunk-KVOI4UL2.js";
-import "./chunk-XWTOJHLV.js";
+import "./chunk-IYEIZYI5.js";
+import "./chunk-3A6F2JNP.js";
 import "./chunk-QFKE5JKC.js";
 export {
   defineConfig,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@rtrentjones/greenlight",
-  "version": "0.5.1",
+  "version": "0.6.0",
   "description": "Greenlight CLI — setup and lifecycle for the harness.",
   "license": "MIT",
   "repository": {
@@ -31,10 +31,10 @@
     "@anthropic-ai/sdk": "^0.69.0"
   },
   "devDependencies": {
-    "@rtrentjones/greenlight-adapters": "0.5.1",
-    "@rtrentjones/greenlight-loop": "0.5.1",
-    "@rtrentjones/greenlight-verify": "0.5.1",
-    "@rtrentjones/greenlight-shared": "0.5.1"
+    "@rtrentjones/greenlight-adapters": "0.6.0",
+    "@rtrentjones/greenlight-shared": "0.6.0",
+    "@rtrentjones/greenlight-verify": "0.6.0",
+    "@rtrentjones/greenlight-loop": "0.6.0"
   },
   "scripts": {
     "build": "node scripts/copy-assets.mjs && tsup",