npm - veryfront - Versions diffs - 0.1.534 → 0.1.536 - Mend

veryfront 0.1.534 → 0.1.536

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/esm/cli/templates/manifest.d.ts +405 -405
package/esm/cli/templates/manifest.js +451 -451
package/esm/deno.d.ts +1 -0
package/esm/deno.js +5 -4
package/esm/extensions/ext-llm-anthropic/src/index.d.ts.map +1 -1
package/esm/extensions/ext-llm-anthropic/src/index.js +1 -0
package/esm/extensions/ext-llm-google/src/index.d.ts.map +1 -1
package/esm/extensions/ext-llm-google/src/index.js +1 -0
package/esm/extensions/ext-llm-openai/src/index.d.ts.map +1 -1
package/esm/extensions/ext-llm-openai/src/index.js +1 -0
package/esm/src/agent/testing/durable-run-canaries/cli-runner.d.ts +18 -0
package/esm/src/agent/testing/durable-run-canaries/cli-runner.d.ts.map +1 -0
package/esm/src/agent/testing/durable-run-canaries/cli-runner.js +65 -0
package/esm/src/agent/testing/durable-run-canaries/index.d.ts +1 -0
package/esm/src/agent/testing/durable-run-canaries/index.d.ts.map +1 -1
package/esm/src/agent/testing/durable-run-canaries/index.js +1 -0
package/esm/src/agent/testing/index.d.ts +2 -2
package/esm/src/agent/testing/index.d.ts.map +1 -1
package/esm/src/agent/testing/index.js +2 -2
package/esm/src/agent/testing/live-evals/cli-runner.d.ts +36 -0
package/esm/src/agent/testing/live-evals/cli-runner.d.ts.map +1 -0
package/esm/src/agent/testing/live-evals/cli-runner.js +143 -0
package/esm/src/agent/testing/live-evals/index.d.ts +1 -0
package/esm/src/agent/testing/live-evals/index.d.ts.map +1 -1
package/esm/src/agent/testing/live-evals/index.js +1 -0
package/esm/src/server/dev-ui/manifest.d.ts +17 -17
package/esm/src/server/dev-ui/manifest.js +17 -17
package/esm/src/utils/version-constant.d.ts +1 -1
package/esm/src/utils/version-constant.js +1 -1
package/package.json +1 -1
package/src/cli/templates/manifest.js +451 -451
package/src/deno.js +5 -4
package/src/extensions/ext-llm-anthropic/src/index.ts +1 -0
package/src/extensions/ext-llm-google/src/index.ts +1 -0
package/src/extensions/ext-llm-openai/src/index.ts +1 -0
package/src/src/agent/testing/durable-run-canaries/cli-runner.ts +117 -0
package/src/src/agent/testing/durable-run-canaries/index.ts +5 -0
package/src/src/agent/testing/index.ts +7 -0
package/src/src/agent/testing/live-evals/cli-runner.ts +234 -0
package/src/src/agent/testing/live-evals/index.ts +6 -0
package/src/src/server/dev-ui/manifest.js +17 -17
package/src/src/utils/version-constant.ts +1 -1

package/src/deno.js CHANGED Viewed

@@ -1,6 +1,6 @@
 export default {
   "name": "veryfront",
-  "version": "0.1.534",
+  "version": "0.1.536",
   "license": "Apache-2.0",
   "nodeModulesDir": "auto",
   "workspace": [
@@ -331,8 +331,8 @@ export default {
     "fmt": "deno fmt src/ cli/ react/",
     "fmt:check": "deno fmt --check src/ cli/ react/",
     "typecheck": "deno task generate && deno check src/index.ts cli/main.ts src/server/index.ts src/routing/api/index.ts src/rendering/index.ts src/platform/index.ts src/platform/adapters/index.ts src/build/index.ts src/build/production-build/index.ts src/transforms/index.ts src/config/index.ts src/utils/index.ts src/data/index.ts src/security/index.ts src/middleware/index.ts src/server/handlers/dev/index.ts src/server/handlers/request/api/index.ts src/rendering/cache/index.ts src/rendering/cache/stores/index.ts src/rendering/rsc/actions/index.ts src/html/index.ts src/modules/index.ts src/proxy/main.ts src/chat/index.ts src/markdown/index.ts src/mdx/index.ts src/fs/index.ts src/oauth/index.ts src/agent/index.ts src/agent/service/route-export.check.ts src/tool/index.ts src/workflow/index.ts src/prompt/index.ts src/resource/index.ts src/jobs/index.ts src/mcp/index.ts src/provider/index.ts",
-    "verify": "deno task generate && deno fmt --check src/ cli/ react/ && DENO_NO_PACKAGE_JSON=1 deno lint src/ cli/ react/ && deno task lint:style && deno task lint:cli-boundary && deno task lint:wildcard-exports && deno task lint:barrel-jsdoc && deno task lint:ban-zod && deno task lint:core-deps && deno task lint:dependency-boundaries && deno task docs:validate && deno task typecheck && deno task test && deno task test:e2e:binary",
-    "verify:quick": "deno task generate && deno fmt --check src/ cli/ react/ && DENO_NO_PACKAGE_JSON=1 deno lint src/ cli/ react/ && deno task lint:style && deno task lint:cli-boundary && deno task lint:wildcard-exports && deno task lint:barrel-jsdoc && deno task lint:ban-zod && deno task lint:core-deps && deno task lint:dependency-boundaries && deno task docs:validate && deno task typecheck",
+    "verify": "deno task generate && deno fmt --check src/ cli/ react/ && DENO_NO_PACKAGE_JSON=1 deno lint src/ cli/ react/ && deno task lint:style && deno task lint:cli-boundary && deno task lint:wildcard-exports && deno task lint:barrel-jsdoc && deno task lint:ban-zod && deno task lint:core-deps && deno task lint:dependency-boundaries && deno task lint:extension-contracts && deno task docs:validate && deno task typecheck && deno task test && deno task test:e2e:binary",
+    "verify:quick": "deno task generate && deno fmt --check src/ cli/ react/ && DENO_NO_PACKAGE_JSON=1 deno lint src/ cli/ react/ && deno task lint:style && deno task lint:cli-boundary && deno task lint:wildcard-exports && deno task lint:barrel-jsdoc && deno task lint:ban-zod && deno task lint:core-deps && deno task lint:dependency-boundaries && deno task lint:extension-contracts && deno task docs:validate && deno task typecheck",
     "docs": "rm -rf docs/reference && deno run --allow-read --allow-write --allow-run scripts/docs/generate-api-reference.ts",
     "docs:copy": "rm -rf ../../docs/docs/code/reference && cp -r docs/reference/ ../../docs/docs/code/reference/",
     "docs:validate": "deno run --allow-read scripts/docs/validate-api-reference.ts",
@@ -341,6 +341,7 @@ export default {
     "lint:ban-zod": "deno run --allow-read scripts/lint/ban-zod-imports.ts",
     "lint:core-deps": "deno run --allow-read scripts/lint/audit-core-deps.ts",
     "lint:dependency-boundaries": "deno run --allow-read scripts/lint/audit-dependency-boundaries.ts",
+    "lint:extension-contracts": "deno run --allow-read --allow-env --allow-sys scripts/lint/audit-extension-contracts.ts",
     "lint:ban-console": "deno run --allow-read scripts/lint/ban-console.ts",
     "lint:ban-deep-imports": "deno run --allow-read scripts/lint/ban-deep-imports.ts",
     "lint:imports": "deno run --allow-read scripts/lint/no-cross-boundary-relative-imports.ts",
@@ -354,7 +355,7 @@ export default {
     "lint:wildcard-exports": "deno run --allow-read scripts/lint/ban-wildcard-exports.ts",
     "lint:deps": "deno run --allow-read scripts/lint/audit-deps.ts",
     "lint:barrel-jsdoc": "deno run --allow-read scripts/lint/check-barrel-jsdoc.ts",
-    "test:scripts": "deno test --config=scripts/test.deno.json --no-check --allow-read --allow-write scripts/build/generate-sbom.test.ts scripts/build/npm-react-shims.test.ts scripts/lint/audit-core-deps.test.ts scripts/lint/audit-dependency-boundaries.test.ts scripts/lint/audit-deps.test.ts scripts/security/audit-npm.test.ts scripts/security/submit-dependency-snapshot.test.ts",
+    "test:scripts": "deno test --config=scripts/test.deno.json --no-check --allow-read --allow-write scripts/build/generate-sbom.test.ts scripts/build/npm-react-shims.test.ts scripts/lint/audit-core-deps.test.ts scripts/lint/audit-dependency-boundaries.test.ts scripts/lint/audit-extension-contracts.test.ts scripts/lint/audit-deps.test.ts scripts/security/audit-npm.test.ts scripts/security/submit-dependency-snapshot.test.ts",
     "test:cross-runtime": "deno run --allow-all src/platform/compat/cross-runtime.test.ts",
     "test:node": "node ./tests/node/run-tests.mjs 'src/**/*.test.ts'",
     "test:bun": "node ./tests/bun/run-tests.mjs src/",

package/src/extensions/ext-llm-anthropic/src/index.ts CHANGED Viewed

@@ -17,6 +17,7 @@ const extAnthropic: ExtensionFactory = () => {
     name: "ext-llm-anthropic",
     version: "0.1.0",
     contracts: {
+      provides: ["LLMProvider:anthropic"],
       requires: [LLMProviderRegistryName],
     },
     capabilities: [],

package/src/extensions/ext-llm-google/src/index.ts CHANGED Viewed

@@ -16,6 +16,7 @@ const extGoogle: ExtensionFactory = () => {
     name: "ext-llm-google",
     version: "0.1.0",
     contracts: {
+      provides: ["LLMProvider:google"],
       requires: [LLMProviderRegistryName],
     },
     capabilities: [],

package/src/extensions/ext-llm-openai/src/index.ts CHANGED Viewed

@@ -17,6 +17,7 @@ const extOpenAI: ExtensionFactory = () => {
     name: "ext-llm-openai",
     version: "0.1.0",
     contracts: {
+      provides: ["LLMProvider:openai"],
       requires: [LLMProviderRegistryName],
     },
     capabilities: [],

package/src/src/agent/testing/durable-run-canaries/cli-runner.ts ADDED Viewed

@@ -0,0 +1,117 @@
+import { mkdir, writeFile } from "node:fs/promises";
+import { dirname, resolve } from "node:path";
+import { cwd as getProcessCwd } from "node:process";
+import { type LiveEvalApiContext } from "../live-evals/api-client.js";
+import { resolveDurableRunCanaryEnvironment } from "./environment.js";
+import {
+  createDurableRunCanaryRunner,
+  type DurableRunCanaryCase,
+  type DurableRunCanaryResult,
+  type DurableRunCanaryRunnerConfig,
+} from "./runner.js";
+type EnvRecord = Record<string, string | undefined>; // Shape of the `env` map handed to the CLI runner: string keys, values may be undefined.
+export interface DurableRunCanaryCliCaseFactoryInput { // Argument object runDurableRunCanaryCli passes to `createCases`.
+  context: LiveEvalApiContext; // Built by the runner from the resolved apiUrl, authToken and projectId.
+  requestTimeoutMs: number; // Value from resolveDurableRunCanaryEnvironment(env); the same number goes into the runner config.
+}
+export interface RunDurableRunCanaryCliInput { // Options accepted by runDurableRunCanaryCli.
+  env: EnvRecord; // Passed to resolveDurableRunCanaryEnvironment; DURABLE_CANARY_REPORT_PATH is also read from it.
+  agentId: string; // Forwarded unchanged into the runner config.
+  createCases: (input: DurableRunCanaryCliCaseFactoryInput) => DurableRunCanaryCase[]; // Called once; the returned cases are run in array order.
+  cwd?: string; // Base directory for the default report path; falls back to the process cwd.
+  log?: (message: string) => void; // Sink for progress and summary lines; falls back to console.log.
+  createRunner?: ( // Runner factory override; falls back to createDurableRunCanaryRunner.
+    config: DurableRunCanaryRunnerConfig,
+  ) => ReturnType<typeof createDurableRunCanaryRunner>;
+}
+/**
+ * Builds the default location of a report file: `<cwd>/.omx/logs/<directory>/<stamp>.json`.
+ *
+ * `<stamp>` is the current time as an ISO-8601 string in which every ":" and "."
+ * has been replaced by "-".
+ *
+ * @param input.cwd Base directory the path is resolved against.
+ * @param input.directory Sub-directory of `.omx/logs` that receives the report.
+ * @returns The resolved report path.
+ */
+function createTimestampedReportPath(input: {
+  cwd: string;
+  directory: string;
+}): string {
+  const { cwd, directory } = input;
+  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
+  return resolve(cwd, ".omx/logs", directory, `${stamp}.json`);
+}
+/**
+ * Runs every durable-run canary case in order, writes a JSON report and
+ * returns a process-style exit code.
+ *
+ * Connection settings come from `resolveDurableRunCanaryEnvironment(input.env)`.
+ * The report is written to `DURABLE_CANARY_REPORT_PATH` when that variable is
+ * defined, otherwise to a timestamped file under
+ * `.omx/logs/durable-run-staging-canaries` relative to the working directory.
+ *
+ * @param input CLI options (environment, agent id, case factory and optional overrides).
+ * @returns `1` when at least one result has status "fail", otherwise `0`.
+ * @throws Error when the resolved auth token or project id is empty.
+ */
+export async function runDurableRunCanaryCli(
+  input: RunDurableRunCanaryCliInput,
+): Promise<number> {
+  const log = input.log ?? console.log;
+  const cwd = input.cwd ?? getProcessCwd();
+  const environment = resolveDurableRunCanaryEnvironment(input.env);
+  const { apiUrl, authToken, projectId, requestTimeoutMs, keepSuccessfulEvidence } = environment;
+  const reportPath = input.env.DURABLE_CANARY_REPORT_PATH ??
+    createTimestampedReportPath({ cwd, directory: "durable-run-staging-canaries" });
+  // Both credentials are mandatory; the token is checked first.
+  if (!authToken) {
+    throw new Error("Missing VERYFRONT_TOKEN");
+  }
+  if (!projectId) {
+    throw new Error("Missing AG_UI_EVAL_PROJECT_ID");
+  }
+  const scopedProjectId = projectId || null;
+  const context: LiveEvalApiContext = { apiUrl, authToken, projectId: scopedProjectId };
+  const makeRunner = input.createRunner ?? createDurableRunCanaryRunner;
+  // The runner is created before the case factory is invoked.
+  const { runCase } = makeRunner({
+    apiUrl,
+    authToken,
+    agentId: input.agentId,
+    projectId: scopedProjectId,
+    requestTimeoutMs,
+    keepSuccessfulEvidence,
+  });
+  const canaries = input.createCases({ context, requestTimeoutMs });
+  log(`Durable run canaries -> ${apiUrl}`);
+  log(`Project scope -> ${projectId}`);
+  // Cases run strictly one after another, in the order the factory returned them.
+  const results: DurableRunCanaryResult[] = [];
+  for (const canary of canaries) {
+    log(`\n[run] ${canary.label}`);
+    const outcome = await runCase(canary);
+    results.push(outcome);
+    log(`[${outcome.status}] ${outcome.id}: ${outcome.details}`);
+  }
+  // Only "pass" and "fail" are tallied; any other status is left out of the summary.
+  let passed = 0;
+  let failed = 0;
+  for (const outcome of results) {
+    if (outcome.status === "pass") {
+      passed += 1;
+    } else if (outcome.status === "fail") {
+      failed += 1;
+    }
+  }
+  const summary = { passed, failed };
+  await mkdir(dirname(reportPath), { recursive: true });
+  const report = {
+    generatedAt: new Date().toISOString(),
+    apiUrl,
+    projectId,
+    results,
+    summary,
+  };
+  await writeFile(reportPath, JSON.stringify(report, null, 2));
+  const closingLines = [
+    "\nSummary",
+    `passed: ${summary.passed}`,
+    `failed: ${summary.failed}`,
+    `report: ${reportPath}`,
+  ];
+  for (const line of closingLines) {
+    log(line);
+  }
+  return summary.failed > 0 ? 1 : 0;
+}

package/src/src/agent/testing/durable-run-canaries/index.ts CHANGED Viewed

@@ -1,3 +1,8 @@
+export { // Re-exports the CLI runner function and its two input types from ./cli-runner.js.
+  type DurableRunCanaryCliCaseFactoryInput,
+  runDurableRunCanaryCli,
+  type RunDurableRunCanaryCliInput,
+} from "./cli-runner.js";
 export {
   DEFAULT_DURABLE_RUN_CANARY_TIMEOUT_MS,
   type DurableRunCanaryEnvironment,

package/src/src/agent/testing/index.ts CHANGED Viewed

@@ -27,6 +27,7 @@ export {
   type DurableRunCanaryApiClient,
   type DurableRunCanaryApiConfig,
   type DurableRunCanaryCase,
+  type DurableRunCanaryCliCaseFactoryInput,
   type DurableRunCanaryCreateRootRunInput,
   type DurableRunCanaryEnvironment,
   type DurableRunCanaryMessage,
@@ -41,6 +42,8 @@ export {
   getDurableRunCanaryMessageSchema,
   parseDurableRunCanaryRunSummary,
   resolveDurableRunCanaryEnvironment,
+  runDurableRunCanaryCli,
+  type RunDurableRunCanaryCliInput,
   stringifyUnknown,
 } from "./durable-run-canaries/index.js";
@@ -86,6 +89,8 @@ export {
   type LiveEvalCaseSelectionInput,
   type LiveEvalCaseSurface,
   type LiveEvalCaseTagRule,
+  type LiveEvalCliCaseFactoryInput,
+  type LiveEvalCliCaseGroups,
   type LiveEvalContext,
   type LiveEvalConversationInput,
   type LiveEvalCreateConversationInput,
@@ -112,6 +117,8 @@ export {
   printRuntimeConfidencePreflight,
   resolveLiveEvalEnvironment,
   resolveLiveEvalRequestedCaseIds,
+  runLiveEvalCli,
+  type RunLiveEvalCliInput,
   type RuntimeConfidencePreflightResult,
   type RuntimePerformanceSummary,
   selectLiveEvalCases,

package/src/src/agent/testing/live-evals/cli-runner.ts ADDED Viewed

@@ -0,0 +1,234 @@
+import { mkdir, writeFile } from "node:fs/promises";
+import { dirname, resolve } from "node:path";
+import { cwd as getProcessCwd } from "node:process";
+import { buildRuntimePerformanceSummary, type LiveEvalRuntime } from "./performance.js";
+import {
+  buildLiveEvalCaseTagSummary,
+  buildLiveEvalRuntimeSummary,
+  buildLiveEvalStatusSummary,
+  resolveLiveEvalRequestedCaseIds,
+  selectLiveEvalCases,
+} from "./report.js";
+import {
+  containsSkillLoad,
+  countStepStartedEvents,
+  createLiveEvalCaseSupport,
+  hasFinished,
+  type LiveEvalCase,
+  type LiveEvalRunnerConfig,
+} from "./runner.js";
+import { getLiveEvalProjectFile, type LiveEvalApiContext } from "./api-client.js";
+import { resolveLiveEvalEnvironment } from "./environment.js";
+import type { LiveEvalResultRecord } from "./result.js";
+type EnvRecord = Record<string, string | undefined>; // Shape of the `env` map handed to the CLI runner: string keys, values may be undefined.
+export interface LiveEvalCliCaseGroups { // Return shape of `createCases` as consumed by runLiveEvalCli.
+  readOnlyCases: LiveEvalCase[]; // Placed first when runLiveEvalCli concatenates all cases.
+  writeCases: LiveEvalCase[]; // Given to selectLiveEvalCases together with the AG_UI_EVAL_WRITE === "1" flag.
+  experimentalWriteCases: LiveEvalCase[]; // Given to selectLiveEvalCases together with the AG_UI_EVAL_EXPERIMENTAL === "1" flag.
+}
+export interface LiveEvalCliCaseFactoryInput { // Argument object runLiveEvalCli passes to `createCases`.
+  authToken: string; // From resolveLiveEvalEnvironment(env).
+  endpoint: string; // From resolveLiveEvalEnvironment(env).
+  projectId: string | null; // Resolved project id; null when resolveLiveEvalEnvironment yields no value.
+  branchId: string | null; // Resolved branch id; null when resolveLiveEvalEnvironment yields no value.
+  model: string | null; // Resolved model; null when resolveLiveEvalEnvironment yields no value.
+  requestTimeoutMs: number; // Parsed from AG_UI_EVAL_TIMEOUT_MS; 240000 when the variable is unset.
+  enableLlmJudge: boolean; // True only when AG_UI_EVAL_LLM_JUDGE === "1".
+  hasFinished: typeof hasFinished; // The helper imported from ./runner.js, passed through as-is.
+  containsSkillLoad: typeof containsSkillLoad; // The helper imported from ./runner.js, passed through as-is.
+  countStepStartedEvents: typeof countStepStartedEvents; // The helper imported from ./runner.js, passed through as-is.
+  verifyFileExists: ReturnType<typeof createLiveEvalCaseSupport>["verifyFileExists"]; // Taken from the case-support object created for this run.
+  withJudge: ReturnType<typeof createLiveEvalCaseSupport>["withJudge"]; // Taken from the case-support object created for this run.
+  judgeLlm: ReturnType<typeof createLiveEvalCaseSupport>["judgeLlm"]; // Taken from the case-support object created for this run.
+}
+export interface RunLiveEvalCliInput { // Options accepted by runLiveEvalCli.
+  env: EnvRecord; // Passed to resolveLiveEvalEnvironment; the AG_UI_EVAL_* switches are also read from it.
+  caseSets: Record<string, readonly string[]>; // Forwarded to resolveLiveEvalRequestedCaseIds along with AG_UI_EVAL_CASE_SET.
+  createCases: (input: LiveEvalCliCaseFactoryInput) => LiveEvalCliCaseGroups; // Called once to build the candidate cases.
+  runtimes?: readonly LiveEvalRuntime[]; // Runtimes to iterate over; falls back to ["framework"].
+  cwd?: string; // Base directory for the default report path; falls back to the process cwd.
+  log?: (message: string) => void; // Sink for progress and summary lines; falls back to console.log.
+  error?: (message: string) => void; // Sink for failure messages; falls back to console.error.
+  createCaseSupport?: ( // Case-support factory override; falls back to createLiveEvalCaseSupport.
+    config: LiveEvalRunnerConfig,
+  ) => ReturnType<typeof createLiveEvalCaseSupport>;
+}
+/**
+ * Parses a comma-separated environment value into a set of entries.
+ *
+ * Each entry is trimmed and empty entries are dropped. An undefined value
+ * produces an empty set.
+ *
+ * @param value Raw environment value, possibly undefined.
+ * @returns The distinct non-empty entries, in first-seen order.
+ */
+function splitCsvEnv(value: string | undefined): Set<string> {
+  const entries = new Set<string>();
+  for (const piece of (value ?? "").split(",")) {
+    const entry = piece.trim();
+    if (entry.length > 0) {
+      entries.add(entry);
+    }
+  }
+  return entries;
+}
+/**
+ * Builds the default location of a report file: `<cwd>/.omx/logs/<directory>/<stamp>.json`.
+ *
+ * `<stamp>` is the current time as an ISO-8601 string in which every ":" and "."
+ * has been replaced by "-".
+ *
+ * @param input.cwd Base directory the path is resolved against.
+ * @param input.directory Sub-directory of `.omx/logs` that receives the report.
+ * @returns The resolved report path.
+ */
+function createTimestampedReportPath(input: {
+  cwd: string;
+  directory: string;
+}): string {
+  const { cwd, directory } = input;
+  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
+  return resolve(cwd, ".omx/logs", directory, `${stamp}.json`);
+}
+/**
+ * Reads a numeric setting from an environment value.
+ *
+ * @param value Raw environment value, possibly undefined.
+ * @param fallback Number used when the value is missing or does not parse to a finite number.
+ * @returns The parsed number, or `fallback`.
+ */
+function parseNumericEnv(value: string | undefined, fallback: number): number {
+  const parsed = Number(value ?? fallback);
+  // A malformed value parses to NaN; use the default rather than pass NaN on as a timeout.
+  return Number.isFinite(parsed) ? parsed : fallback;
+}
+/**
+ * Runs the selected live eval cases against each requested runtime, prints a
+ * summary, writes a JSON report and returns a process-style exit code.
+ *
+ * Connection settings come from `resolveLiveEvalEnvironment(input.env)`. These
+ * variables are additionally read from `input.env`:
+ * `AG_UI_EVAL_WRITE`, `AG_UI_EVAL_EXPERIMENTAL`, `AG_UI_EVAL_LLM_JUDGE` (enabled when "1"),
+ * `AG_UI_EVAL_TIMEOUT_MS` (default 240000), `AG_UI_EVAL_PROGRESS_MS` (default 15000),
+ * `AG_UI_EVAL_REPORT_PATH`, `AG_UI_EVAL_CASES`, `AG_UI_EVAL_TAGS` (comma-separated)
+ * and `AG_UI_EVAL_CASE_SET`.
+ *
+ * @param input CLI options (environment, case sets, case factory and optional overrides).
+ * @returns `1` when the auth token is empty, when no case is selected, or when
+ *   at least one result failed; otherwise `0`.
+ */
+export async function runLiveEvalCli(input: RunLiveEvalCliInput): Promise<number> {
+  const log = input.log ?? console.log;
+  const error = input.error ?? console.error;
+  const cwd = input.cwd ?? getProcessCwd();
+  const { endpoint, authToken, apiUrl, projectId, branchId, model } = resolveLiveEvalEnvironment(
+    input.env,
+  );
+  // Fail fast: without a token there is no point building case support or
+  // invoking the caller's case factory with an empty credential.
+  if (authToken.length === 0) {
+    error("Missing VERYFRONT_TOKEN");
+    return 1;
+  }
+  const requestedRuntimeSelection = input.runtimes ?? ["framework"];
+  const runWriteEvals = input.env.AG_UI_EVAL_WRITE === "1";
+  const runExperimentalWriteEvals = input.env.AG_UI_EVAL_EXPERIMENTAL === "1";
+  const requestTimeoutMs = parseNumericEnv(input.env.AG_UI_EVAL_TIMEOUT_MS, 240000);
+  const progressLogIntervalMs = parseNumericEnv(input.env.AG_UI_EVAL_PROGRESS_MS, 15000);
+  const reportPath = input.env.AG_UI_EVAL_REPORT_PATH ??
+    createTimestampedReportPath({ cwd, directory: "ag-ui-live-evals" });
+  const requestedCaseIds = splitCsvEnv(input.env.AG_UI_EVAL_CASES);
+  const requestedCaseTags = splitCsvEnv(input.env.AG_UI_EVAL_TAGS);
+  const requestedCaseSetId = input.env.AG_UI_EVAL_CASE_SET?.trim() || null;
+  const enableLlmJudge = input.env.AG_UI_EVAL_LLM_JUDGE === "1";
+  const apiContext: LiveEvalApiContext = {
+    apiUrl,
+    authToken,
+    projectId: projectId ?? null,
+  };
+  const createCaseSupport = input.createCaseSupport ?? createLiveEvalCaseSupport;
+  const { judgeLlm, runEval, verifyFileExists, withJudge } = createCaseSupport({
+    endpoint,
+    authToken,
+    apiUrl,
+    projectId: projectId ?? null,
+    branchId: branchId ?? null,
+    model: model ?? null,
+    requestTimeoutMs,
+    progressLogIntervalMs,
+    enableLlmJudge,
+    readProjectFile: (readerInput) => getLiveEvalProjectFile(apiContext, readerInput),
+  });
+  const { readOnlyCases, writeCases, experimentalWriteCases } = input.createCases({
+    authToken,
+    endpoint,
+    projectId: projectId ?? null,
+    branchId: branchId ?? null,
+    model: model ?? null,
+    requestTimeoutMs,
+    enableLlmJudge,
+    hasFinished,
+    containsSkillLoad,
+    countStepStartedEvents,
+    verifyFileExists,
+    withJudge,
+    judgeLlm,
+  });
+  log(`AG-UI live evals -> ${endpoint}`);
+  log(`Veryfront API -> ${apiUrl}`);
+  log(`Project scope -> ${projectId ?? "none"}`);
+  log(`Runtime -> ${requestedRuntimeSelection.join(", ")}`);
+  log(`Write evals -> ${runWriteEvals ? "enabled" : "disabled"}`);
+  log(`Experimental evals -> ${runExperimentalWriteEvals ? "enabled" : "disabled"}`);
+  log(`Case set -> ${requestedCaseSetId ?? "none"}`);
+  log(`Case tags -> ${requestedCaseTags.size > 0 ? [...requestedCaseTags].join(", ") : "none"}`);
+  const allCases = [...readOnlyCases, ...writeCases, ...experimentalWriteCases];
+  const resolvedRequestedCaseIds = resolveLiveEvalRequestedCaseIds({
+    caseSets: input.caseSets,
+    requestedCaseIds,
+    requestedCaseSetId,
+  });
+  const cases = selectLiveEvalCases({
+    allCases,
+    readOnlyCases,
+    writeCases,
+    experimentalWriteCases,
+    requestedCaseIds: resolvedRequestedCaseIds,
+    requestedCaseTags,
+    runWriteEvals,
+    runExperimentalWriteEvals,
+  });
+  if (cases.length === 0) {
+    error("No eval cases selected.");
+    return 1;
+  }
+  // Only needed for the report, so it is built once a non-empty selection is confirmed.
+  const selectedCaseTagSummary = buildLiveEvalCaseTagSummary(cases);
+  // Runtimes and cases run strictly one after another; results keep that order.
+  const results: LiveEvalResultRecord[] = [];
+  for (const runtime of requestedRuntimeSelection) {
+    log(`\n[runtime] ${runtime}`);
+    for (const testCase of cases) {
+      log(`\n[run] ${runtime} :: ${testCase.label}`);
+      const result = await runEval(testCase, runtime);
+      results.push(result);
+      log(`[${runtime}] [${result.status}] ${result.details}`);
+    }
+  }
+  const summary = buildLiveEvalStatusSummary(results);
+  const runtimeSummary = buildLiveEvalRuntimeSummary(requestedRuntimeSelection, results);
+  const runtimePerformanceSummary = buildRuntimePerformanceSummary(results);
+  log("\nSummary");
+  log(`passed: ${summary.passed}`);
+  log(`failed: ${summary.failed}`);
+  log(`skipped: ${summary.skipped}`);
+  for (const runtime of requestedRuntimeSelection) {
+    const currentRuntimeSummary = runtimeSummary[runtime];
+    log(
+      `${runtime}: passed=${currentRuntimeSummary.passed} failed=${currentRuntimeSummary.failed} skipped=${currentRuntimeSummary.skipped}`,
+    );
+    const performance = runtimePerformanceSummary[runtime];
+    log(
+      `${runtime}: avg=${performance.avgDurationMs}ms p50=${performance.p50DurationMs}ms p95=${performance.p95DurationMs}ms min=${performance.minDurationMs}ms max=${performance.maxDurationMs}ms`,
+    );
+  }
+  await mkdir(dirname(reportPath), { recursive: true });
+  await writeFile(
+    reportPath,
+    JSON.stringify(
+      {
+        generatedAt: new Date().toISOString(),
+        endpoint,
+        apiUrl,
+        projectId: projectId ?? null,
+        runtimes: requestedRuntimeSelection,
+        writeEvals: runWriteEvals,
+        requestedCaseIds: [...resolvedRequestedCaseIds],
+        requestedCaseTags: [...requestedCaseTags],
+        requestedCaseSetId,
+        caseMetadata: Object.fromEntries(
+          cases.map((testCase) => [testCase.id, testCase.metadata ?? { tags: [] }]),
+        ),
+        selectedCaseTagSummary,
+        results,
+        summary,
+        runtimeSummary,
+        runtimePerformanceSummary,
+      },
+      null,
+      2,
+    ),
+  );
+  log(`report: ${reportPath}`);
+  return summary.failed > 0 ? 1 : 0;
+}

package/src/src/agent/testing/live-evals/index.ts CHANGED Viewed

@@ -1,3 +1,9 @@
+export { // Re-exports the CLI runner function and its three companion types from ./cli-runner.js.
+  type LiveEvalCliCaseFactoryInput,
+  type LiveEvalCliCaseGroups,
+  runLiveEvalCli,
+  type RunLiveEvalCliInput,
+} from "./cli-runner.js";
 export {
   DEFAULT_LIVE_EVAL_ENDPOINT,
   type LiveEvalEnvironment,