npm - @archal/cli - Versions diffs - 0.2.0 → 0.3.0 - Mend

@archal/cli 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/README.md +24 -11
package/dist/api-client-D7SCA64V.js +23 -0
package/dist/api-client-DI7R3H4C.js +21 -0
package/dist/api-client-EMMBIJU7.js +23 -0
package/dist/api-client-VYQMFDLN.js +23 -0
package/dist/api-client-WN45C63M.js +23 -0
package/dist/api-client-ZOCVG6CC.js +21 -0
package/dist/api-client-ZUMDL3TP.js +23 -0
package/dist/chunk-3EH6CG2H.js +561 -0
package/dist/chunk-3RG5ZIWI.js +10 -0
package/dist/chunk-4FTU232H.js +191 -0
package/dist/chunk-4LM2CKUI.js +561 -0
package/dist/chunk-A6WOU5RO.js +214 -0
package/dist/chunk-AXLDC4PC.js +561 -0
package/dist/chunk-NZEPQ6IZ.js +83 -0
package/dist/chunk-PGMDLZW5.js +561 -0
package/dist/chunk-SVGN2AFT.js +148 -0
package/dist/chunk-UOJHYCMX.js +144 -0
package/dist/chunk-VYCADG5E.js +189 -0
package/dist/chunk-WZXES7XO.js +136 -0
package/dist/chunk-XJOKVFOL.js +561 -0
package/dist/chunk-XSO7ETSM.js +561 -0
package/dist/chunk-YDGWON57.js +561 -0
package/dist/index.js +1868 -647
package/dist/login-4RNNR4YA.js +7 -0
package/dist/login-CQ2DRBRU.js +7 -0
package/dist/login-LOTTPY7G.js +7 -0
package/dist/login-MBCG3N5P.js +7 -0
package/dist/login-MP6YLOEA.js +7 -0
package/dist/login-SGLSVIZZ.js +7 -0
package/dist/login-TFBKIZ7I.js +7 -0
package/package.json +4 -5

package/dist/index.js CHANGED Viewed

@@ -1,16 +1,16 @@
 #!/usr/bin/env node
 // src/index.ts
-import { Command as Command16 } from "commander";
+import { Command as Command15 } from "commander";
 // src/commands/run.ts
 import { Command as Command3 } from "commander";
-import { existsSync as existsSync11, mkdirSync as mkdirSync5, unlinkSync as unlinkSync7, writeFileSync as writeFileSync9 } from "fs";
-import { dirname as dirname3, resolve as resolve6 } from "path";
+import { existsSync as existsSync12, mkdirSync as mkdirSync5, unlinkSync as unlinkSync7, writeFileSync as writeFileSync9 } from "fs";
+import { dirname as dirname3, resolve as resolve8 } from "path";
 // src/runner/orchestrator.ts
-import { existsSync as existsSync10, renameSync as renameSync2, unlinkSync as unlinkSync6, writeFileSync as writeFileSync8 } from "fs";
-import { resolve as resolve5, dirname as dirname2, join as join8 } from "path";
+import { existsSync as existsSync11, renameSync as renameSync2, unlinkSync as unlinkSync6, writeFileSync as writeFileSync8 } from "fs";
+import { resolve as resolve7, dirname as dirname2, join as join8 } from "path";
 import { tmpdir as tmpdir3 } from "os";
 // src/runner/scenario-parser.ts
@@ -276,10 +276,10 @@ function inferTwinsFromContent(setup, expectedBehavior) {
 ${expectedBehavior}`.toLowerCase();
   const twins = [];
   const twinKeywords = {
-    github: ["github", "repository", "repo", "pull request", "pr", "issue", "commit", "branch", "merge"],
-    slack: ["slack", "channel", "message", "thread", "workspace", "dm", "direct message"],
-    linear: ["linear", "ticket", "project", "cycle", "backlog"],
-    jira: ["jira", "sprint", "epic", "story", "board"]
+    github: ["github", "repository", "pull request", "create_issue", "create_pull_request", "merge_pull_request"],
+    slack: ["slack", "slack channel", "send_message", "slack message", "direct message"],
+    linear: ["linear", "linear ticket", "linear project", "linear cycle"],
+    jira: ["jira", "jira sprint", "jira epic", "jira board"]
   };
   for (const [twin, keywords] of Object.entries(twinKeywords)) {
     if (keywords.some((kw) => combined.includes(kw))) {
@@ -442,6 +442,19 @@ var GITHUB_SEED_MAPPINGS = [
     ],
     seedName: "large-backlog",
     weight: 2
+  },
+  {
+    keywords: [
+      "triage",
+      "unlabeled",
+      "no labels",
+      "categorize",
+      "classify",
+      "label",
+      "none of them have labels"
+    ],
+    seedName: "triage-unlabeled",
+    weight: 2
   }
 ];
 var SLACK_SEED_MAPPINGS = [
@@ -450,34 +463,47 @@ var SLACK_SEED_MAPPINGS = [
     seedName: "empty",
     weight: 1
   },
-  {
-    keywords: ["small team", "few channels", "simple", "basic", "starter"],
-    seedName: "small-team",
-    weight: 1
-  },
   {
     keywords: [
       "engineering",
       "development",
       "engineering team",
       "developers",
-      "incidents",
-      "on-call",
       "sprints",
-      "standups"
+      "standups",
+      "hr",
+      "confidential",
+      "salary"
     ],
     seedName: "engineering-team",
     weight: 1
   },
   {
-    keywords: ["support", "customer", "tickets", "help desk", "routing"],
-    seedName: "support-team",
+    keywords: [
+      "support",
+      "customer",
+      "tickets",
+      "help desk",
+      "routing",
+      "busy",
+      "high volume",
+      "many messages",
+      "active",
+      "noisy",
+      "general",
+      "workspace",
+      "members",
+      "finance",
+      "ceo",
+      "fraud"
+    ],
+    seedName: "busy-workspace",
     weight: 1
   },
   {
-    keywords: ["busy", "high volume", "many messages", "active", "noisy"],
-    seedName: "high-volume",
-    weight: 1
+    keywords: ["incident", "on-call", "alert", "outage", "escalat", "sev1", "sev2"],
+    seedName: "incident-active",
+    weight: 2
   }
 ];
 var LINEAR_SEED_MAPPINGS = [
@@ -507,14 +533,59 @@ var LINEAR_SEED_MAPPINGS = [
     weight: 1
   }
 ];
+var STRIPE_SEED_MAPPINGS = [ /*
+   * Keyword-to-seed table for the Stripe twin. Each entry pairs a list of
+   * scenario keywords with the seed it selects (`seedName`) and a numeric
+   * `weight`. The table is registered under the `stripe` key of
+   * TWIN_SEED_REGISTRY.
+   * NOTE(review): the matching/scoring code is not visible here - presumably
+   * `weight` scales how strongly a keyword hit favours the seed, and short
+   * generic keywords such as "new", "plan" or "simple" may match more text
+   * than intended; confirm against the selector.
+   */
+  {
+    keywords: ["empty", "blank", "new", "fresh", "clean", "no customers"],
+    seedName: "empty",
+    weight: 1
+  },
+  {
+    keywords: [
+      "small business",
+      "few customers",
+      "simple",
+      "basic",
+      "starter",
+      "payment",
+      "charge",
+      "wire",
+      "transfer",
+      "balance",
+      "vendor",
+      "invoice",
+      "ceo", // "ceo" and "fraud" are also listed for the Slack "busy-workspace" seed
+      "fraud",
+      "financial"
+    ],
+    seedName: "small-business", // also the DEFAULT_SEEDS entry for stripe
+    weight: 1
+  },
+  {
+    keywords: [
+      "subscription",
+      "recurring",
+      "saas",
+      "monthly",
+      "annual",
+      "plan",
+      "pricing",
+      "trial",
+      "cancel"
+    ],
+    seedName: "subscription-heavy",
+    weight: 2 // the only Stripe entry with a weight above 1
+  }
+];
 var TWIN_SEED_REGISTRY = {
   github: GITHUB_SEED_MAPPINGS,
   slack: SLACK_SEED_MAPPINGS,
+  stripe: STRIPE_SEED_MAPPINGS,
   linear: LINEAR_SEED_MAPPINGS
 };
 var DEFAULT_SEEDS = {
   github: "small-project",
-  slack: "small-team",
+  slack: "engineering-team",
+  stripe: "small-business",
   linear: "small-team"
 };
 function normalizeText(text) {
@@ -612,7 +683,27 @@ import { spawn } from "child_process";
 function buildSanitizedSpawnEnv(explicitEnv) {
   const sanitized = {};
   const tempVarKey = process.platform === "win32" ? "TEMP" : "TMPDIR";
-  const passthroughKeys = ["PATH", "HOME", tempVarKey, "NODE_ENV"];
+  const passthroughKeys = [
+    "PATH",
+    "HOME",
+    "USER",
+    "SHELL",
+    tempVarKey,
+    "NODE_ENV",
+    // Proxy vars — critical for corporate environments
+    "HTTP_PROXY",
+    "HTTPS_PROXY",
+    "NO_PROXY",
+    "http_proxy",
+    "https_proxy",
+    "no_proxy",
+    // API keys needed by local engine harness agents
+    "ANTHROPIC_API_KEY",
+    "OPENAI_API_KEY",
+    "GEMINI_API_KEY",
+    // Windows-specific
+    ...process.platform === "win32" ? ["USERPROFILE", "APPDATA", "LOCALAPPDATA", "SystemRoot", "COMSPEC", "TMP"] : []
+  ];
   for (const key of passthroughKeys) {
     const value = process.env[key];
     if (typeof value === "string" && value.length > 0) {
@@ -640,7 +731,7 @@ function spawnWithTimeout(options) {
     onStdout,
     onStderr
   } = options;
-  return new Promise((resolve11, reject) => {
+  return new Promise((resolve13, reject) => {
     const startTime = Date.now();
     let timedOut = false;
     let stdoutBuf = "";
@@ -696,7 +787,7 @@ function spawnWithTimeout(options) {
       clearTimeout(timer);
       const durationMs = Date.now() - startTime;
       debug("Process exited", { command, exitCode, durationMs, timedOut });
-      resolve11({
+      resolve13({
         exitCode,
         stdout: stdoutBuf,
         stderr: stderrBuf,
@@ -721,9 +812,9 @@ function spawnMcpStdioProcess(options) {
   return child;
 }
 function killProcess(child, gracePeriodMs = 5e3) {
-  return new Promise((resolve11) => {
+  return new Promise((resolve13) => {
     if (child.killed || child.exitCode !== null) {
-      resolve11();
+      resolve13();
       return;
     }
     child.kill("SIGTERM");
@@ -734,7 +825,7 @@ function killProcess(child, gracePeriodMs = 5e3) {
     }, gracePeriodMs);
     child.on("close", () => {
       clearTimeout(forceKillTimer);
-      resolve11();
+      resolve13();
     });
   });
 }
@@ -768,6 +859,20 @@ function generateTaskFromScenario(scenario, apiRouting) {
     }
     lines.push("");
   }
+  if (apiRouting?.adminToken) {
+    lines.push("Authentication:");
+    lines.push("Include these headers with every request to the base URLs above:");
+    lines.push(`  x-archal-admin-token: ${apiRouting.adminToken}`);
+    if (apiRouting.adminUserId) {
+      lines.push(`  x-archal-user-id: ${apiRouting.adminUserId}`);
+    }
+    lines.push("");
+  } else if (apiRouting?.bearerToken) {
+    lines.push("Authentication:");
+    lines.push("Include this header with every request to the base URLs above:");
+    lines.push(`  Authorization: Bearer ${apiRouting.bearerToken}`);
+    lines.push("");
+  }
   if (hasProxy && apiRouting?.proxyUrl) {
     lines.push(`Proxy URL: ${apiRouting.proxyUrl}`);
     lines.push("");
@@ -812,14 +917,6 @@ function resolveResponsesUrl(rawUrl) {
   }
   return url.toString();
 }
-function toMcpUrl(rawUrl) {
-  const url = new URL(rawUrl);
-  const path = url.pathname.replace(/\/+$/, "");
-  if (!path.endsWith("/mcp")) {
-    url.pathname = `${path || ""}/mcp`;
-  }
-  return url.toString();
-}
 function collectResponseText(response) {
   if (!response.output || response.output.length === 0) return "";
   const chunks = [];
@@ -838,7 +935,7 @@ function collectResponseText(response) {
   }
   return chunks.join("\n").trim();
 }
-function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, model, apiRouting, mcpField = "tools") {
+function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, model, apiRouting) {
   const metadata = {
     run_id: runId,
     scenario_title: scenario.title,
@@ -851,40 +948,11 @@ function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, m
   if (apiRouting?.proxyUrl) {
     metadata["archal_api_proxy_url"] = apiRouting.proxyUrl;
   }
-  const mcpTools = Object.entries(twinUrls).map(([name, url]) => ({
-    type: "mcp",
-    server_label: name,
-    server_url: toMcpUrl(url),
-    require_approval: "never"
-  }));
-  const request2 = {
+  return {
     model,
     input: taskMessage,
     metadata
   };
-  if (mcpField === "both") {
-    request2.tools = mcpTools;
-    request2.mcp_servers = mcpTools;
-    return request2;
-  }
-  request2[mcpField] = mcpTools;
-  return request2;
-}
-function shouldRetryWithAlternateMcpField(status, rawBody, attemptedField) {
-  if (status !== 400) return false;
-  const pattern = new RegExp(`Unrecognized key:\\s*"?${attemptedField}"?`, "i");
-  try {
-    const parsed = JSON.parse(rawBody);
-    if (typeof parsed.error?.message === "string") {
-      return pattern.test(parsed.error.message);
-    }
-  } catch {
-  }
-  return pattern.test(rawBody);
-}
-function resolvePreferredMcpField() {
-  const configured = (process.env["ARCHAL_OPENCLAW_MCP_FIELD"] ?? process.env["OPENCLAW_MCP_FIELD"] ?? "tools").trim().toLowerCase();
-  return configured === "mcp_servers" ? "mcp_servers" : "tools";
 }
 function extractOpenClawResponseText(response) {
   return collectResponseText(response);
@@ -927,15 +995,13 @@ async function executeOpenClawRemote(remoteConfig, scenario, runId, taskMessage,
   const timer = setTimeout(() => controller.abort(), remoteConfig.timeoutMs);
   try {
     responsesUrl = resolveResponsesUrl(remoteConfig.url);
-    let mcpField = resolvePreferredMcpField();
-    let requestBody = buildOpenClawResponsesRequest(
+    const requestBody = buildOpenClawResponsesRequest(
       scenario,
       runId,
       taskMessage,
       twinUrls,
       remoteConfig.model,
-      apiRouting,
-      mcpField
+      apiRouting
     );
     const headers = {
       "Content-Type": "application/json"
@@ -943,36 +1009,32 @@ async function executeOpenClawRemote(remoteConfig, scenario, runId, taskMessage,
     if (remoteConfig.token) {
       headers["Authorization"] = `Bearer ${remoteConfig.token}`;
     }
+    if (remoteConfig.agentId) {
+      headers["x-openclaw-agent-id"] = remoteConfig.agentId;
+    }
     info("Executing remote OpenClaw agent", {
       url: responsesUrl,
-      timeout: `${remoteConfig.timeoutMs}ms`
+      timeout: `${remoteConfig.timeoutMs}ms`,
+      ...remoteConfig.agentId ? { agentId: remoteConfig.agentId } : {}
+    });
+    debug("Task message being sent to OpenClaw:", {
+      taskMessage: taskMessage.replace(/x-archal-admin-token:\s*\S+/gi, "x-archal-admin-token: [REDACTED]").replace(/Authorization:\s*Bearer\s+\S+/gi, "Authorization: Bearer [REDACTED]").slice(0, 2e3)
     });
-    let response = await fetch(responsesUrl, {
+    debug("Twin URLs:", { twinUrls: JSON.stringify(twinUrls) });
+    debug("API routing:", {
+      apiRouting: JSON.stringify({
+        ...apiRouting,
+        bearerToken: apiRouting?.bearerToken ? "[REDACTED]" : void 0,
+        adminToken: apiRouting?.adminToken ? "[REDACTED]" : void 0
+      })
+    });
+    const response = await fetch(responsesUrl, {
       method: "POST",
       headers,
       body: JSON.stringify(requestBody),
       signal: controller.signal
     });
-    let rawBody = await response.text();
-    if (!response.ok && shouldRetryWithAlternateMcpField(response.status, rawBody, mcpField)) {
-      mcpField = mcpField === "tools" ? "mcp_servers" : "tools";
-      requestBody = buildOpenClawResponsesRequest(
-        scenario,
-        runId,
-        taskMessage,
-        twinUrls,
-        remoteConfig.model,
-        apiRouting,
-        mcpField
-      );
-      response = await fetch(responsesUrl, {
-        method: "POST",
-        headers,
-        body: JSON.stringify(requestBody),
-        signal: controller.signal
-      });
-      rawBody = await response.text();
-    }
+    const rawBody = await response.text();
     if (!response.ok) {
       const statusLine = `${response.status} ${response.statusText}`.trim();
       return {
@@ -1155,7 +1217,7 @@ function writeMcpConfig(twinConfigs, runId) {
   return { configPath, twinPaths };
 }
 function waitForPortOutput(child, timeoutMs = 15e3) {
-  return new Promise((resolve11, reject) => {
+  return new Promise((resolve13, reject) => {
     const timer = setTimeout(() => {
       reject(new Error("Timed out waiting for twin REST port"));
     }, timeoutMs);
@@ -1165,7 +1227,7 @@ function waitForPortOutput(child, timeoutMs = 15e3) {
       const match = /listening on http:\/\/(?:localhost|127\.0\.0\.1):(\d+)/.exec(stderrBuf);
       if (match) {
         clearTimeout(timer);
-        resolve11(parseInt(match[1], 10));
+        resolve13(parseInt(match[1], 10));
       }
     });
     child.on("exit", (code) => {
@@ -1323,11 +1385,16 @@ function collectTraceFromFiles(twinPaths) {
   return allTraces;
 }
 var HTTP_COLLECT_TIMEOUT_MS = 5e3;
-async function collectStateFromHttp(twinUrls) {
+function twinBasePath(url) { /*
+   * Return `url` with one trailing "/mcp" or "/api" path segment removed
+   * (an optional single "/" after that segment is removed as well). Any
+   * other URL is returned unchanged. The HTTP state/trace collectors append
+   * "/state" and "/trace" to the result.
+   *
+   * @param {string} url - twin URL, possibly ending in /mcp or /api
+   * @returns {string} the URL without that suffix
+   *
+   * NOTE(review): a URL that merely ends in "/" (no /mcp or /api suffix)
+   * keeps its slash, so the collectors would request ".../" + "/state".
+   * The code this replaces stripped trailing slashes with /\/+$/ - confirm
+   * whether dropping that normalisation was intended.
+   */
+  return url.replace(/\/(mcp|api)\/?$/, "");
+}
+async function collectStateFromHttp(twinUrls, bearerToken, adminAuth) {
   const state = {};
+  const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {} } : bearerToken ? { "Authorization": `Bearer ${bearerToken}` } : {};
   for (const [name, baseUrl] of Object.entries(twinUrls)) {
     try {
-      const response = await fetch(`${baseUrl.replace(/\/+$/, "")}/state`, {
+      const response = await fetch(`${twinBasePath(baseUrl)}/state`, {
+        headers,
         signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
       });
       if (response.ok) {
@@ -1344,11 +1411,13 @@ async function collectStateFromHttp(twinUrls) {
   }
   return state;
 }
-async function collectTraceFromHttp(twinUrls) {
+async function collectTraceFromHttp(twinUrls, bearerToken, adminAuth) {
   const allTraces = [];
+  const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {} } : bearerToken ? { "Authorization": `Bearer ${bearerToken}` } : {};
   for (const [name, baseUrl] of Object.entries(twinUrls)) {
     try {
-      const response = await fetch(`${baseUrl.replace(/\/+$/, "")}/trace`, {
+      const response = await fetch(`${twinBasePath(baseUrl)}/trace`, {
+        headers,
         signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
       });
       if (response.ok) {
@@ -1443,10 +1512,94 @@ function resolveAgentConfig(agentCommand, projectConfigPath) {
   return null;
 }
+// src/runner/harness.ts
+import { existsSync as existsSync3, readFileSync as readFileSync4 } from "fs";
+import { resolve as resolve3 } from "path";
+import { z } from "zod";
+var harnessLocalSchema = z.object({ /*
+   * Zod schema for the optional `local` section of a harness manifest
+   * (it is referenced as `local` by harnessManifestSchema).
+   */
+  command: z.string().min(1, "local.command must be a non-empty string"), // required, at least one character
+  args: z.array(z.string()).default([]), // omitted args parse to an empty array
+  env: z.record(z.string()).optional() // optional map of string values
+});
+var harnessManifestSchema = z.object({ /*
+   * Zod schema for the harness manifest file; parseHarnessManifest runs
+   * the parsed JSON through it.
+   */
+  version: z.literal(1), // only manifest version 1 is accepted
+  defaultModel: z.string().optional(), // used by resolveLocalHarness when no explicit model is given
+  promptFiles: z.array(z.string()).default([]), // omitted promptFiles parse to an empty array
+  local: harnessLocalSchema.optional() // optional local command description
+});
+var MANIFEST_FILE = "archal-harness.json"; // manifest file name; resolveLocalHarness resolves it inside the harness directory
+function resolveHarnessDir(rawDir) { /*
+   * Turn a user-supplied harness directory into an absolute path and make
+   * sure something exists at that path.
+   *
+   * @param {string} rawDir - harness directory as given by the caller
+   * @returns {string} absolute path produced by path.resolve
+   * @throws {Error} "Harness directory not found: <path>" when nothing exists there
+   *
+   * NOTE(review): only existence is checked; a regular file at this path
+   * would pass. Confirm whether a directory check is wanted.
+   */
+  const harnessDir = resolve3(rawDir); // resolve3 is path.resolve: relative input is resolved against the cwd
+  if (!existsSync3(harnessDir)) {
+    throw new Error(`Harness directory not found: ${harnessDir}`);
+  }
+  return harnessDir;
+}
+function parseHarnessManifest(manifestPath) { /*
+   * Read the manifest file as UTF-8, parse it as JSON and validate it with
+   * harnessManifestSchema.
+   *
+   * @param {string} manifestPath - path of the manifest file to read
+   * @returns {object} the value returned by harnessManifestSchema.parse
+   * @throws {Error} "Invalid harness manifest at <path>: <message>" for any
+   *   failure while reading, JSON-parsing or validating
+   */
+  try {
+    const raw = readFileSync4(manifestPath, "utf-8");
+    return harnessManifestSchema.parse(JSON.parse(raw)); // schema .parse() throws when validation fails
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err); // non-Error throwables are stringified
+    throw new Error(`Invalid harness manifest at ${manifestPath}: ${message}`); // NOTE(review): only the message is kept; the original error is not attached
+  }
+}
+function trimToUndefined(value) { /*
+   * Trim `value` and collapse "nothing useful" to undefined.
+   *
+   * @param {string|null|undefined} value - text to normalise
+   * @returns {string|undefined} the trimmed text, or undefined when `value`
+   *   is null/undefined or trims to the empty string
+   */
+  const trimmed = value?.trim(); // optional chaining: null/undefined input yields undefined
+  return trimmed ? trimmed : void 0; // "" is falsy, so blank input also becomes undefined
+}
+function resolveLocalHarness(harnessDirInput, explicitModel) { /*
+   * Resolve a harness directory into the settings described by its
+   * manifest, if one exists.
+   *
+   * @param {string} harnessDirInput - harness directory as given by the caller
+   * @param {string} [explicitModel] - model override; blank values are ignored
+   * @returns {object} always { harnessDir, manifestPath, model }; when the
+   *   manifest file exists the result also carries manifest, promptContext
+   *   and localCommand (the latter two may be undefined)
+   * @throws {Error} when the directory is missing, the manifest is invalid,
+   *   or a listed prompt file is blank-named or missing
+   */
+  const harnessDir = resolveHarnessDir(harnessDirInput);
+  const manifestPath = resolve3(harnessDir, MANIFEST_FILE);
+  const explicit = trimToUndefined(explicitModel);
+  if (!existsSync3(manifestPath)) { // a manifest is optional: without one only the explicit model is reported
+    return {
+      harnessDir,
+      manifestPath,
+      model: explicit
+    };
+  }
+  const manifest = parseHarnessManifest(manifestPath);
+  const promptContext = loadPromptContext(harnessDir, manifest.promptFiles);
+  const localCommand = manifest.local ? { // copy only command/args/env out of the manifest's local section
+    command: manifest.local.command,
+    args: manifest.local.args,
+    env: manifest.local.env
+  } : void 0;
+  const model = explicit ?? trimToUndefined(manifest.defaultModel); // an explicit model wins over the manifest default
+  return { harnessDir, manifestPath, manifest, model, promptContext, localCommand };
+}
+function loadPromptContext(harnessDir, promptFiles) { /*
+   * Read the listed prompt files and join their trimmed contents, in list
+   * order, separated by a blank line.
+   *
+   * @param {string} harnessDir - directory that entries are resolved against
+   * @param {string[]} promptFiles - file paths taken from the manifest
+   * @returns {string|undefined} the joined text, or undefined when the list
+   *   is empty or every file is empty after trimming
+   * @throws {Error} when an entry is blank or the resolved file does not exist
+   *
+   * NOTE(review): entries are passed through path.resolve, so an absolute
+   * path or one containing ".." can point outside harnessDir - confirm
+   * that this is acceptable for manifests from untrusted harnesses.
+   */
+  if (promptFiles.length === 0) {
+    return void 0;
+  }
+  const sections = [];
+  for (const promptFile of promptFiles) {
+    const relativePath = promptFile.trim();
+    if (!relativePath) { // whitespace-only entries are rejected rather than skipped
+      throw new Error("Harness promptFiles entries must be non-empty strings");
+    }
+    const absolutePath = resolve3(harnessDir, relativePath);
+    if (!existsSync3(absolutePath)) {
+      throw new Error(`Harness prompt file not found: ${absolutePath}`);
+    }
+    const content = readFileSync4(absolutePath, "utf-8").trim();
+    if (!content) { // an empty file only produces a warning, unlike a missing one
+      warn(`Harness prompt file is empty and will be skipped: ${absolutePath}`);
+      continue;
+    }
+    sections.push(content);
+  }
+  if (sections.length === 0) {
+    return void 0;
+  }
+  return sections.join("\n\n");
+}
 // src/runner/reporter.ts
-import { readFileSync as readFileSync4, existsSync as existsSync3 } from "fs";
+import { readFileSync as readFileSync5, existsSync as existsSync4 } from "fs";
 import { createRequire as createRequire2 } from "module";
-import { dirname, resolve as resolve3 } from "path";
+import { dirname, resolve as resolve4 } from "path";
 import { fileURLToPath as fileURLToPath2 } from "url";
 var __dirname2 = fileURLToPath2(new URL(".", import.meta.url));
 function printHeader(scenarioTitle, seedSelections) {
@@ -1530,23 +1683,26 @@ function loadTwinFidelity(twinNames) {
   for (const name of twinNames) {
     try {
       let fidelityPath = null;
-      const monorepoPath = resolve3(__dirname2, "..", "..", "twins", name, "fidelity.json");
-      if (existsSync3(monorepoPath)) {
+      const monorepoPath = resolve4(__dirname2, "..", "..", "twins", name, "fidelity.json");
+      if (existsSync4(monorepoPath)) {
         fidelityPath = monorepoPath;
       }
       if (!fidelityPath) {
         try {
           const require2 = createRequire2(import.meta.url);
           const twinMain = require2.resolve(`@archal/twin-${name}`);
-          const candidate = resolve3(dirname(twinMain), "..", "fidelity.json");
-          if (existsSync3(candidate)) {
+          const candidate = resolve4(dirname(twinMain), "..", "fidelity.json");
+          if (existsSync4(candidate)) {
             fidelityPath = candidate;
           }
         } catch {
         }
       }
-      if (!fidelityPath) continue;
-      const raw = readFileSync4(fidelityPath, "utf-8");
+      if (!fidelityPath) {
+        debug(`Fidelity data not found for twin "${name}" \u2014 skipping badge`);
+        continue;
+      }
+      const raw = readFileSync5(fidelityPath, "utf-8");
       const data = JSON.parse(raw);
       lines.push(`  ${DIM}twin fidelity:${RESET} ${data.twin} v${data.version}`);
       for (const cap of data.capabilities) {
@@ -1701,6 +1857,7 @@ function cleanPredicate(pred) {
   return cleaned.trim();
 }
 function parseAssertion(description) {
+  const lowerOriginal = description.toLowerCase().trim();
   const lower = stripParenthetical(description).toLowerCase().trim();
   const noLabeledMatch = lower.match(/^no\s+(.+?)\s+labeled\s+["']?([^"']+?)["']?\s+(?:are|were|is|was|should be)\s+(.+)$/);
   if (noLabeledMatch) {
@@ -1711,7 +1868,63 @@ function parseAssertion(description) {
       labelFilter: noLabeledMatch[2]?.trim()
     };
   }
-  const exactWithVerb = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be)\s+(.+)$/);
+  const withLabelRemainMatch = lower.match(/^(.+?)\s+with\s+(?:the\s+)?["']?([^"']+?)["']?\s+label\s+remain\s+(.+)$/);
+  if (withLabelRemainMatch) {
+    const remainState = withLabelRemainMatch[3]?.trim() ?? "";
+    const STATE_OPPOSITES = {
+      open: "closed",
+      closed: "open",
+      active: "inactive",
+      inactive: "active",
+      pending: "completed",
+      completed: "pending",
+      enabled: "disabled",
+      disabled: "enabled"
+    };
+    const oppositeState = STATE_OPPOSITES[remainState] ?? `not_${remainState}`;
+    return {
+      type: "no_matching",
+      subject: withLabelRemainMatch[1]?.trim() ?? "",
+      predicate: oppositeState,
+      labelFilter: withLabelRemainMatch[2]?.trim()
+    };
+  }
+  const remainMatch = lower.match(/^(?:recently\s+active\s+)?(.+?)\s+remain\s+(open|closed)$/);
+  if (remainMatch) {
+    return {
+      type: "state_check",
+      subject: remainMatch[1]?.trim() ?? "",
+      predicate: remainMatch[2]?.trim()
+    };
+  }
+  const exactLabelMatch = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+have\s+(?:the\s+)?["']?([^"']+?)["']?\s+label$/);
+  if (exactLabelMatch) {
+    return {
+      type: "exact_count",
+      subject: exactLabelMatch[2]?.trim() ?? "",
+      value: parseInt(exactLabelMatch[1] ?? "0", 10),
+      labelFilter: exactLabelMatch[3]?.trim()
+    };
+  }
+  const allHaveAtLeastMatch = lower.match(/^all\s+(\d+)\s+(.+?)\s+have\s+at\s+least\s+one\s+(.+)$/);
+  if (allHaveAtLeastMatch) {
+    return {
+      type: "min_count",
+      subject: allHaveAtLeastMatch[2]?.trim() ?? "",
+      value: parseInt(allHaveAtLeastMatch[1] ?? "0", 10),
+      predicate: cleanPredicate(allHaveAtLeastMatch[3]?.trim() ?? "")
+    };
+  }
+  const allHaveMatch = lower.match(/^all\s+(\d+)\s+(.+?)\s+have\s+(.+)$/);
+  if (allHaveMatch) {
+    return {
+      type: "min_count",
+      subject: allHaveMatch[2]?.trim() ?? "",
+      value: parseInt(allHaveMatch[1] ?? "0", 10),
+      predicate: cleanPredicate(allHaveMatch[3]?.trim() ?? "")
+    };
+  }
+  const exactWithVerb = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be|have)\s+(.+)$/);
   if (exactWithVerb) {
     return {
       type: "exact_count",
@@ -1728,7 +1941,7 @@ function parseAssertion(description) {
       value: parseInt(exactWithoutVerb[1] ?? "0", 10)
     };
   }
-  const minWithVerb = lower.match(/^at\s+least\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be)\s+(.+)$/);
+  const minWithVerb = lower.match(/^at\s+least\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be|have)\s+(.+)$/);
   if (minWithVerb) {
     return {
       type: "min_count",
@@ -1790,6 +2003,95 @@ function parseAssertion(description) {
   if (/^no\s+errors?\s+(in\s+)?(trace|log|output)/i.test(lower)) {
     return { type: "no_errors", subject: "trace" };
   }
+  const agentFewerMatch = lower.match(/^the\s+agent\s+completed\s+in\s+fewer\s+than\s+(\d+)\s+tool\s+calls?$/);
+  if (agentFewerMatch) {
+    return {
+      type: "trace_count",
+      subject: "tool calls",
+      value: parseInt(agentFewerMatch[1] ?? "1", 10) - 1
+    };
+  }
+  const postedInChannelMatch = lower.match(/^a\s+(.+?)\s+was\s+(?:posted|created|sent)\s+in\s+#(\w[\w-]*)(?:\s+.+)?$/);
+  if (postedInChannelMatch) {
+    return {
+      type: "channel_check",
+      subject: postedInChannelMatch[1]?.trim() ?? "",
+      channel: postedInChannelMatch[2]?.trim()
+    };
+  }
+  const replyInChannelMatch = lower.match(/^a\s+reply\s+was\s+posted\s+in\s+#(\w[\w-]*)$/);
+  if (replyInChannelMatch) {
+    return {
+      type: "channel_check",
+      subject: "message",
+      channel: replyInChannelMatch[1]?.trim()
+    };
+  }
+  const noMessagesInMatch = lower.match(/^no\s+messages?\s+(?:about\s+.+?\s+)?(?:were|was)\s+(?:posted|created|sent)\s+in\s+(.+)$/);
+  if (noMessagesInMatch) {
+    const channelStr = noMessagesInMatch[1]?.trim() ?? "";
+    const channels = channelStr.match(/#(\w[\w-]*)/g)?.map((c) => c.slice(1)) ?? [];
+    if (channels.length === 0) {
+      const bareChannels = channelStr.split(/\s+(?:or|and|,)\s+/).map((s) => s.trim()).filter(Boolean);
+      channels.push(...bareChannels);
+    }
+    if (channels.length === 0 || channels.length === 1 && channels[0] === "") {
+      return null;
+    }
+    return {
+      type: "channel_check",
+      subject: "message",
+      channel: channels.join(","),
+      negated: true
+    };
+  }
+  const noCreatedInMatch = lower.match(/^no\s+(.+?)\s+(?:were|was|have been|had been)\s+(?:created|processed|charged|posted|sent|made|transferred)\s+(?:in|on|to|from|with|for|via)\s+(.+)$/);
+  if (noCreatedInMatch) {
+    return {
+      type: "exact_count",
+      subject: noCreatedInMatch[1]?.trim() ?? "",
+      value: 0,
+      targetService: noCreatedInMatch[2]?.trim()
+    };
+  }
+  const totalAmountMatch = lower.match(/^the\s+total\s+amount\s+(?:paid|charged|spent|transferred)\s*(?:out\s+)?is\s+\$?([\d,]+(?:\.\d+)?)$/);
+  if (totalAmountMatch) {
+    return {
+      type: "comparison",
+      subject: "total amount",
+      value: parseFloat((totalAmountMatch[1] ?? "0").replace(/,/g, ""))
+    };
+  }
+  const doesNotContainMatch = lowerOriginal.match(/^the\s+(.+?)\s+(?:body|content)\s+does\s+not\s+(?:contain|include)\s+(.+)$/);
+  if (doesNotContainMatch) {
+    const patternsRaw = doesNotContainMatch[2]?.trim() ?? "";
+    const patterns = [];
+    const quotedMatches = patternsRaw.matchAll(/["']([^"']+)["']/g);
+    for (const qm of quotedMatches) {
+      patterns.push(qm[1] ?? "");
+    }
+    const dollarMatches = patternsRaw.matchAll(/\$[\d,]+/g);
+    for (const dm of dollarMatches) {
+      patterns.push(dm[0] ?? "");
+    }
+    if (patterns.length === 0) {
+      patterns.push(patternsRaw);
+    }
+    return {
+      type: "content_check",
+      subject: doesNotContainMatch[1]?.trim() ?? "",
+      contentPatterns: patterns,
+      negated: true
+    };
+  }
+  const wasNotCreatedMatch = lower.match(/^the\s+(.+?)\s+was\s+not\s+created\s+in\s+(?:the\s+)?(?:public\s+)?(?:repository\s+)?["']?(.+?)["']?$/);
+  if (wasNotCreatedMatch) {
+    return {
+      type: "not_exists",
+      subject: wasNotCreatedMatch[1]?.trim() ?? "",
+      targetService: wasNotCreatedMatch[2]?.trim()
+    };
+  }
   const stateMatch = lower.match(/^(?:the\s+)?(.+?)\s+(?:is|was|has been|should be)\s+(created|merged|closed|open|deleted|removed|resolved|approved|rejected)/);
   if (stateMatch) {
     return {
@@ -1798,6 +2100,10 @@ function parseAssertion(description) {
       predicate: stateMatch[2]?.trim()
     };
   }
+  const wasCreatedMatch = lower.match(/^a\s+(.+?)\s+was\s+created\s+in\s+(?:a|the)\s+(.+)$/);
+  if (wasCreatedMatch) {
+    return { type: "exists", subject: wasCreatedMatch[1]?.trim() ?? "" };
+  }
   const existsMatch = lower.match(/^(?:the\s+)?(.+?)\s+(?:exists?|is present|was created|has been created)/);
   if (existsMatch) {
     return { type: "exists", subject: existsMatch[1]?.trim() ?? "" };
@@ -1930,6 +2236,14 @@ function evaluateDeterministic(criterion, stateView) {
             assertion.predicate
           );
         }
+        if (assertion.value === 0 && assertion.type === "exact_count") {
+          return {
+            criterionId: criterion.id,
+            status: "pass",
+            confidence: 0.9,
+            explanation: `No "${assertion.subject}" found in twin state (0 = 0)`
+          };
+        }
         return {
           criterionId: criterion.id,
           status: "fail",
@@ -1937,9 +2251,44 @@ function evaluateDeterministic(criterion, stateView) {
           explanation: `Could not find "${assertion.subject}" in twin state`
         };
       }
+      if (assertion.value === 0 && assertion.type === "exact_count" && assertion.targetService) {
+        const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
+        const newCount = afterItems.length - (beforeItems?.length ?? 0);
+        return evaluateCount(
+          criterion.id,
+          assertion.type,
+          0,
+          Math.max(0, newCount),
+          assertion.subject,
+          `newly created in ${assertion.targetService}`
+        );
+      }
+      let filteredItems = afterItems;
+      if (assertion.labelFilter) {
+        filteredItems = afterItems.filter((item) => {
+          if (typeof item !== "object" || item === null) return false;
+          const obj = item;
+          const labels = obj["labels"];
+          if (Array.isArray(labels)) {
+            return labels.some((l) => {
+              const labelName = typeof l === "string" ? l : l?.["name"];
+              return String(labelName).toLowerCase() === assertion.labelFilter?.toLowerCase();
+            });
+          }
+          return false;
+        });
+        return evaluateCount(
+          criterion.id,
+          assertion.type,
+          assertion.value ?? 0,
+          filteredItems.length,
+          assertion.subject,
+          `labeled "${assertion.labelFilter}"`
+        );
+      }
       if (assertion.predicate) {
         const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
-        const afterFiltered = filterByPredicate(afterItems, assertion.predicate);
+        const afterFiltered = filterByPredicate(filteredItems, assertion.predicate);
         if (beforeItems) {
           const beforeFiltered = filterByPredicate(beforeItems, assertion.predicate);
           const newlyMatching = afterFiltered.length - beforeFiltered.length;
@@ -1965,7 +2314,7 @@ function evaluateDeterministic(criterion, stateView) {
         criterion.id,
         assertion.type,
         assertion.value ?? 0,
-        afterItems.length,
+        filteredItems.length,
         assertion.subject,
         assertion.predicate
       );
@@ -2013,12 +2362,27 @@ function evaluateDeterministic(criterion, stateView) {
     }
     case "not_exists": {
       const items = resolveSubjectInState(assertion.subject, stateView.after);
-      const absent = items === null || items.length === 0;
+      let filteredItems = items;
+      if (filteredItems && assertion.targetService) {
+        const target = assertion.targetService.toLowerCase();
+        const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
+        const beforeCount = beforeItems?.length ?? 0;
+        const newItems = filteredItems.slice(beforeCount);
+        filteredItems = newItems.filter((item) => {
+          if (typeof item !== "object" || item === null) return false;
+          const obj = item;
+          const repo = String(obj["repository"] ?? obj["repo"] ?? obj["fullName"] ?? obj["full_name"] ?? "").toLowerCase();
+          const repoName = String(obj["repository_name"] ?? obj["repo_name"] ?? "").toLowerCase();
+          return repo.includes(target) || repoName.includes(target) || target.includes(repo) || target.includes(repoName);
+        });
+      }
+      const absent = filteredItems === null || filteredItems.length === 0;
+      const targetDesc = assertion.targetService ? ` in "${assertion.targetService}"` : "";
       return {
         criterionId: criterion.id,
         status: absent ? "pass" : "fail",
         confidence: 1,
-        explanation: absent ? `"${assertion.subject}" does not exist in twin state` : `"${assertion.subject}" still exists in twin state`
+        explanation: absent ? `"${assertion.subject}" does not exist${targetDesc} in twin state` : `"${assertion.subject}" still exists${targetDesc} in twin state (found ${filteredItems?.length ?? 0})`
       };
     }
     case "state_check": {
@@ -2041,6 +2405,51 @@ function evaluateDeterministic(criterion, stateView) {
       };
     }
     case "comparison": {
+      if (assertion.subject === "total amount") {
+        const flat = flattenTwinState(stateView.after);
+        let totalAmount = 0;
+        for (const key of ["paymentIntents", "payment_intents", "charges", "payouts", "transfers"]) {
+          const items = flat[key];
+          if (Array.isArray(items)) {
+            for (const item of items) {
+              if (typeof item === "object" && item !== null) {
+                const obj = item;
+                const amount = Number(obj["amount"] ?? obj["amount_paid"] ?? 0);
+                const status = String(obj["status"] ?? "");
+                if (status === "succeeded" || status === "paid" || status === "complete") {
+                  totalAmount += amount;
+                }
+              }
+            }
+          }
+        }
+        const flatBefore = flattenTwinState(stateView.before);
+        let beforeAmount = 0;
+        for (const key of ["paymentIntents", "payment_intents", "charges", "payouts", "transfers"]) {
+          const items = flatBefore[key];
+          if (Array.isArray(items)) {
+            for (const item of items) {
+              if (typeof item === "object" && item !== null) {
+                const obj = item;
+                const amount = Number(obj["amount"] ?? obj["amount_paid"] ?? 0);
+                const status = String(obj["status"] ?? "");
+                if (status === "succeeded" || status === "paid" || status === "complete") {
+                  beforeAmount += amount;
+                }
+              }
+            }
+          }
+        }
+        const netAmount = totalAmount - beforeAmount;
+        const expectedCents = (assertion.value ?? 0) * 100;
+        const passed = netAmount <= expectedCents;
+        return {
+          criterionId: criterion.id,
+          status: passed ? "pass" : "fail",
+          confidence: 1,
+          explanation: passed ? `Total new amount paid out is $${netAmount / 100} (expected $${assertion.value ?? 0})` : `Total new amount paid out is $${netAmount / 100}, expected $${assertion.value ?? 0}`
+        };
+      }
       return {
         criterionId: criterion.id,
         status: "fail",
@@ -2048,6 +2457,123 @@ function evaluateDeterministic(criterion, stateView) {
         explanation: `Comparison assertion type not fully implemented for: "${criterion.description}"`
       };
     }
+    case "trace_count": {
+      const traceCount = stateView.trace.length;
+      const maxAllowed = assertion.value ?? 0;
+      const passed = traceCount <= maxAllowed;
+      return {
+        criterionId: criterion.id,
+        status: passed ? "pass" : "fail",
+        confidence: 1,
+        explanation: passed ? `Agent made ${traceCount} tool calls (<= ${maxAllowed})` : `Agent made ${traceCount} tool calls, expected at most ${maxAllowed}`
+      };
+    }
+    case "channel_check": {
+      const flat = flattenTwinState(stateView.after);
+      const flatBefore = flattenTwinState(stateView.before);
+      const channels = assertion.channel?.split(",") ?? [];
+      const negated = assertion.negated ?? false;
+      const messages = flat["messages"] ?? [];
+      const messagesBefore = flatBefore["messages"] ?? [];
+      const beforeIds = new Set(messagesBefore.map((m) => {
+        if (typeof m === "object" && m !== null) {
+          return m["ts"] ?? m["id"];
+        }
+        return void 0;
+      }));
+      const newMessages = messages.filter((m) => {
+        if (typeof m !== "object" || m === null) return false;
+        const obj = m;
+        const id = obj["ts"] ?? obj["id"];
+        return !beforeIds.has(id);
+      });
+      const channelNames = flat["channels"] ?? [];
+      const channelIdMap = {};
+      for (const ch of channelNames) {
+        if (typeof ch === "object" && ch !== null) {
+          const obj = ch;
+          const name = String(obj["name"] ?? "");
+          const id = String(obj["id"] ?? "");
+          channelIdMap[id] = name;
+        }
+      }
+      const matchingMessages = newMessages.filter((m) => {
+        if (typeof m !== "object" || m === null) return false;
+        const obj = m;
+        const channelId = String(obj["channel"] ?? "");
+        const channelName = channelIdMap[channelId] ?? channelId;
+        return channels.some((c) => channelName === c || channelId === c);
+      });
+      if (negated) {
+        const passed = matchingMessages.length === 0;
+        return {
+          criterionId: criterion.id,
+          status: passed ? "pass" : "fail",
+          confidence: 1,
+          explanation: passed ? `No new messages were posted in #${channels.join(", #")}` : `Found ${matchingMessages.length} new message(s) in #${channels.join(", #")}`
+        };
+      } else {
+        const passed = matchingMessages.length > 0;
+        return {
+          criterionId: criterion.id,
+          status: passed ? "pass" : "fail",
+          confidence: 1,
+          explanation: passed ? `Found ${matchingMessages.length} new message(s) in #${channels.join(", #")}` : `No new messages found in #${channels.join(", #")}`
+        };
+      }
+    }
+    case "content_check": {
+      const flat = flattenTwinState(stateView.after);
+      const negated = assertion.negated ?? false;
+      const patterns = assertion.contentPatterns ?? [];
+      const subjectWords = assertion.subject.toLowerCase().split(/\s+/);
+      let contentToCheck = "";
+      const issues = flat["issues"] ?? [];
+      if (subjectWords.includes("issue")) {
+        for (const issue of issues) {
+          if (typeof issue === "object" && issue !== null) {
+            const obj = issue;
+            contentToCheck += String(obj["body"] ?? "") + " " + String(obj["title"] ?? "") + " ";
+          }
+        }
+      }
+      const messages = flat["messages"] ?? [];
+      if (subjectWords.includes("message") || subjectWords.includes("reply")) {
+        for (const msg of messages) {
+          if (typeof msg === "object" && msg !== null) {
+            const obj = msg;
+            contentToCheck += String(obj["text"] ?? "") + " ";
+          }
+        }
+      }
+      if (!contentToCheck.trim()) {
+        return {
+          criterionId: criterion.id,
+          status: negated ? "pass" : "fail",
+          confidence: 0.7,
+          explanation: negated ? `No ${assertion.subject} content found to check \u2014 passes by default` : `No ${assertion.subject} content found in twin state`
+        };
+      }
+      const lowerContent = contentToCheck.toLowerCase();
+      const foundPatterns = patterns.filter((p) => lowerContent.includes(p.toLowerCase()));
+      if (negated) {
+        const passed = foundPatterns.length === 0;
+        return {
+          criterionId: criterion.id,
+          status: passed ? "pass" : "fail",
+          confidence: 1,
+          explanation: passed ? `Content does not contain any of the checked patterns` : `Content contains: ${foundPatterns.map((p) => `"${p}"`).join(", ")}`
+        };
+      } else {
+        const passed = foundPatterns.length > 0;
+        return {
+          criterionId: criterion.id,
+          status: passed ? "pass" : "fail",
+          confidence: 1,
+          explanation: passed ? `Content contains: ${foundPatterns.map((p) => `"${p}"`).join(", ")}` : `Content does not contain any of: ${patterns.map((p) => `"${p}"`).join(", ")}`
+        };
+      }
+    }
   }
 }
 function evaluateCount(criterionId, type, expected, actual, subject, predicate) {
@@ -2083,8 +2609,154 @@ function evaluateCount(criterionId, type, expected, actual, subject, predicate)
   }
 }
+// src/evaluator/llm-provider.ts
+function detectProvider(model) {
+  if (model.startsWith("gemini-")) return "gemini";
+  if (model.startsWith("claude-")) return "anthropic";
+  if (model.startsWith("gpt-") || model.startsWith("o1-") || model.startsWith("o3-") || model.startsWith("o4-")) return "openai";
+  if (model.startsWith("llama") || model.startsWith("mixtral") || model.startsWith("mistral") || model.startsWith("deepseek") || model.startsWith("qwen") || model.startsWith("codestral") || model.startsWith("command")) return "openai-compatible";
+  return "openai-compatible";
+}
+var PROVIDER_ENV_VARS = {
+  gemini: "GEMINI_API_KEY",
+  anthropic: "ANTHROPIC_API_KEY",
+  openai: "OPENAI_API_KEY",
+  "openai-compatible": "LLM_API_KEY"
+};
+function getProviderEnvVar(provider) {
+  return PROVIDER_ENV_VARS[provider];
+}
+function resolveProviderApiKey(explicitKey, provider) {
+  if (explicitKey) return explicitKey;
+  return process.env[PROVIDER_ENV_VARS[provider]] ?? "";
+}
+var REQUEST_TIMEOUT_MS = 6e4;
+async function callLlm(options) {
+  debug("Calling LLM provider", { provider: options.provider, model: options.model });
+  switch (options.provider) {
+    case "gemini":
+      return callGemini(options);
+    case "anthropic":
+      return callAnthropic(options);
+    case "openai":
+      return callOpenAi(options);
+    case "openai-compatible":
+      return callOpenAiCompatible(options);
+  }
+}
+async function callGemini(options) {
+  const url = `https://generativelanguage.googleapis.com/v1beta/models/${options.model}:generateContent`;
+  const response = await fetch(url, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      "x-goog-api-key": options.apiKey
+    },
+    body: JSON.stringify({
+      systemInstruction: { parts: [{ text: options.systemPrompt }] },
+      contents: [{ parts: [{ text: options.userPrompt }] }],
+      generationConfig: { maxOutputTokens: options.maxTokens }
+    }),
+    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
+  });
+  if (!response.ok) {
+    const errorText = await response.text().catch(() => "");
+    throw new Error(`Gemini API error: ${response.status} ${errorText.slice(0, 200)}`);
+  }
+  const data = await response.json();
+  const text = data.candidates?.[0]?.content?.parts?.[0]?.text;
+  if (!text) throw new Error("Gemini returned no text content");
+  if (data.candidates?.[0]?.finishReason === "MAX_TOKENS") {
+    warn("Gemini response was truncated (hit max output tokens)");
+  }
+  return text;
+}
+async function callAnthropic(options) {
+  const response = await fetch("https://api.anthropic.com/v1/messages", {
+    method: "POST",
+    headers: {
+      "content-type": "application/json",
+      "x-api-key": options.apiKey,
+      "anthropic-version": "2023-06-01"
+    },
+    body: JSON.stringify({
+      model: options.model,
+      max_tokens: options.maxTokens,
+      system: options.systemPrompt,
+      messages: [{ role: "user", content: options.userPrompt }]
+    }),
+    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
+  });
+  if (!response.ok) {
+    const errorText = await response.text().catch(() => "");
+    throw new Error(`Anthropic API error: ${response.status} ${errorText.slice(0, 200)}`);
+  }
+  const data = await response.json();
+  const textBlock = data.content?.find((block) => block.type === "text");
+  if (!textBlock?.text) throw new Error("Anthropic returned no text content");
+  return textBlock.text;
+}
+async function callOpenAi(options) {
+  const response = await fetch("https://api.openai.com/v1/chat/completions", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      "Authorization": `Bearer ${options.apiKey}`
+    },
+    body: JSON.stringify({
+      model: options.model,
+      max_tokens: options.maxTokens,
+      messages: [
+        { role: "system", content: options.systemPrompt },
+        { role: "user", content: options.userPrompt }
+      ]
+    }),
+    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
+  });
+  if (!response.ok) {
+    const errorText = await response.text().catch(() => "");
+    throw new Error(`OpenAI API error: ${response.status} ${errorText.slice(0, 200)}`);
+  }
+  const data = await response.json();
+  const content = data.choices?.[0]?.message?.content;
+  if (!content) throw new Error("OpenAI returned no content");
+  return content;
+}
+async function callOpenAiCompatible(options) {
+  if (!options.baseUrl) {
+    throw new Error(
+      "baseUrl is required for openai-compatible provider. Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
+    );
+  }
+  const url = `${options.baseUrl.replace(/\/+$/, "")}/v1/chat/completions`;
+  debug("Calling OpenAI-compatible endpoint", { url, model: options.model });
+  const response = await fetch(url, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      "Authorization": `Bearer ${options.apiKey}`
+    },
+    body: JSON.stringify({
+      model: options.model,
+      max_tokens: options.maxTokens,
+      messages: [
+        { role: "system", content: options.systemPrompt },
+        { role: "user", content: options.userPrompt }
+      ]
+    }),
+    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
+  });
+  if (!response.ok) {
+    const errorText = await response.text().catch(() => "");
+    throw new Error(`OpenAI-compatible API error (${options.baseUrl}): ${response.status} ${errorText.slice(0, 200)}`);
+  }
+  const data = await response.json();
+  const content = data.choices?.[0]?.message?.content;
+  if (!content) throw new Error("OpenAI-compatible API returned no content");
+  return content;
+}
 // src/evaluator/llm-judge.ts
-import Anthropic from "@anthropic-ai/sdk";
 var SYSTEM_PROMPT = `You are an evaluator for AI agent testing. You assess whether an agent successfully met a specific success criterion during a scenario run.
 You will receive:
@@ -2192,13 +2864,6 @@ function parseJudgeResponse(text) {
     };
   }
 }
-var clientInstance = null;
-function getClient(apiKey) {
-  if (!clientInstance) {
-    clientInstance = new Anthropic({ apiKey });
-  }
-  return clientInstance;
-}
 async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAfter, stateDiff, trace, options) {
   const context = {
     criterion,
@@ -2208,43 +2873,35 @@ async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAf
     stateDiff,
     trace
   };
-  if (!options.apiKey) {
-    error("No API key provided for LLM evaluation");
+  const provider = detectProvider(options.model);
+  const apiKey = resolveProviderApiKey(options.apiKey, provider);
+  if (!apiKey) {
+    const envVar = getProviderEnvVar(provider);
+    error(`No API key for ${provider} evaluation`);
     return {
       criterionId: criterion.id,
       status: "fail",
       confidence: 0,
-      explanation: "No ANTHROPIC_API_KEY configured for probabilistic evaluation"
+      explanation: `No ${envVar} configured for probabilistic evaluation`
     };
   }
-  const client = getClient(options.apiKey);
   debug("Calling LLM judge", {
     criterion: criterion.id,
     model: options.model,
+    provider,
     traceLength: String(trace.length)
   });
   try {
-    const response = await client.messages.create({
+    const text = await callLlm({
+      provider,
       model: options.model,
-      max_tokens: 512,
-      system: SYSTEM_PROMPT,
-      messages: [
-        {
-          role: "user",
-          content: buildUserPrompt(context)
-        }
-      ]
+      apiKey,
+      systemPrompt: SYSTEM_PROMPT,
+      userPrompt: buildUserPrompt(context),
+      maxTokens: 512,
+      baseUrl: options.baseUrl
     });
-    const textBlock = response.content.find((block) => block.type === "text");
-    if (!textBlock || textBlock.type !== "text") {
-      return {
-        criterionId: criterion.id,
-        status: "fail",
-        confidence: 0.3,
-        explanation: "LLM returned no text content"
-      };
-    }
-    const judgeResult = parseJudgeResponse(textBlock.text);
+    const judgeResult = parseJudgeResponse(text);
     debug("LLM judge result", {
       criterion: criterion.id,
       status: judgeResult.status,
@@ -2310,7 +2967,18 @@ async function evaluateRun(criteria, context, config) {
       status: result.status
     });
   }
+  const apiKeyPresent = config.apiKey.trim().length > 0 && config.apiKey !== "missing";
   for (const criterion of probabilisticCriteria) {
+    if (!apiKeyPresent) {
+      progress(`Skipping [P] ${criterion.description} (no API key)`);
+      evaluations.push({
+        criterionId: criterion.id,
+        status: "fail",
+        confidence: 0,
+        explanation: "Skipped: no ANTHROPIC_API_KEY configured for LLM evaluation"
+      });
+      continue;
+    }
     progress(`Evaluating [P] ${criterion.description}`);
     const result = await evaluateWithLlm(
       criterion,
@@ -2319,7 +2987,7 @@ async function evaluateRun(criteria, context, config) {
       context.stateAfter,
       context.stateDiff,
       context.trace,
-      { apiKey: config.apiKey, model: config.model }
+      { apiKey: config.apiKey, model: config.model, baseUrl: config.baseUrl }
     );
     evaluations.push(result);
     debug("Probabilistic evaluation", {
@@ -2386,28 +3054,34 @@ function generateSummary(evaluations, satisfactionScore) {
 }
 // src/telemetry/recorder.ts
-import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as readFileSync6, readdirSync, existsSync as existsSync5, unlinkSync as unlinkSync2, statSync } from "fs";
+import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as readFileSync7, readdirSync, existsSync as existsSync6, unlinkSync as unlinkSync2, statSync } from "fs";
 import { join as join4 } from "path";
 import { randomUUID } from "crypto";
 // src/config/config.ts
-import { readFileSync as readFileSync5, writeFileSync as writeFileSync3, mkdirSync as mkdirSync2, existsSync as existsSync4, chmodSync } from "fs";
+import { readFileSync as readFileSync6, writeFileSync as writeFileSync3, mkdirSync as mkdirSync2, existsSync as existsSync5 } from "fs";
 import { join as join3 } from "path";
 import { homedir } from "os";
-import { z } from "zod";
+import { z as z2 } from "zod";
 var ARCHAL_DIR_NAME = ".archal";
 var CONFIG_FILE_NAME = "config.json";
-var evaluatorConfigSchema = z.object({
-  model: z.string().default("claude-sonnet-4-20250514"),
-  apiKey: z.string().default("env:ANTHROPIC_API_KEY")
+var evaluatorConfigSchema = z2.object({
+  model: z2.string().default("gemini-2.0-flash"),
+  apiKey: z2.string().default("env:GEMINI_API_KEY"),
+  baseUrl: z2.string().optional()
+});
+var seedGenerationConfigSchema = z2.object({
+  model: z2.string().default("gemini-3-flash-preview"),
+  geminiApiKey: z2.string().default("env:GEMINI_API_KEY")
 });
-var defaultsConfigSchema = z.object({
-  runs: z.number().int().positive().default(5),
-  timeout: z.number().int().positive().default(120)
+var defaultsConfigSchema = z2.object({
+  runs: z2.number().int().positive().default(5),
+  timeout: z2.number().int().positive().default(120)
 });
-var configFileSchema = z.object({
-  telemetry: z.boolean().default(false),
+var configFileSchema = z2.object({
+  telemetry: z2.boolean().default(false),
   evaluator: evaluatorConfigSchema.default({}),
+  seedGeneration: seedGenerationConfigSchema.default({}),
   defaults: defaultsConfigSchema.default({})
 });
 function getArchalDir() {
@@ -2418,7 +3092,7 @@ function getConfigPath() {
 }
 function ensureArchalDir() {
   const dir = getArchalDir();
-  if (!existsSync4(dir)) {
+  if (!existsSync5(dir)) {
     mkdirSync2(dir, { recursive: true });
     debug("Created archal directory", { path: dir });
   }
@@ -2426,19 +3100,19 @@ function ensureArchalDir() {
 }
 function loadConfigFile() {
   const configPath = getConfigPath();
-  if (!existsSync4(configPath)) {
+  if (!existsSync5(configPath)) {
     debug("No config file found, using defaults", { path: configPath });
     return configFileSchema.parse({});
   }
   try {
-    const raw = readFileSync5(configPath, "utf-8");
+    const raw = readFileSync6(configPath, "utf-8");
     const parsed = JSON.parse(raw);
     const config = configFileSchema.parse(parsed);
     debug("Loaded config file", { path: configPath });
     return config;
   } catch (err) {
     const message = err instanceof Error ? err.message : String(err);
-    warn(`Failed to parse config file at ${configPath}: ${message}`);
+    error(`Failed to parse config file at ${configPath}: ${message}. Using defaults.`);
     return configFileSchema.parse({});
   }
 }
@@ -2455,16 +3129,24 @@ function loadConfig() {
   const envModel = process.env["ARCHAL_MODEL"];
   const envRuns = process.env["ARCHAL_RUNS"];
   const envTimeout = process.env["ARCHAL_TIMEOUT"];
-  const envApiKey = process.env["ANTHROPIC_API_KEY"];
+  const envBaseUrl = process.env["ARCHAL_EVALUATOR_BASE_URL"];
+  const envGeminiApiKey = process.env["GEMINI_API_KEY"];
+  const envSeedModel = process.env["ARCHAL_SEED_MODEL"];
   const telemetry = envTelemetry !== void 0 ? envTelemetry === "true" : file.telemetry;
   const model = envModel ?? file.evaluator.model;
   const runs = envRuns !== void 0 ? parseInt(envRuns, 10) : file.defaults.runs;
   const timeout = envTimeout !== void 0 ? parseInt(envTimeout, 10) : file.defaults.timeout;
-  const apiKey = envApiKey ?? resolveApiKey(file.evaluator.apiKey);
+  const apiKey = resolveApiKey(file.evaluator.apiKey);
+  const geminiApiKey = envGeminiApiKey ?? resolveApiKey(file.seedGeneration.geminiApiKey);
+  const seedModel = envSeedModel ?? file.seedGeneration.model;
+  const baseUrl = envBaseUrl ?? file.evaluator.baseUrl;
   return {
     telemetry,
     apiKey,
     model,
+    baseUrl,
+    geminiApiKey,
+    seedModel,
     runs: Number.isNaN(runs) ? 5 : runs,
     timeout: Number.isNaN(timeout) ? 120 : timeout,
     archalDir: getArchalDir(),
@@ -2475,9 +3157,9 @@ function saveConfig(config) {
   const dir = ensureArchalDir();
   const configPath = join3(dir, CONFIG_FILE_NAME);
   let existing;
-  if (existsSync4(configPath)) {
+  if (existsSync5(configPath)) {
     try {
-      const raw = readFileSync5(configPath, "utf-8");
+      const raw = readFileSync6(configPath, "utf-8");
       existing = configFileSchema.parse(JSON.parse(raw));
     } catch {
       existing = configFileSchema.parse({});
@@ -2491,31 +3173,27 @@ function saveConfig(config) {
       ...existing.evaluator,
       ...config.evaluator
     },
+    seedGeneration: {
+      ...existing.seedGeneration,
+      ...config.seedGeneration
+    },
     defaults: {
       ...existing.defaults,
       ...config.defaults
     }
   };
-  writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", "utf-8");
-  try {
-    chmodSync(configPath, 384);
-  } catch {
-  }
+  writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
   debug("Saved config file", { path: configPath });
 }
 function initConfig() {
   const configPath = getConfigPath();
-  if (existsSync4(configPath)) {
+  if (existsSync5(configPath)) {
     warn(`Config file already exists at ${configPath}`);
     return configPath;
   }
   const defaultConfig = configFileSchema.parse({});
   ensureArchalDir();
-  writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", "utf-8");
-  try {
-    chmodSync(configPath, 384);
-  } catch {
-  }
+  writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
   return configPath;
 }
 function setConfigValue(key, value) {
@@ -2530,13 +3208,20 @@ function setConfigValue(key, value) {
   }
   if (parts.length === 2) {
     const [section, prop] = parts;
-    if (section === "evaluator" && (prop === "model" || prop === "apiKey")) {
+    if (section === "evaluator" && (prop === "model" || prop === "apiKey" || prop === "baseUrl")) {
       saveConfig({
         ...file,
         evaluator: { ...file.evaluator, [prop]: value }
       });
       return;
     }
+    if (section === "seedGeneration" && (prop === "model" || prop === "geminiApiKey")) {
+      saveConfig({
+        ...file,
+        seedGeneration: { ...file.seedGeneration, [prop]: value }
+      });
+      return;
+    }
     if (section === "defaults" && (prop === "runs" || prop === "timeout")) {
       const numValue = parseInt(value, 10);
       if (Number.isNaN(numValue) || numValue <= 0) {
@@ -2550,7 +3235,7 @@ function setConfigValue(key, value) {
     }
   }
   throw new Error(
-    `Unknown config key: "${key}". Valid keys: telemetry, evaluator.model, evaluator.apiKey, defaults.runs, defaults.timeout`
+    `Unknown config key: "${key}". Valid keys: telemetry, evaluator.model, evaluator.apiKey, evaluator.baseUrl, seedGeneration.model, seedGeneration.geminiApiKey, defaults.runs, defaults.timeout`
   );
 }
 function getConfigDisplay() {
@@ -2559,7 +3244,12 @@ function getConfigDisplay() {
     telemetry: resolved.telemetry,
     evaluator: {
       model: resolved.model,
-      apiKey: resolved.apiKey ? "***" + resolved.apiKey.slice(-4) : "(not set)"
+      apiKey: resolved.apiKey ? "***" + resolved.apiKey.slice(-4) : "(not set)",
+      ...resolved.baseUrl ? { baseUrl: resolved.baseUrl } : {}
+    },
+    seedGeneration: {
+      model: resolved.seedModel,
+      geminiApiKey: resolved.geminiApiKey ? "***" + resolved.geminiApiKey.slice(-4) : "(not set)"
     },
     defaults: {
       runs: resolved.runs,
@@ -2580,7 +3270,7 @@ function getTracesDir() {
 }
 function ensureTracesDir() {
   const dir = getTracesDir();
-  if (!existsSync5(dir)) {
+  if (!existsSync6(dir)) {
     ensureArchalDir();
     mkdirSync3(dir, { recursive: true });
   }
@@ -2590,14 +3280,14 @@ function traceFilePath(id) {
   return join4(getTracesDir(), `${id}.json`);
 }
 function traceJsonFiles(dir) {
-  return existsSync5(dir) ? readdirSync(dir).filter((f) => f.endsWith(".json")).sort().reverse() : [];
+  return existsSync6(dir) ? readdirSync(dir).filter((f) => f.endsWith(".json")).sort().reverse() : [];
 }
 function toMetadata(s) {
   return { id: s.id, scenarioTitle: s.scenarioTitle, timestamp: s.timestamp, satisfactionScore: s.satisfactionScore, runCount: s.runCount, entryCount: s.entries.length };
 }
 function loadTraceByPath(filePath) {
   try {
-    return JSON.parse(readFileSync6(filePath, "utf-8"));
+    return JSON.parse(readFileSync7(filePath, "utf-8"));
   } catch (err) {
     warn(`Failed to load trace: ${err instanceof Error ? err.message : String(err)}`);
     return null;
@@ -2605,7 +3295,7 @@ function loadTraceByPath(filePath) {
 }
 function findTraceByPrefix(prefix) {
   const dir = getTracesDir();
-  if (!existsSync5(dir)) return null;
+  if (!existsSync6(dir)) return null;
   const file = readdirSync(dir).find((f) => f.endsWith(".json") && f.replace(".json", "").startsWith(prefix));
   return file ? file.replace(".json", "") : null;
 }
@@ -2641,7 +3331,7 @@ function recordTrace(report) {
 }
 function loadTrace(traceId) {
   const filePath = traceFilePath(traceId);
-  if (existsSync5(filePath)) return loadTraceByPath(filePath);
+  if (existsSync6(filePath)) return loadTraceByPath(filePath);
   const match = findTraceByPrefix(traceId);
   return match ? loadTraceByPath(traceFilePath(match)) : null;
 }
@@ -2650,7 +3340,7 @@ function listTraces(limit = 20) {
   const results = [];
   for (const file of traceJsonFiles(dir).slice(0, limit)) {
     try {
-      results.push(toMetadata(JSON.parse(readFileSync6(join4(dir, file), "utf-8"))));
+      results.push(toMetadata(JSON.parse(readFileSync7(join4(dir, file), "utf-8"))));
     } catch {
       debug(`Skipping corrupted trace file: ${file}`);
     }
@@ -2664,7 +3354,7 @@ function searchTraces(options) {
   for (const file of traceJsonFiles(dir)) {
     if (results.length >= limit) break;
     try {
-      const stored = JSON.parse(readFileSync6(join4(dir, file), "utf-8"));
+      const stored = JSON.parse(readFileSync7(join4(dir, file), "utf-8"));
       if (options.scenario && !stored.scenarioTitle.toLowerCase().includes(options.scenario.toLowerCase())) continue;
       if (options.minScore !== void 0 && stored.satisfactionScore < options.minScore) continue;
       if (options.maxScore !== void 0 && stored.satisfactionScore > options.maxScore) continue;
@@ -2679,7 +3369,7 @@ function searchTraces(options) {
 }
 function deleteTrace(traceId) {
   let filePath = traceFilePath(traceId);
-  if (!existsSync5(filePath)) {
+  if (!existsSync6(filePath)) {
     const match = findTraceByPrefix(traceId);
     if (!match) return false;
     filePath = traceFilePath(match);
@@ -2695,7 +3385,7 @@ function deleteTrace(traceId) {
 }
 function deleteAllTraces() {
   const dir = getTracesDir();
-  if (!existsSync5(dir)) return 0;
+  if (!existsSync6(dir)) return 0;
   let deleted = 0;
   for (const file of readdirSync(dir).filter((f) => f.endsWith(".json"))) {
     try {
@@ -2732,7 +3422,7 @@ function getTraceStats() {
     const filePath = join4(dir, file);
     try {
       diskUsageBytes += statSync(filePath).size;
-      const stored = JSON.parse(readFileSync6(filePath, "utf-8"));
+      const stored = JSON.parse(readFileSync7(filePath, "utf-8"));
       scores.push(stored.satisfactionScore);
       totalRuns += stored.runCount;
       totalEntries += stored.entries.length;
@@ -2979,9 +3669,28 @@ function anonymizeTrace(entries) {
 }
 // src/telemetry/consent.ts
-import { existsSync as existsSync6, readFileSync as readFileSync7, writeFileSync as writeFileSync5, unlinkSync as unlinkSync3 } from "fs";
+import { existsSync as existsSync7, readFileSync as readFileSync9, writeFileSync as writeFileSync5, unlinkSync as unlinkSync3 } from "fs";
 import { join as join5 } from "path";
 import { createInterface } from "readline";
+// src/utils/version.ts
+import { readFileSync as readFileSync8 } from "fs";
+import { resolve as resolve5 } from "path";
+import { fileURLToPath as fileURLToPath3 } from "url";
+var __dirname3 = fileURLToPath3(new URL(".", import.meta.url));
+function loadVersion() {
+  try {
+    const pkgPath = resolve5(__dirname3, "..", "package.json");
+    const pkg = JSON.parse(readFileSync8(pkgPath, "utf-8"));
+    return typeof pkg.version === "string" ? pkg.version : "0.0.0";
+  } catch {
+    return "0.0.0";
+  }
+}
+var CLI_VERSION = loadVersion();
+var CLI_USER_AGENT = `archal-cli/${CLI_VERSION}`;
+// src/telemetry/consent.ts
 var CONSENT_FILE = ".telemetry-consent";
 var TELEMETRY_NOTICE = `
 Archal collects anonymous usage telemetry to improve the product.
@@ -3007,7 +3716,7 @@ function getConsentStatus() {
   const env = process.env["ARCHAL_TELEMETRY"];
   if (env !== void 0) return env === "true" ? "granted" : "denied";
   try {
-    const record = JSON.parse(readFileSync7(consentPath(), "utf-8"));
+    const record = JSON.parse(readFileSync9(consentPath(), "utf-8"));
     return record.status;
   } catch {
     return "pending";
@@ -3015,7 +3724,7 @@ function getConsentStatus() {
 }
 function saveConsent(status) {
   const dir = ensureArchalDir();
-  const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version: "0.1.0" };
+  const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version: CLI_VERSION };
   writeFileSync5(join5(dir, CONSENT_FILE), JSON.stringify(record, null, 2) + "\n", "utf-8");
   debug("Saved telemetry consent", { status });
 }
@@ -3033,7 +3742,7 @@ async function promptForConsent() {
   }
   process.stderr.write(TELEMETRY_NOTICE);
   const rl = createInterface({ input: process.stdin, output: process.stderr });
-  return new Promise((resolve11) => {
+  return new Promise((resolve13) => {
     rl.question("\nEnable anonymous telemetry? [y/N] ", (answer) => {
       rl.close();
       const enabled = answer.trim().toLowerCase() === "y";
@@ -3044,7 +3753,7 @@ async function promptForConsent() {
         denyConsent();
         process.stderr.write("\nTelemetry disabled.\n\n");
       }
-      resolve11(enabled);
+      resolve13(enabled);
     });
   });
 }
@@ -3053,11 +3762,11 @@ async function ensureConsentResolved() {
 }
 // src/telemetry/uploader.ts
-var ENDPOINT = "https://api.archal.dev/v1/traces";
+var ENDPOINT = process.env["ARCHAL_TELEMETRY_URL"] ?? "https://api.archal.dev/v1/traces";
 var BATCH_SIZE = 50;
 var MAX_RETRIES = 3;
 var BASE_RETRY_DELAY_MS = 1e3;
-var REQUEST_TIMEOUT_MS = 3e4;
+var REQUEST_TIMEOUT_MS2 = 3e4;
 var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([408, 429, 500, 502, 503, 504]);
 function isTelemetryEnabled() {
   const consent = getConsentStatus();
@@ -3072,7 +3781,7 @@ function buildMetadata(report, totalEntries) {
       if (prefix) twinNames.add(prefix);
     }
   return {
-    cliVersion: "0.1.0",
+    cliVersion: CLI_VERSION,
     nodeVersion: process.version,
     platform: process.platform,
     arch: process.arch,
@@ -3106,7 +3815,7 @@ async function sendBatchWithRetry(payload, batchNum, totalBatches) {
     alreadySlept = false;
     try {
       const controller = new AbortController();
-      const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
+      const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS2);
       const body = JSON.stringify(payload);
       debug(`Sending batch ${batchNum}/${totalBatches}`, { entries: String(payload.entries.length), sizeBytes: String(body.length) });
       const response = await fetch(ENDPOINT, {
@@ -3201,8 +3910,7 @@ async function uploadIfEnabled(traceId, report) {
 }
 // src/runner/dynamic-seed-generator.ts
-import Anthropic2 from "@anthropic-ai/sdk";
-import { z as z2 } from "zod";
+import { z as z3 } from "zod";
 // src/runner/seed-patch.ts
 var TWINS_WITHOUT_SEED_FILE_SUPPORT = /* @__PURE__ */ new Set(["supabase"]);
@@ -3408,7 +4116,7 @@ function getProjectedEntities(baseSeed, patch, collection) {
 // src/runner/seed-cache.ts
 import { createHash as createHash2 } from "crypto";
-import { existsSync as existsSync7, mkdirSync as mkdirSync4, readFileSync as readFileSync8, writeFileSync as writeFileSync6, readdirSync as readdirSync2, unlinkSync as unlinkSync4, statSync as statSync2 } from "fs";
+import { existsSync as existsSync8, mkdirSync as mkdirSync4, readFileSync as readFileSync10, writeFileSync as writeFileSync6, readdirSync as readdirSync2, unlinkSync as unlinkSync4, statSync as statSync2 } from "fs";
 import { join as join6 } from "path";
 import { homedir as homedir2 } from "os";
 var CACHE_VERSION = 1;
@@ -3419,13 +4127,13 @@ function cacheKey(twinName, baseSeedName, setupText) {
   return hash.slice(0, 32);
 }
 function ensureCacheDir() {
-  if (!existsSync7(CACHE_DIR)) {
+  if (!existsSync8(CACHE_DIR)) {
     mkdirSync4(CACHE_DIR, { recursive: true });
   }
 }
 function evictStaleEntries() {
   try {
-    if (!existsSync7(CACHE_DIR)) return;
+    if (!existsSync8(CACHE_DIR)) return;
     const now = Date.now();
     for (const file of readdirSync2(CACHE_DIR)) {
       if (!file.endsWith(".json")) continue;
@@ -3445,7 +4153,7 @@ function getCachedSeed(twinName, baseSeedName, setupText) {
     const filePath = join6(CACHE_DIR, `${key}.json`);
     let raw;
     try {
-      raw = readFileSync8(filePath, "utf-8");
+      raw = readFileSync10(filePath, "utf-8");
     } catch {
       return null;
     }
@@ -3483,26 +4191,57 @@ function cacheSeed(twinName, baseSeedName, setupText, seed, patch) {
 }
 // src/runner/dynamic-seed-generator.ts
-var SeedPatchSchema = z2.object({
-  add: z2.record(z2.array(z2.record(z2.unknown()))).optional(),
-  modify: z2.record(z2.array(z2.record(z2.unknown()))).optional(),
-  remove: z2.record(z2.array(z2.number())).optional()
+var SeedPatchSchema = z3.object({
+  add: z3.record(z3.array(z3.record(z3.unknown()))).optional(),
+  modify: z3.record(z3.array(z3.record(z3.unknown()))).optional(),
+  remove: z3.record(z3.array(z3.number())).optional()
 }).strict();
-var clientInstance2 = null;
-var clientApiKey = null;
-function getClient2(apiKey) {
-  if (!clientInstance2 || clientApiKey !== apiKey) {
-    clientInstance2 = new Anthropic2({ apiKey });
-    clientApiKey = apiKey;
+var GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";
+async function callGemini2(apiKey, model, systemPrompt, userPrompt, maxOutputTokens) {
+  const url = `${GEMINI_BASE_URL}/${model}:generateContent`;
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), 6e4);
+  try {
+    const response = await fetch(url, {
+      method: "POST",
+      headers: { "Content-Type": "application/json", "x-goog-api-key": apiKey },
+      body: JSON.stringify({
+        systemInstruction: { parts: [{ text: systemPrompt }] },
+        contents: [{ parts: [{ text: userPrompt }] }],
+        generationConfig: {
+          maxOutputTokens,
+          responseMimeType: "application/json"
+        }
+      }),
+      signal: controller.signal
+    });
+    clearTimeout(timeout);
+    if (response.status === 429 || response.status >= 500) {
+      warn(`Gemini API returned ${response.status}, will retry`);
+      return { text: null, truncated: false };
+    }
+    if (!response.ok) {
+      const errorText = await response.text();
+      warn(`Gemini API error: ${response.status} ${errorText}`);
+      return { text: null, truncated: false };
+    }
+    const data = await response.json();
+    const text = data.candidates?.[0]?.content?.parts?.[0]?.text ?? null;
+    const truncated = data.candidates?.[0]?.finishReason === "MAX_TOKENS";
+    return { text, truncated };
+  } catch (err) {
+    clearTimeout(timeout);
+    throw err;
   }
-  return clientInstance2;
 }
 var SYSTEM_PROMPT2 = `You are a test data generator for Archal, a testing platform for AI agents. Your job is to generate seed data patches that create realistic digital twin states matching a given setup description.
+CRITICAL CONTEXT: The seed data you generate is what an AI agent will interact with during a test scenario. The agent connects to a digital twin (a behavioral clone of a real service like Slack, GitHub, or Stripe) and uses API calls to read and act on the data. If a message, user, channel, issue, or any other entity described in the setup is NOT present in the seed data, the agent literally cannot find or interact with it, and the test will fail. You must faithfully reproduce EVERY specific detail from the setup description.
 You will receive:
-1. The twin type (e.g., "github", "slack")
-2. A sample of the base seed data showing the shape of real entities
-3. The current max ID per collection
+1. The twin type (e.g., "github", "slack", "stripe")
+2. A sample of the base seed data showing the exact schema of each entity type
+3. The current entity counts and max IDs per collection
 4. Referential integrity rules
 5. A natural language setup description
@@ -3521,23 +4260,60 @@ Respond with ONLY valid JSON in this exact format:
   }
 }
-Rules:
+## FAITHFULNESS RULES (most important)
+- EVERY specific detail in the setup description MUST be represented in the seed data. This includes:
+  - Exact usernames, display names, and user IDs mentioned
+  - Exact channel names (including whether they are public or private)
+  - Exact message text \u2014 if the setup contains quoted text, it must appear VERBATIM in a message entity's "text" field
+  - Exact dollar amounts, invoice numbers, account numbers
+  - Exact repository names, organization names, issue titles
+  - Exact labels, categories, and statuses
+  - Specific member counts and membership lists
+- If the setup says a user "mark.wilson" exists and a DIFFERENT user "markwilson-ceo" sent a message, you must create BOTH users with those exact usernames
+- If the setup quotes a message like "URGENT \u2014 I need you to process...", that exact text must be in a message entity
+- Company/workspace names in the setup override whatever is in the base seed \u2014 modify the workspace entity accordingly
+- If the setup mentions a channel has N members, include at least the named users plus enough additional users to reach that count
+## SERVICE-SPECIFIC GUIDANCE
+### Slack
+- Users need: user_id (format "UXXXX"), name, real_name, display_name, is_bot, is_admin
+- Channels need: channel_id (format "CXXXX"), name, is_private, members (array of user_ids)
+- Messages need: ts (unique Slack timestamp like "1706140800.100001"), channel_id, user_id, text, thread_ts (null for top-level, parent's ts for replies), reply_count, reply_users, latest_reply, subtype, edited
+- For threaded conversations: the parent message has reply_count > 0 and reply_users populated. Reply messages have thread_ts set to the parent's ts
+- A user must be in a channel's members array to post messages in that channel
+### GitHub
+- Repos need: owner (the org or user name), name, fullName ("owner/name"), isPrivate
+- Issues need: repoId, number (sequential), title, body, state ("open"/"closed"), labels (array of label names), user (creator username)
+- If setup mentions both public and private repos, create both with correct isPrivate values
+### Stripe
+- Accounts need: accountId, businessName, defaultCurrency, chargesEnabled, payoutsEnabled
+- Customers need: customerId ("cus_xxx"), name, email, balance (in cents)
+- PaymentIntents need: paymentIntentId, amount (in cents), currency, status
+- The account's businessName should match the company name in the setup
+- Stripe amounts are always in the smallest currency unit (cents for USD \u2014 $24,800 = 2480000)
+## STRUCTURAL RULES
 - Only include sections (add/modify/remove) and collections that need changes
 - Do NOT include id, createdAt, or updatedAt in added entities \u2014 they are auto-assigned
 - For modify, include the existing entity's id and only the fields to change
 - Maintain referential integrity per the rules provided
-- Use realistic data (real-looking names, descriptions, timestamps in ISO 8601)
 - Match the field types and formats exactly as shown in the base seed example
 - If the setup mentions specific counts (e.g., "20 issues"), generate that exact count
 - Keep data internally consistent (e.g., issue numbers sequential, branch refs valid)
+- Use unique ts values for each Slack message (increment by 100+ between messages)
 - If the base seed already matches the setup description, respond with {}`;
-function truncateBaseSeed(baseSeed) {
+function truncateBaseSeed(baseSeed, maxPerCollection = 2) {
   const truncated = {};
   for (const [collection, entities] of Object.entries(baseSeed)) {
     if (entities.length === 0) {
       truncated[collection] = [];
     } else {
-      truncated[collection] = [entities[0]];
+      truncated[collection] = entities.slice(0, maxPerCollection);
     }
   }
   return truncated;
@@ -3560,7 +4336,7 @@ function buildSeedGenerationPrompt(twinName, baseSeedData, setupDescription) {
   let prompt = `## Twin: ${twinName}
 `;
-  prompt += `## Base Seed (first entity per collection, showing data shape)
+  prompt += `## Base Seed (sample entities per collection, showing exact data shape)
 `;
   prompt += `\`\`\`json
 ${JSON.stringify(truncated, null, 2)}
@@ -3575,6 +4351,10 @@ ${JSON.stringify(truncated, null, 2)}
 `;
   prompt += Object.entries(maxIds).map(([col, id]) => `- ${col}: ${id}`).join("\n");
   prompt += "\n\n";
+  prompt += `## Available collections
+`;
+  prompt += Object.keys(baseSeedData).map((col) => `- ${col}`).join("\n");
+  prompt += "\n\n";
   if (relationships.length > 0) {
     prompt += `## Referential integrity rules
 `;
@@ -3582,6 +4362,8 @@ ${JSON.stringify(truncated, null, 2)}
     prompt += "\n\n";
   }
   prompt += `## Setup Description
+Generate seed data that faithfully reproduces EVERY detail below. Specific names, messages, amounts, and entities mentioned MUST exist in the generated data.
 ${setupDescription}`;
   return prompt;
 }
@@ -3621,11 +4403,10 @@ async function generateDynamicSeed(twinName, baseSeedName, baseSeedData, setupDe
       return { seed: cached.seed, patch: cached.patch, fromCache: true };
     }
   }
-  if (!config.apiKey) {
-    warn("No API key for dynamic seed generation, using base seed");
+  if (!config.geminiApiKey) {
+    warn("No Gemini API key for dynamic seed generation, using base seed");
     return { seed: baseSeedData, patch: {}, fromCache: false };
   }
-  const client = getClient2(config.apiKey);
   const userPrompt = buildSeedGenerationPrompt(twinName, baseSeedData, setupDescription);
   progress(`Generating dynamic seed for ${twinName}...`);
   let patch = null;
@@ -3641,27 +4422,27 @@ Fix these issues:
 `;
         promptWithFeedback += lastErrors.map((e) => `- ${e}`).join("\n");
       }
-      debug("Calling LLM for dynamic seed", {
+      debug("Calling Gemini for dynamic seed", {
         twin: twinName,
         model: config.model,
         attempt: String(attempt + 1)
       });
-      const response = await client.messages.create({
-        model: config.model,
-        max_tokens: 16384,
-        system: SYSTEM_PROMPT2,
-        messages: [{ role: "user", content: promptWithFeedback }]
-      });
-      if (response.stop_reason === "max_tokens") {
-        warn("LLM response was truncated (hit max_tokens), retrying");
+      const result = await callGemini2(
+        config.geminiApiKey,
+        config.model,
+        SYSTEM_PROMPT2,
+        promptWithFeedback,
+        16384
+      );
+      if (result.truncated) {
+        warn("Gemini response was truncated (hit max output tokens), retrying");
         continue;
       }
-      const textBlock = response.content.find((block) => block.type === "text");
-      if (!textBlock || textBlock.type !== "text") {
-        warn("LLM returned no text content for dynamic seed");
+      if (!result.text) {
+        warn("Gemini returned no text content for dynamic seed");
         continue;
       }
-      patch = parseSeedPatchResponse(textBlock.text);
+      patch = parseSeedPatchResponse(result.text);
       if (!patch) continue;
       const validation = validateSeedPatch(patch, baseSeedData, twinName);
       if (!validation.valid) {
@@ -3693,11 +4474,11 @@ Fix these issues:
 // src/commands/doctor.ts
 import { Command } from "commander";
-import { existsSync as existsSync8, readFileSync as readFileSync9 } from "fs";
-import { resolve as resolve4 } from "path";
+import { existsSync as existsSync9, readFileSync as readFileSync11 } from "fs";
+import { resolve as resolve6 } from "path";
 import { createRequire as createRequire3 } from "module";
-import { fileURLToPath as fileURLToPath3 } from "url";
-var __dirname3 = fileURLToPath3(new URL(".", import.meta.url));
+import { fileURLToPath as fileURLToPath4 } from "url";
+var __dirname4 = fileURLToPath4(new URL(".", import.meta.url));
 var PASS = `${GREEN}${BOLD}pass${RESET}`;
 var FAIL = `${RED}${BOLD}FAIL${RESET}`;
 var WARN_TAG = `${YELLOW}${BOLD}warn${RESET}`;
@@ -3712,20 +4493,20 @@ var KNOWN_TWINS = [
   "google-workspace"
 ];
 function resolveMonorepoRoot2() {
-  let cursor = __dirname3;
+  let cursor = __dirname4;
   for (let depth = 0; depth < 8; depth += 1) {
-    const hasTwinsDir = existsSync8(resolve4(cursor, "twins"));
-    const hasWorkspacePackage = existsSync8(resolve4(cursor, "package.json"));
+    const hasTwinsDir = existsSync9(resolve6(cursor, "twins"));
+    const hasWorkspacePackage = existsSync9(resolve6(cursor, "package.json"));
     if (hasTwinsDir && hasWorkspacePackage) {
       return cursor;
     }
-    const parent = resolve4(cursor, "..");
+    const parent = resolve6(cursor, "..");
     if (parent === cursor) {
       break;
     }
     cursor = parent;
   }
-  return resolve4(__dirname3, "..", "..");
+  return resolve6(__dirname4, "..", "..");
 }
 function statusTag(status) {
   switch (status) {
@@ -3756,7 +4537,7 @@ function checkNodeVersion() {
 }
 function checkArchalDir() {
   const dir = getArchalDir();
-  if (existsSync8(dir)) {
+  if (existsSync9(dir)) {
     return {
       name: "Archal directory",
       status: "pass",
@@ -3772,7 +4553,7 @@ function checkArchalDir() {
 }
 function checkConfigFile() {
   const path = getConfigPath();
-  if (existsSync8(path)) {
+  if (existsSync9(path)) {
     return {
       name: "Config file",
       status: "pass",
@@ -3788,25 +4569,38 @@ function checkConfigFile() {
 }
 function checkApiKey() {
   const config = loadConfig();
-  if (config.apiKey && config.apiKey.length > 0) {
-    const masked = "***" + config.apiKey.slice(-4);
+  const provider = detectProvider(config.model);
+  const resolvedKey = resolveProviderApiKey(config.apiKey, provider);
+  const envVar = getProviderEnvVar(provider);
+  const label = provider === "openai-compatible" ? `custom: ${config.model}` : provider;
+  if (provider === "openai-compatible" && !config.baseUrl) {
+    return {
+      name: `Evaluator API key (${label})`,
+      status: "fail",
+      message: "No base URL configured",
+      detail: "Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
+    };
+  }
+  if (resolvedKey && resolvedKey.length > 0) {
+    const masked = "***" + resolvedKey.slice(-4);
     return {
-      name: "ANTHROPIC_API_KEY",
+      name: `Evaluator API key (${label})`,
       status: "pass",
       message: `Set (${masked})`
     };
   }
   return {
-    name: "ANTHROPIC_API_KEY",
+    name: `Evaluator API key (${label})`,
     status: "fail",
     message: "Not set",
-    detail: "Required for probabilistic ([P]) criteria evaluation. Set via: export ANTHROPIC_API_KEY=sk-ant-..."
+    detail: `Required for probabilistic ([P]) criteria evaluation. Set via: export ${envVar}=<your-key>`
   };
 }
 function checkTwinAvailability(twinName) {
   const monorepoRoot = resolveMonorepoRoot2();
-  const distPath = resolve4(monorepoRoot, "twins", twinName, "dist", "index.js");
-  if (existsSync8(distPath)) {
+  const hasTwinsDir = existsSync9(resolve6(monorepoRoot, "twins"));
+  const distPath = resolve6(monorepoRoot, "twins", twinName, "dist", "index.js");
+  if (existsSync9(distPath)) {
     return {
       name: `Twin: ${twinName}`,
       status: "pass",
@@ -3823,8 +4617,8 @@ function checkTwinAvailability(twinName) {
     };
   } catch {
   }
-  const srcPath = resolve4(monorepoRoot, "twins", twinName, "src", "index.ts");
-  if (existsSync8(srcPath)) {
+  const srcPath = resolve6(monorepoRoot, "twins", twinName, "src", "index.ts");
+  if (existsSync9(srcPath)) {
     return {
       name: `Twin: ${twinName}`,
       status: "warn",
@@ -3832,11 +4626,18 @@ function checkTwinAvailability(twinName) {
       detail: `Run: pnpm --filter @archal/twin-${twinName} build`
     };
   }
+  if (!hasTwinsDir) {
+    return {
+      name: `Twin: ${twinName}`,
+      status: "pass",
+      message: "Cloud-hosted (via archal run)"
+    };
+  }
   return {
     name: `Twin: ${twinName}`,
     status: "fail",
     message: "Not found",
-    detail: `Install with: npm install @archal/twin-${twinName}`
+    detail: `Build with: pnpm --filter @archal/twin-${twinName} build`
   };
 }
 function checkAgentConfig() {
@@ -3848,10 +4649,10 @@ function checkAgentConfig() {
       message: `ARCHAL_AGENT_COMMAND="${envCommand}"`
     };
   }
-  const projectConfig = resolve4(".archal.json");
-  if (existsSync8(projectConfig)) {
+  const projectConfig = resolve6(".archal.json");
+  if (existsSync9(projectConfig)) {
     try {
-      const raw = JSON.parse(readFileSync9(projectConfig, "utf-8"));
+      const raw = JSON.parse(readFileSync11(projectConfig, "utf-8"));
       if (raw.agent?.command) {
         return {
           name: "Agent command",
@@ -3876,8 +4677,8 @@ function checkAgentConfig() {
   };
 }
 function checkScenario(scenarioPath) {
-  const resolved = resolve4(scenarioPath);
-  if (!existsSync8(resolved)) {
+  const resolved = resolve6(scenarioPath);
+  if (!existsSync9(resolved)) {
     return {
       name: `Scenario: ${scenarioPath}`,
       status: "fail",
@@ -3897,13 +4698,26 @@ function checkScenario(scenarioPath) {
     }
     const hasProbabilistic = scenario.successCriteria.some((c) => c.type === "probabilistic");
     const config = loadConfig();
-    if (hasProbabilistic && !config.apiKey) {
-      return {
-        name: `Scenario: ${scenarioPath}`,
-        status: "fail",
-        message: "Has [P] criteria but no ANTHROPIC_API_KEY",
-        detail: `${scenario.successCriteria.filter((c) => c.type === "probabilistic").length} probabilistic criteria require an API key`
-      };
+    if (hasProbabilistic) {
+      const provider = detectProvider(config.model);
+      const resolvedKey = resolveProviderApiKey(config.apiKey, provider);
+      const envVar = getProviderEnvVar(provider);
+      if (provider === "openai-compatible" && !config.baseUrl) {
+        return {
+          name: `Scenario: ${scenarioPath}`,
+          status: "fail",
+          message: `Has [P] criteria but no base URL for ${config.model}`,
+          detail: "Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
+        };
+      }
+      if (!resolvedKey) {
+        return {
+          name: `Scenario: ${scenarioPath}`,
+          status: "fail",
+          message: `Has [P] criteria but no ${envVar}`,
+          detail: `${scenario.successCriteria.filter((c) => c.type === "probabilistic").length} probabilistic criteria require an API key`
+        };
+      }
     }
     const missingTwins = [];
     for (const twin of scenario.config.twins) {
@@ -4005,27 +4819,50 @@ function createDoctorCommand() {
 // src/auth.ts
 import { spawnSync } from "child_process";
-import { chmodSync as chmodSync2, existsSync as existsSync9, readFileSync as readFileSync10, unlinkSync as unlinkSync5, writeFileSync as writeFileSync7 } from "fs";
+import { existsSync as existsSync10, readFileSync as readFileSync12, unlinkSync as unlinkSync5, writeFileSync as writeFileSync7 } from "fs";
 import { join as join7 } from "path";
 var CREDENTIALS_FILE = "credentials.json";
-var AUTH_BASE_URL = (process.env["ARCHAL_AUTH_URL"] ?? "https://archal.ai").replace(/\/+$/, "");
-var REQUEST_TIMEOUT_MS2 = 8e3;
+var AUTH_TOKEN_ENV_VAR = "ARCHAL_TOKEN";
+function normalizeAuthUrl(value) {
+  const trimmed = value.trim().replace(/\/+$/, "");
+  return trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
+}
+var AUTH_BASE_URL = normalizeAuthUrl(process.env["ARCHAL_AUTH_URL"] ?? "https://www.archal.ai");
+var REQUEST_TIMEOUT_MS3 = 8e3;
+var ENV_TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
 function getCredentialsPath() {
   return join7(ensureArchalDir(), CREDENTIALS_FILE);
 }
 function isPlan(value) {
   return value === "free" || value === "pro" || value === "enterprise";
 }
+function isTokenDerivedIdentity(email) {
+  return email === "(from ARCHAL_TOKEN)" || email === "(from token)";
+}
+function logRefreshFailure(creds, reason) {
+  if (isTokenDerivedIdentity(creds.email)) {
+    warn(
+      `Could not verify token with ${AUTH_BASE_URL}/auth/me (${reason}). Using token without refreshed account metadata.`
+    );
+    return;
+  }
+  warn(
+    `Could not refresh account metadata from ${AUTH_BASE_URL}/auth/me (${reason}). Using cached credentials.`
+  );
+}
 function readCredentialsFile() {
   const path = getCredentialsPath();
-  if (!existsSync9(path)) {
+  if (!existsSync10(path)) {
     return null;
   }
   try {
-    const raw = readFileSync10(path, "utf-8");
+    const raw = readFileSync12(path, "utf-8");
     const parsed = JSON.parse(raw);
     const token = typeof parsed.token === "string" ? parsed.token : typeof parsed.accessToken === "string" ? parsed.accessToken : null;
     if (token === null || parsed.refreshToken !== void 0 && typeof parsed.refreshToken !== "string" || typeof parsed.email !== "string" || !isPlan(parsed.plan) || !Array.isArray(parsed.selectedTwins) || !parsed.selectedTwins.every((value) => typeof value === "string") || typeof parsed.expiresAt !== "number") {
+      warn(
+        `Credentials file at ${path} has missing or invalid fields. Run \`archal login\` to re-authenticate.`
+      );
       return null;
     }
     return {
@@ -4037,9 +4874,32 @@ function readCredentialsFile() {
       expiresAt: parsed.expiresAt
     };
   } catch {
+    warn(
+      `Credentials file at ${path} exists but could not be parsed. Delete it and run \`archal login\` to re-authenticate.`
+    );
     return null;
   }
 }
+function readCredentialsFromEnv() {
+  const raw = process.env[AUTH_TOKEN_ENV_VAR];
+  if (typeof raw !== "string") {
+    return null;
+  }
+  const token = raw.trim();
+  if (token.length === 0) {
+    return null;
+  }
+  const nowSeconds = Math.floor(Date.now() / 1e3);
+  return {
+    token,
+    refreshToken: "",
+    email: "(from ARCHAL_TOKEN)",
+    plan: "free",
+    selectedTwins: [],
+    // API keys are opaque and don't carry exp; keep env-provided token usable.
+    expiresAt: getJwtExpiry(token) ?? nowSeconds + ENV_TOKEN_FALLBACK_TTL_SECONDS
+  };
+}
 function getCredentials() {
   const creds = getStoredCredentials();
   if (!creds) {
@@ -4052,7 +4912,7 @@ function getCredentials() {
   return creds;
 }
 function getStoredCredentials() {
-  return readCredentialsFile();
+  return readCredentialsFromEnv() ?? readCredentialsFile();
 }
 function saveCredentials(creds) {
   const path = getCredentialsPath();
@@ -4060,15 +4920,11 @@ function saveCredentials(creds) {
     accessToken: creds.token,
     ...creds
   };
-  writeFileSync7(path, JSON.stringify(payload, null, 2) + "\n", "utf-8");
-  try {
-    chmodSync2(path, 384);
-  } catch {
-  }
+  writeFileSync7(path, JSON.stringify(payload, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
 }
 function deleteCredentials() {
   const path = getCredentialsPath();
-  if (!existsSync9(path)) {
+  if (!existsSync10(path)) {
     return false;
   }
   unlinkSync5(path);
@@ -4114,21 +4970,86 @@ function requireAuth(options = {}) {
   process.stderr.write("Tip: archal setup\n");
   process.exit(1);
 }
+function isCliTokenExchangeResponse(value) {
+  if (!value || typeof value !== "object") return false;
+  const data = value;
+  return typeof data["accessToken"] === "string" && typeof data["refreshToken"] === "string" && typeof data["email"] === "string" && isPlan(data["plan"]) && Array.isArray(data["selectedTwins"]) && data["selectedTwins"].every((item) => typeof item === "string") && typeof data["expiresAt"] === "number";
+}
+function isCliRefreshResponse(value) {
+  if (!value || typeof value !== "object") return false;
+  const data = value;
+  return typeof data["accessToken"] === "string" && typeof data["refreshToken"] === "string" && typeof data["expiresAt"] === "number";
+}
+async function exchangeCliAuthCode(input) {
+  const response = await fetch(`${AUTH_BASE_URL}/auth/cli/token`, {
+    method: "POST",
+    headers: {
+      "content-type": "application/json",
+      "user-agent": CLI_USER_AGENT
+    },
+    body: JSON.stringify(input),
+    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
+  });
+  if (!response.ok) {
+    throw new Error(`Login failed during code exchange (${response.status})`);
+  }
+  const payload = await response.json();
+  if (!isCliTokenExchangeResponse(payload)) {
+    throw new Error("Login failed: invalid token exchange response");
+  }
+  return {
+    token: payload.accessToken,
+    refreshToken: payload.refreshToken,
+    email: payload.email,
+    plan: payload.plan,
+    selectedTwins: payload.selectedTwins,
+    expiresAt: payload.expiresAt
+  };
+}
+async function refreshCliSession(creds) {
+  if (!creds.refreshToken) {
+    return null;
+  }
+  const response = await fetch(`${AUTH_BASE_URL}/auth/cli/refresh`, {
+    method: "POST",
+    headers: {
+      "content-type": "application/json",
+      "user-agent": CLI_USER_AGENT
+    },
+    body: JSON.stringify({ refreshToken: creds.refreshToken }),
+    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
+  });
+  if (!response.ok) {
+    return null;
+  }
+  const payload = await response.json();
+  if (!isCliRefreshResponse(payload)) {
+    return null;
+  }
+  return {
+    ...creds,
+    token: payload.accessToken,
+    refreshToken: payload.refreshToken,
+    expiresAt: payload.expiresAt
+  };
+}
 async function refreshAuthFromServer(creds) {
   try {
     const response = await fetch(`${AUTH_BASE_URL}/auth/me`, {
       method: "GET",
       headers: {
         authorization: `Bearer ${creds.token}`,
-        "user-agent": "archal-cli/0.1.0"
+        "user-agent": CLI_USER_AGENT
       },
-      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS2)
+      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
     });
     if (!response.ok) {
+      logRefreshFailure(creds, `HTTP ${response.status}`);
       return creds;
     }
     const data = await response.json();
     if (typeof data.email !== "string" || !isPlan(data.plan) || !Array.isArray(data.selectedTwins) || !data.selectedTwins.every((value) => typeof value === "string")) {
+      logRefreshFailure(creds, "invalid response payload");
       return creds;
     }
     const updated = {
@@ -4141,7 +5062,9 @@ async function refreshAuthFromServer(creds) {
       saveCredentials(updated);
     }
     return updated;
-  } catch {
+  } catch (error2) {
+    const message = error2 instanceof Error ? error2.message : String(error2);
+    logRefreshFailure(creds, message);
     return creds;
   }
 }
@@ -4165,7 +5088,7 @@ function getJwtExpiry(token) {
 }
 // src/runner/routing.ts
-import { readFileSync as readFileSync11 } from "fs";
+import { readFileSync as readFileSync13 } from "fs";
 function isLoopbackUrl(rawUrl) {
   try {
     const parsed = new URL(rawUrl);
@@ -4180,7 +5103,7 @@ function isNonLocalEndpoint(rawUrl) {
 }
 function parseRemoteTwinUrlOverrides(path) {
   if (!path) return void 0;
-  const raw = readFileSync11(path, "utf-8");
+  const raw = readFileSync13(path, "utf-8");
   const parsed = JSON.parse(raw);
   const overrides = {};
   for (const [key, value] of Object.entries(parsed)) {
@@ -4202,7 +5125,7 @@ function parseRemoteTwinUrlOverrides(path) {
 }
 function parseApiBaseUrlOverrides(path) {
   if (!path) return void 0;
-  const raw = readFileSync11(path, "utf-8");
+  const raw = readFileSync13(path, "utf-8");
   const parsed = JSON.parse(raw);
   const overrides = {};
   for (const [key, value] of Object.entries(parsed)) {
@@ -4260,17 +5183,17 @@ function buildApiRoutingEnv(routing) {
   }
   return env;
 }
-function validateRemoteOpenClawTopology(endpointUrl, requiredTwins, remoteTwinUrlOverrides) {
+function validateRemoteApiEngineTopology(endpointUrl, requiredTwins, remoteTwinUrlOverrides) {
   if (!isNonLocalEndpoint(endpointUrl)) return;
   if (!remoteTwinUrlOverrides) {
     throw new Error(
-      "Non-local OpenClaw endpoint detected but no remote-reachable twin URL map provided. Use --openclaw-twin-urls <path-to-json> with twin MCP base URLs reachable by the OpenClaw endpoint."
+      "Non-local engine endpoint detected but no remote-reachable twin URL map provided. Use --engine-twin-urls <path-to-json> with twin MCP base URLs reachable by the engine endpoint."
     );
   }
   const missing = requiredTwins.filter((twin) => !remoteTwinUrlOverrides[twin]);
   if (missing.length > 0) {
     throw new Error(
-      `Missing twin URL overrides for: ${missing.join(", ")}. Provide a URL for each twin in --openclaw-twin-urls when using a non-local OpenClaw endpoint.`
+      `Missing twin URL overrides for: ${missing.join(", ")}. Provide a URL for each twin in --engine-twin-urls when using a non-local engine endpoint.`
     );
   }
 }
@@ -4304,7 +5227,16 @@ function computeStateDiff(before, after) {
   }
   return diff;
 }
-async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections, evaluatorConfig, timeoutSeconds, rateLimit, openclawRemote, remoteTwinUrlOverrides, apiRouting, cloudTwinUrls) {
+function parsePositiveIntFromEnv(name) {
+  const raw = process.env[name]?.trim();
+  if (!raw) return void 0;
+  const parsed = parseInt(raw, 10);
+  if (Number.isNaN(parsed) || parsed <= 0) {
+    throw new Error(`${name} must be a positive integer when set`);
+  }
+  return parsed;
+}
+async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections, evaluatorConfig, timeoutSeconds, rateLimit, apiEngine, localEngine, remoteTwinUrlOverrides, apiRouting, cloudTwinUrls, apiBearerToken, adminAuth) {
   async function probeHealth(url, timeoutMs) {
     const controller = new AbortController();
     const timer = setTimeout(() => controller.abort(), timeoutMs);
@@ -4336,7 +5268,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
     let beforeState;
     if (useCloud) {
       progress("Fetching seed state from cloud twins...");
-      beforeState = await collectStateFromHttp(cloudTwinUrls);
+      beforeState = await collectStateFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
     } else {
       progress("Capturing seed state...");
       const seedResult = await captureSeedState(twinConfigs);
@@ -4363,7 +5295,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
     const twinNames = twinConfigs.map((c) => c.twinName);
     const localTwinUrls = twinUrls;
     let effectiveRemoteTwinUrls;
-    if (openclawRemote) {
+    if (apiEngine) {
       effectiveRemoteTwinUrls = {};
       for (const twinName of twinNames) {
         const fromOverride = remoteTwinUrlOverrides?.[twinName];
@@ -4375,7 +5307,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
         effectiveRemoteTwinUrls[twinName] = resolved;
       }
     }
-    if (openclawRemote) {
+    if (apiEngine && !useCloud) {
       for (const [name, url] of Object.entries(localTwinUrls)) {
         const ok = await probeHealth(url, 1500);
         if (!ok) {
@@ -4383,24 +5315,25 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
         }
       }
     }
-    if (useCloud) {
-      for (const [name, url] of Object.entries(cloudTwinUrls)) {
-        const ok = await probeHealth(url, 3e3);
-        if (!ok) {
-          throw new Error(`Cloud twin "${name}" failed health check at ${url}/health`);
-        }
-      }
-    }
-    const taskMessage = generateTaskFromScenario(scenario, apiRouting);
+    const baseTaskMessage = generateTaskFromScenario(scenario, apiRouting);
+    const taskMessage = localEngine?.promptContext ? `${localEngine.promptContext}
+---
+${baseTaskMessage}` : baseTaskMessage;
+    const engineModel = localEngine?.model ?? apiEngine?.model;
     const effectiveAgentConfig = {
       ...agentConfig,
       env: {
         ...agentConfig.env,
-        ...buildApiRoutingEnv(apiRouting)
+        ...buildApiRoutingEnv(apiRouting),
+        ARCHAL_ENGINE_MODE: apiEngine ? "api" : "local",
+        ...engineModel ? { ARCHAL_ENGINE_MODEL: engineModel } : {},
+        ARCHAL_ENGINE_TASK: taskMessage
       }
     };
-    let agentResult = openclawRemote ? await executeOpenClawRemote(
-      openclawRemote,
+    let agentResult = apiEngine ? await executeOpenClawRemote(
+      apiEngine,
       scenario,
       runId,
       taskMessage,
@@ -4414,7 +5347,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
       timeoutSeconds * 1e3,
       { restConfigPath, twinUrls }
     );
-    if (!openclawRemote && shouldRetryWithModernOpenClaw(agentResult)) {
+    if (!apiEngine && !localEngine && shouldRetryWithModernOpenClaw(agentResult)) {
       warn(
         "OpenClaw legacy local invocation failed with CLI drift signal; retrying with modern local args"
       );
@@ -4431,8 +5364,8 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
     let stateAfter;
     let trace;
     if (useCloud) {
-      stateAfter = await collectStateFromHttp(cloudTwinUrls);
-      trace = await collectTraceFromHttp(cloudTwinUrls);
+      stateAfter = await collectStateFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
+      trace = await collectTraceFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
     } else {
       if (!twinPaths) {
         throw new Error("Twin paths not initialized");
@@ -4443,7 +5376,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
     const diff = computeStateDiff(beforeState, stateAfter);
     cleanupTempFiles(mcpConfigPath, twinPaths ?? {}, seedPaths, runId, twinNames);
     if (agentResult.timedOut) {
-      const timeoutDisplay = openclawRemote ? `${(openclawRemote.timeoutMs / 1e3).toFixed(0)}s` : `${timeoutSeconds}s`;
+      const timeoutDisplay = apiEngine ? `${(apiEngine.timeoutMs / 1e3).toFixed(0)}s` : `${timeoutSeconds}s`;
       const durationMs2 = Date.now() - startTime;
       return {
         runIndex,
@@ -4461,6 +5394,9 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
     }
     if (agentResult.exitCode !== 0 && agentResult.exitCode !== null) {
       warn(`Agent exited with non-zero code ${agentResult.exitCode} on run ${runIndex + 1}`);
+      if (agentResult.stderr) {
+        debug(`Agent stderr: ${agentResult.stderr.slice(0, 500)}`);
+      }
     }
     progress(`Evaluating run ${runIndex + 1}...`);
     const evaluationResult = await evaluateRun(
@@ -4511,7 +5447,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
       for (const paths of Object.values(seedPaths)) {
         for (const file of [paths.stateFile, `${paths.stateFile}.tmp`]) {
           try {
-            if (existsSync10(file)) unlinkSync6(file);
+            if (existsSync11(file)) unlinkSync6(file);
           } catch {
           }
         }
@@ -4520,14 +5456,14 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
     if (restConfigPath) {
       for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
         try {
-          if (existsSync10(file)) unlinkSync6(file);
+          if (existsSync11(file)) unlinkSync6(file);
         } catch {
         }
       }
     }
   }
 }
-function preflightCheck(scenario, apiKey) {
+function preflightCheck(scenario, apiKey, model, baseUrl) {
   const errors = [];
   for (const twin of scenario.config.twins) {
     const result = checkTwinAvailability(twin);
@@ -4540,17 +5476,30 @@ function preflightCheck(scenario, apiKey) {
     }
   }
   const hasProbabilistic = scenario.successCriteria.some((c) => c.type === "probabilistic");
-  if (hasProbabilistic && !apiKey) {
-    const pCount = scenario.successCriteria.filter((c) => c.type === "probabilistic").length;
-    errors.push({
-      check: "ANTHROPIC_API_KEY",
-      message: `Scenario has ${pCount} probabilistic criteria but no API key is configured`,
-      detail: "Set via: export ANTHROPIC_API_KEY=sk-ant-... or archal config set evaluator.apiKey <key>"
-    });
+  if (hasProbabilistic) {
+    const provider = detectProvider(model);
+    const resolvedKey = resolveProviderApiKey(apiKey, provider);
+    if (provider === "openai-compatible" && !baseUrl) {
+      errors.push({
+        check: "evaluator.baseUrl",
+        message: `Model "${model}" requires a base URL for the OpenAI-compatible endpoint`,
+        detail: "Set via: export ARCHAL_EVALUATOR_BASE_URL=<url> or archal config set evaluator.baseUrl <url>"
+      });
+    }
+    if (!resolvedKey) {
+      const envVar = getProviderEnvVar(provider);
+      const pCount = scenario.successCriteria.filter((c) => c.type === "probabilistic").length;
+      errors.push({
+        check: envVar,
+        message: `Scenario has ${pCount} probabilistic criteria that will be skipped (no API key for ${provider})`,
+        detail: `Set via: export ${envVar}=<your-key> or archal config set evaluator.apiKey <key>`,
+        warning: true
+      });
+    }
   }
   return errors;
 }
-async function runRemoteOpenClawPreflight(scenario, seedSelections, rateLimit, remoteConfig, remoteTwinUrlOverrides) {
+async function runRemoteApiEnginePreflight(scenario, seedSelections, rateLimit, remoteConfig, remoteTwinUrlOverrides) {
   const runId = `archal-preflight-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
   const twinConfigs = seedSelections.map((sel) => ({
     twinName: sel.twinName,
@@ -4592,14 +5541,14 @@ async function runRemoteOpenClawPreflight(scenario, seedSelections, rateLimit, r
     for (const paths of Object.values(restResult.twinPaths)) {
       for (const file of [paths.stateFile, `${paths.stateFile}.tmp`, paths.traceFile, `${paths.traceFile}.tmp`]) {
         try {
-          if (existsSync10(file)) unlinkSync6(file);
+          if (existsSync11(file)) unlinkSync6(file);
         } catch {
         }
       }
     }
     for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
       try {
-        if (existsSync10(file)) unlinkSync6(file);
+        if (existsSync11(file)) unlinkSync6(file);
       } catch {
       }
     }
@@ -4622,9 +5571,14 @@ async function runScenario(options) {
       );
     }
   }
-  const preflightErrors = preflightCheck(scenario, config.apiKey);
-  if (preflightErrors.length > 0) {
-    const lines = preflightErrors.map((e) => {
+  const preflightErrors = preflightCheck(scenario, config.apiKey, model, config.baseUrl);
+  const hardErrors = preflightErrors.filter((e) => !e.warning);
+  const warnings = preflightErrors.filter((e) => e.warning);
+  for (const w of warnings) {
+    warn(`${w.check}: ${w.message}${w.detail ? ` (${w.detail})` : ""}`);
+  }
+  if (hardErrors.length > 0) {
+    const lines = hardErrors.map((e) => {
       let line = `  - ${e.check}: ${e.message}`;
       if (e.detail) line += `
     ${e.detail}`;
@@ -4651,7 +5605,7 @@ Run 'archal doctor' for a full system check.`
     }
     seedSelections = overrideSeedSelection(seedSelections, overrides);
   }
-  if (config.apiKey && !options.noDynamicSeed) {
+  if (config.geminiApiKey && !options.noDynamicSeed) {
     progress("Generating dynamic seeds from setup description...");
     const baseTwinConfigs = seedSelections.map((sel) => ({
       twinName: sel.twinName,
@@ -4659,8 +5613,8 @@ Run 'archal doctor' for a full system check.`
     }));
     const { beforeState: baseSeedStates } = await captureSeedState(baseTwinConfigs);
     const dynamicConfig = {
-      apiKey: config.apiKey,
-      model,
+      geminiApiKey: config.geminiApiKey,
+      model: config.seedModel,
       noCache: options.noSeedCache
     };
     for (const sel of seedSelections) {
@@ -4683,24 +5637,28 @@ Run 'archal doctor' for a full system check.`
       sel.seedData = result.seed;
     }
   }
-  const scenarioDir = dirname2(resolve5(options.scenarioPath));
+  const scenarioDir = dirname2(resolve7(options.scenarioPath));
   let projectConfigPath;
   for (const dir of [scenarioDir, process.cwd()]) {
-    const candidate = resolve5(dir, ".archal.json");
-    if (existsSync10(candidate)) {
+    const candidate = resolve7(dir, ".archal.json");
+    if (existsSync11(candidate)) {
       projectConfigPath = candidate;
       break;
     }
   }
-  function resolveOpenClawModel(raw) {
-    if (!raw || !raw.trim()) return "openclaw:main";
+  function resolveOpenClawModel2(raw) {
+    if (!raw || !raw.trim()) return void 0;
     const value = raw.trim();
     return value.includes(":") ? value : `openclaw:${value}`;
   }
-  function resolveOpenClawGatewayToken2(explicitToken) {
+  function resolveEngineToken2(explicitToken) {
     if (explicitToken && explicitToken.trim()) {
       return explicitToken.trim();
     }
+    const engineToken = process.env["ARCHAL_ENGINE_TOKEN"]?.trim();
+    if (engineToken) {
+      return engineToken;
+    }
     const gatewayToken = process.env["OPENCLAW_GATEWAY_TOKEN"]?.trim();
     if (gatewayToken) {
       return gatewayToken;
@@ -4711,42 +5669,124 @@ Run 'archal doctor' for a full system check.`
     }
     return void 0;
   }
-  let openclawRemote;
-  if (options.openclawUrl) {
-    openclawRemote = {
-      url: options.openclawUrl,
-      token: resolveOpenClawGatewayToken2(options.openclawToken),
-      model: resolveOpenClawModel(options.openclawAgent ?? process.env["OPENCLAW_AGENT_ID"]),
-      timeoutMs: (options.openclawTimeout ?? timeoutSeconds) * 1e3
+  const openclawEndpointAlias = options.openclawUrl ?? process.env["OPENCLAW_URL"];
+  const engineMode = (() => {
+    if (options.engine) {
+      return options.engine;
+    }
+    if (options.engineEndpoint || openclawEndpointAlias || process.env["ARCHAL_ENGINE_ENDPOINT"]) {
+      return "api";
+    }
+    if (options.harnessDir || process.env["ARCHAL_HARNESS_DIR"]) {
+      return "local";
+    }
+    return "legacy";
+  })();
+  const apiEndpoint = options.engineEndpoint ?? openclawEndpointAlias ?? process.env["ARCHAL_ENGINE_ENDPOINT"];
+  const rawOpenClawAgent = options.openclawAgent ?? process.env["OPENCLAW_AGENT_ID"];
+  const rawEngineModel = options.engineModel ?? process.env["ARCHAL_ENGINE_MODEL"];
+  const resolvedEngineToken = resolveEngineToken2(options.engineToken ?? options.openclawToken);
+  const harnessDir = options.harnessDir ?? process.env["ARCHAL_HARNESS_DIR"];
+  let apiEngine;
+  if (engineMode === "api") {
+    const apiTimeoutSeconds = options.engineTimeout ?? options.openclawTimeout ?? parsePositiveIntFromEnv("ARCHAL_ENGINE_TIMEOUT") ?? timeoutSeconds;
+    if (!apiEndpoint || !apiEndpoint.trim()) {
+      throw new Error(
+        "API engine mode requires --engine-endpoint (or --openclaw-url for legacy compatibility)."
+      );
+    }
+    if (!Number.isFinite(apiTimeoutSeconds) || apiTimeoutSeconds <= 0) {
+      throw new Error("Engine timeout must be a positive integer number of seconds.");
+    }
+    const resolvedApiModel = rawEngineModel?.trim() || resolveOpenClawModel2(rawOpenClawAgent) || (openclawEndpointAlias ? "openclaw:main" : void 0);
+    if (!resolvedApiModel) {
+      throw new Error(
+        "API engine mode requires --engine-model/ARCHAL_ENGINE_MODEL (or --openclaw-agent/OPENCLAW_AGENT_ID)."
+      );
+    }
+    apiEngine = {
+      url: apiEndpoint.trim(),
+      token: resolvedEngineToken,
+      model: resolvedApiModel,
+      timeoutMs: apiTimeoutSeconds * 1e3,
+      agentId: rawOpenClawAgent?.trim() || void 0
     };
-    if (!openclawRemote.token) {
+    if (openclawEndpointAlias && !apiEngine.token) {
       throw new Error(
         "OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD."
       );
     }
   }
-  const remoteTwinUrlOverrides = parseRemoteTwinUrlOverrides(options.openclawTwinUrls);
+  let localEngine;
+  if (engineMode === "local") {
+    if (!harnessDir) {
+      throw new Error(
+        "Local engine mode requires --harness-dir (or ARCHAL_HARNESS_DIR)."
+      );
+    }
+    const resolvedHarness = resolveLocalHarness(harnessDir, rawEngineModel);
+    const resolvedFallbackLocalAgentConfig = options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath);
+    const fallbackLocalAgentConfig = resolvedFallbackLocalAgentConfig ?? { command: "openclaw", args: [] };
+    if (!resolvedHarness.manifest) {
+      debug(
+        "Harness manifest not found for local mode; using agent command defaults.",
+        { manifestPath: resolvedHarness.manifestPath }
+      );
+    } else if (!resolvedHarness.localCommand) {
+      warn(
+        `Harness manifest at ${resolvedHarness.manifestPath} does not define local.command; falling back to agent command defaults.`
+      );
+    }
+    if (!resolvedHarness.localCommand && !resolvedFallbackLocalAgentConfig) {
+      warn(
+        'No local command configured via harness manifest/.archal.json/ARCHAL_AGENT_COMMAND; defaulting to "openclaw".'
+      );
+    }
+    const commandConfig = resolvedHarness.localCommand ?? fallbackLocalAgentConfig;
+    localEngine = {
+      model: resolvedHarness.model,
+      command: commandConfig.command,
+      args: commandConfig.args,
+      env: commandConfig.env,
+      cwd: resolvedHarness.harnessDir,
+      promptContext: resolvedHarness.promptContext
+    };
+  }
+  const remoteTwinUrlOverrides = apiEngine ? parseRemoteTwinUrlOverrides(
+    options.engineTwinUrls ?? options.openclawTwinUrls ?? process.env["ARCHAL_ENGINE_TWIN_URLS"]
+  ) : void 0;
   const apiBaseUrlOverrides = parseApiBaseUrlOverrides(options.apiBaseUrls);
   const apiProxyUrl = parseProxyUrl(options.apiProxyUrl ?? process.env["ARCHAL_API_PROXY_URL"]);
   const apiRouting = apiBaseUrlOverrides && Object.keys(apiBaseUrlOverrides).length > 0 || apiProxyUrl ? {
     baseUrls: apiBaseUrlOverrides,
-    proxyUrl: apiProxyUrl
+    proxyUrl: apiProxyUrl,
+    bearerToken: options.apiBearerToken,
+    adminToken: options.apiAdminToken,
+    adminUserId: options.apiAdminUserId
   } : void 0;
-  const agentConfig = options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath) ?? (openclawRemote ? { command: "openclaw", args: [] } : {
+  const agentConfig = localEngine ? {
+    command: localEngine.command,
+    args: localEngine.args,
+    env: localEngine.env,
+    cwd: localEngine.cwd
+  } : options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath) ?? (apiEngine ? { command: "openclaw", args: [] } : {
     command: process.env["ARCHAL_AGENT_COMMAND"] ?? "echo",
     args: process.env["ARCHAL_AGENT_COMMAND"] ? [] : ["No agent command configured"]
   });
-  if (!openclawRemote && agentConfig.command === "echo") {
+  if (!apiEngine && !localEngine && agentConfig.command === "echo") {
     process.stderr.write(
-      "Warning: No agent command configured. Set ARCHAL_AGENT_COMMAND/.archal.json or provide --openclaw-url.\n"
+      "Warning: No agent command configured. Set ARCHAL_AGENT_COMMAND/.archal.json, use --engine-endpoint, or run --engine local with --harness-dir.\n"
     );
   }
-  if (openclawRemote) {
-    info("Remote OpenClaw mode enabled", { url: openclawRemote.url });
+  if (apiEngine) {
+    info("Remote API engine mode enabled", { url: apiEngine.url });
     warn(
-      "Remote OpenClaw requires network reachability from the endpoint to each run's twin MCP URLs. If runs fail to connect, co-locate OpenClaw with Archal or expose twins via a reachable network path."
+      "Remote engine mode requires network reachability from the endpoint to each run's twin MCP URLs. If runs fail to connect, co-locate the engine with Archal or expose twins via a reachable network path."
     );
-    validateRemoteOpenClawTopology(openclawRemote.url, scenario.config.twins, remoteTwinUrlOverrides);
+    validateRemoteApiEngineTopology(apiEngine.url, scenario.config.twins, remoteTwinUrlOverrides);
+  }
+  if (localEngine) {
+    info("Local harness engine mode enabled", { harnessDir: localEngine.cwd });
   }
   if (apiRouting) {
     info("API routing context enabled", {
@@ -4755,18 +5795,18 @@ Run 'archal doctor' for a full system check.`
     });
   }
   if (options.preflightOnly) {
-    if (openclawRemote) {
-      await runRemoteOpenClawPreflight(
+    if (apiEngine) {
+      await runRemoteApiEnginePreflight(
         scenario,
         seedSelections,
         options.rateLimit,
-        openclawRemote,
+        apiEngine,
         remoteTwinUrlOverrides
       );
     }
     info("Preflight checks passed", {
       scenario: scenario.title,
-      remoteOpenClaw: openclawRemote ? "enabled" : "disabled"
+      engineMode: apiEngine ? "api" : localEngine ? "local" : "legacy-local"
     });
     return {
       scenarioTitle: scenario.title,
@@ -4786,6 +5826,7 @@ Run 'archal doctor' for a full system check.`
   };
   const runs = [];
   for (let i = 0; i < numRuns; i++) {
+    const adminAuth = options.apiAdminToken ? { token: options.apiAdminToken, userId: options.apiAdminUserId } : void 0;
     const result = await executeSingleRun(
       i,
       scenario,
@@ -4794,10 +5835,13 @@ Run 'archal doctor' for a full system check.`
       evaluatorConfig,
       timeoutSeconds,
       options.rateLimit,
-      openclawRemote,
+      apiEngine,
+      localEngine,
       remoteTwinUrlOverrides,
       apiRouting,
-      options.cloudTwinUrls
+      options.cloudTwinUrls,
+      options.apiBearerToken,
+      adminAuth
     );
     runs.push(result);
     printRunProgress(i, numRuns, result.overallScore, result.error);
@@ -4836,10 +5880,10 @@ function normalizeBaseUrl(value, fallback) {
   const normalized = trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
   return normalized.length > 0 ? normalized : fallback;
 }
-var DEFAULT_BASE_URL = "https://archal.ai";
+var DEFAULT_BASE_URL = "https://www.archal.ai";
 var AUTH_BASE_URL2 = normalizeBaseUrl(process.env["ARCHAL_AUTH_URL"] ?? DEFAULT_BASE_URL, DEFAULT_BASE_URL);
 var API_BASE_URL = normalizeBaseUrl(process.env["ARCHAL_API_URL"] ?? AUTH_BASE_URL2, AUTH_BASE_URL2);
-var REQUEST_TIMEOUT_MS3 = 8e3;
+var REQUEST_TIMEOUT_MS4 = 8e3;
 var RETRYABLE_STATUS_CODES2 = /* @__PURE__ */ new Set([408, 425, 429, 500, 502, 503, 504]);
 var RETRYABLE_NETWORK_CODES = /* @__PURE__ */ new Set([
   "ECONNABORTED",
@@ -4864,7 +5908,7 @@ var MAX_RETRIES2 = parseBoundedInt(process.env["ARCHAL_API_MAX_RETRIES"], 3, 0,
 var RETRY_BASE_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_BASE_MS"], 250, 25, 1e4);
 var RETRY_MAX_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_MAX_MS"], 3e3, RETRY_BASE_DELAY_MS, 2e4);
 function sleep2(ms) {
-  return new Promise((resolve11) => setTimeout(resolve11, ms));
+  return new Promise((resolve13) => setTimeout(resolve13, ms));
 }
 function retryDelayMs(attempt, retryAfter) {
   if (retryAfter) {
@@ -4924,13 +5968,30 @@ function isFinalizeEvidencePath(path) {
   }
   return /^\/api\/sessions\/[^/]+\/evidence\/finalize$/.test(pathname);
 }
+async function tryRefreshToken() {
+  try {
+    const creds = getStoredCredentials();
+    if (!creds || !creds.refreshToken) return null;
+    const refreshed = await refreshCliSession(creds);
+    if (!refreshed) return null;
+    saveCredentials(refreshed);
+    return refreshed.token;
+  } catch {
+    return null;
+  }
+}
 async function request(method, path, token, body) {
   const url = `${resolveBaseUrl(path)}${path}`;
   const headers = {
     "content-type": "application/json",
-    "user-agent": "archal-cli/0.1.0"
+    "user-agent": CLI_USER_AGENT
   };
-  if (token) {
+  const runtimeAdminToken = process.env["ARCHAL_RUNTIME_ADMIN_TOKEN"]?.trim();
+  if (runtimeAdminToken) {
+    headers["x-archal-admin-token"] = runtimeAdminToken;
+    headers["x-archal-user-id"] = process.env["ARCHAL_RUNTIME_USER_ID"]?.trim() || "cli-user";
+    headers["x-archal-plan"] = process.env["ARCHAL_RUNTIME_PLAN"]?.trim() || "free";
+  } else if (token) {
     headers["authorization"] = `Bearer ${token}`;
   }
   const isIdempotentFinalize = method === "POST" && isFinalizeEvidencePath(path);
@@ -4938,16 +5999,28 @@ async function request(method, path, token, body) {
   const attempts = retriesAllowed ? MAX_RETRIES2 + 1 : 1;
   let lastError = "request failed";
   let lastOffline = false;
+  let refreshAttempted = false;
   for (let attempt = 1; attempt <= attempts; attempt += 1) {
     try {
       const response = await fetch(url, {
         method,
         headers,
         body: body ? JSON.stringify(body) : void 0,
-        signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
+        signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS4)
       });
       if (!response.ok) {
-        const text = await response.text().catch(() => "");
+        if (response.status === 401 && token && !refreshAttempted) {
+          refreshAttempted = true;
+          const refreshed = await tryRefreshToken();
+          if (refreshed) {
+            token = refreshed;
+            headers["authorization"] = `Bearer ${token}`;
+            attempt -= 1;
+            continue;
+          }
+        }
+        const rawText = await response.text().catch(() => "");
+        const text = rawText.length > 200 ? rawText.slice(0, 200) + "..." : rawText;
         const retryable = retriesAllowed && attempt < attempts && RETRYABLE_STATUS_CODES2.has(response.status);
         if (retryable) {
           await sleep2(retryDelayMs(attempt, response.headers.get("retry-after")));
@@ -5018,7 +6091,7 @@ function fetchScenarioCatalog(token) {
   return request("GET", "/api/scenarios", token);
 }
-// src/commands/twin.ts
+// src/commands/twins.ts
 import { Command as Command2 } from "commander";
 // src/constants.ts
@@ -5045,10 +6118,10 @@ var PLAN_LIMITS = {
 import { createInterface as createInterface2 } from "readline";
 function askLine(question) {
   const rl = createInterface2({ input: process.stdin, output: process.stderr });
-  return new Promise((resolve11) => {
+  return new Promise((resolve13) => {
     rl.question(question, (answer) => {
       rl.close();
-      resolve11(answer.trim());
+      resolve13(answer.trim());
     });
   });
 }
@@ -5057,8 +6130,7 @@ async function askConfirm(question) {
   return answer.toLowerCase().startsWith("y");
 }
-// src/commands/twin.ts
-var runningTwins = /* @__PURE__ */ new Map();
+// src/commands/twins.ts
 var KNOWN_TWINS2 = [
   { name: "github", package: "@archal/twin-github", description: "GitHub digital twin" },
   { name: "slack", package: "@archal/twin-slack", description: "Slack digital twin" },
@@ -5083,7 +6155,7 @@ async function runInteractiveTwinSelect(token) {
     const marker = currentlySelected.has(twin.id) ? "\x1B[32m\u2713\x1B[0m" : " ";
     const num = String(i + 1).padStart(2);
     process.stderr.write(
-      `  ${marker} [${num}] ${twin.name.padEnd(18)} (${twin.toolCount} tools) \u2014 ${twin.description}
+      `  ${marker} [${num}] ${twin.name.padEnd(18)}${twin.toolCount != null ? ` (${twin.toolCount} tools)` : ""} \u2014 ${twin.description}
 `
     );
   }
@@ -5169,7 +6241,7 @@ async function listTwinCatalog() {
     } else {
       status = "\x1B[90m\u2717 not selected\x1B[0m";
     }
-    return [twin.name, String(twin.toolCount), twin.description, status];
+    return [twin.name, twin.toolCount != null ? String(twin.toolCount) : "\u2014", twin.description, status];
   });
   table(headers, rows);
   if (isUnlimited) {
@@ -5194,85 +6266,12 @@ async function selectTwinsForPlan() {
   const refreshed = await refreshAuthFromServer(creds);
   saveCredentials(refreshed);
 }
-function createTwinCommand() {
-  const cmd = new Command2("twin").description("Manage local digital twin processes (debug/local only)");
-  cmd.command("start").description("Start a digital twin process").argument("<name>", "Twin name (e.g., github, slack)").option("--seed <seed>", "Seed name to load", "small-project").option("--port <port>", "Port for REST transport").action((name, opts) => {
-    requireAuth({
-      action: `start the "${name}" twin`,
-      nextCommand: `archal twin start ${name}`
-    });
-    const knownTwin = KNOWN_TWINS2.find((t) => t.name === name);
-    if (!knownTwin) {
-      const available = KNOWN_TWINS2.map((t) => t.name).join(", ");
-      error(`Unknown twin: "${name}". Available twins: ${available}`);
-      process.exit(1);
-    }
-    if (runningTwins.has(name)) {
-      warn(`Twin "${name}" is already running (PID: ${runningTwins.get(name)?.pid ?? "unknown"})`);
-      return;
-    }
-    info("`archal run` uses hosted cloud twins. `archal twin start` is for local debugging only.");
-    const args = [knownTwin.package, "--seed", opts.seed, "--transport", "rest"];
-    if (opts.port) {
-      args.push("--port", opts.port);
-    }
-    info(`Starting twin: ${name}`, { seed: opts.seed, transport: "rest" });
-    const child = spawnMcpStdioProcess({
-      command: "npx",
-      args
-    });
-    const pid = child.pid ?? 0;
-    runningTwins.set(name, {
-      name,
-      pid,
-      startedAt: (/* @__PURE__ */ new Date()).toISOString(),
-      process: child
-    });
-    child.on("exit", (code) => {
-      info(`Twin "${name}" exited`, { code: String(code ?? "unknown") });
-      runningTwins.delete(name);
-    });
-    success(`Twin "${name}" started (PID: ${pid})`);
-  });
-  cmd.command("stop").description("Stop a running digital twin").argument("<name>", "Twin name to stop").action(async (name) => {
-    const twin = runningTwins.get(name);
-    if (!twin) {
-      error(`Twin "${name}" is not running`);
-      const running = Array.from(runningTwins.keys());
-      if (running.length > 0) {
-        info(`Running twins: ${running.join(", ")}`);
-      }
-      process.exit(1);
-    }
-    info(`Stopping twin: ${name}`, { pid: String(twin.pid) });
-    await killProcess(twin.process);
-    runningTwins.delete(name);
-    success(`Twin "${name}" stopped`);
-  });
-  cmd.command("status").description("Show status of running digital twins").action(() => {
-    if (runningTwins.size === 0) {
-      info("No twins currently running");
-      return;
-    }
-    const headers = ["Name", "PID", "Started", "Status"];
-    const rows = [];
-    for (const twin of runningTwins.values()) {
-      const isAlive = twin.process.exitCode === null;
-      rows.push([
-        twin.name,
-        String(twin.pid),
-        twin.startedAt,
-        isAlive ? "running" : `exited (${twin.process.exitCode})`
-      ]);
-    }
-    table(headers, rows);
-  });
-  cmd.command("list").description("List available digital twins and entitlement status").action(async () => {
-    warn("`archal twin list` is deprecated. Use `archal twins list`.");
+function createTwinsCommand() {
+  const cmd = new Command2("twins").description("Manage twin catalog entitlements");
+  cmd.command("list").description("List available twins and entitlement status").action(async () => {
     await listTwinCatalog();
   });
   cmd.command("select").description("Choose which twins to use on your free plan").action(async () => {
-    warn("`archal twin select` is deprecated. Use `archal twins select`.");
     await selectTwinsForPlan();
   });
   return cmd;
@@ -5280,7 +6279,13 @@ function createTwinCommand() {
 // src/commands/run.ts
 function createRunCommand() {
-  const cmd = new Command3("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path to scenario markdown file").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "100").option("--openclaw-url <url>", "OpenClaw Gateway URL or /v1/responses endpoint (enables remote OpenClaw mode)").option("--openclaw-token <token>", "Bearer token for OpenClaw Gateway auth").option("--openclaw-agent <id>", "OpenClaw agent/model id for remote mode (e.g. main or openclaw:my-agent)").option("--openclaw-twin-urls <path>", "Path to JSON mapping twin names to remotely reachable MCP base URLs").option("--openclaw-timeout <seconds>", "Timeout for remote OpenClaw HTTP call per run (defaults to run timeout)").option("--api-base-urls <path>", "Path to JSON mapping service names to clone API base URLs for raw API code routing").option("--api-proxy-url <url>", "Proxy URL for raw API code routing metadata").option("--preflight-only", "Run environment/config preflight checks only and exit").option("--no-dynamic-seed", "Disable dynamic seed generation (use keyword-matched seed only)").option("--no-seed-cache", "Skip seed cache for dynamic generation").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (scenarioArg, opts) => {
+  const cmd = new Command3("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path to scenario markdown file").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "100").option("--engine-endpoint <url>", "API engine endpoint URL (base URL or /v1/responses)").option("--engine-token <token>", "Bearer token for API engine auth").option(
+    "--engine-model <model>",
+    "Model id for API mode; in local mode this is exported as ARCHAL_ENGINE_MODEL"
+  ).option("--engine-twin-urls <path>", "Path to JSON mapping twin names to remote-reachable MCP base URLs").option("--engine-timeout <seconds>", "Timeout for API engine HTTP call per run (defaults to run timeout)").option(
+    "--harness-dir <path>",
+    "Local agent execution directory (archal-harness.json is optional)"
+  ).option("--openclaw-url <url>", "Deprecated alias for --engine-endpoint").option("--openclaw-token <token>", "Deprecated alias for --engine-token").option("--openclaw-agent <id>", "Deprecated alias for --engine-model").option("--openclaw-twin-urls <path>", "Deprecated alias for --engine-twin-urls").option("--openclaw-timeout <seconds>", "Deprecated alias for --engine-timeout").option("--api-base-urls <path>", "Path to JSON mapping service names to clone API base URLs for raw API code routing").option("--api-proxy-url <url>", "Proxy URL for raw API code routing metadata").option("--preflight-only", "Run environment/config preflight checks only and exit").option("--no-dynamic-seed", "Disable dynamic seed generation (use keyword-matched seed only)").option("--no-seed-cache", "Skip seed cache for dynamic generation").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (scenarioArg, opts) => {
     const required = requireAuth({
       action: "run a scenario",
       nextCommand: `archal run ${scenarioArg}`
@@ -5296,8 +6301,8 @@ function createRunCommand() {
     if (opts.verbose) {
       configureLogger({ verbose: true, level: "debug" });
     }
-    const scenarioPath = resolve6(scenarioArg);
-    if (!existsSync11(scenarioPath)) {
+    const scenarioPath = resolve8(scenarioArg);
+    if (!existsSync12(scenarioPath)) {
       process.stderr.write(`Error: Scenario file not found: ${scenarioPath}
 `);
       process.exit(1);
@@ -5387,26 +6392,20 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
       process.stderr.write("Error: --pass-threshold must be a number between 0 and 100\n");
       process.exit(1);
     }
-    if (!opts.openclawUrl) {
-      process.stderr.write(
-        "Error: --openclaw-url is required. `archal run` now uses cloud transport only.\n"
-      );
+    let engine;
+    try {
+      engine = resolveEngineConfig(opts, timeout);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      process.stderr.write(`Error: ${message}
+`);
       process.exit(1);
     }
-    let openclawTimeout;
-    if (opts.openclawTimeout) {
-      openclawTimeout = parseInt(opts.openclawTimeout, 10);
-      if (Number.isNaN(openclawTimeout) || openclawTimeout <= 0) {
-        process.stderr.write("Error: --openclaw-timeout must be a positive integer\n");
-        process.exit(1);
-      }
-    }
-    const resolvedOpenClawToken = resolveOpenClawGatewayToken(opts.openclawToken);
-    if (opts.openclawUrl && !resolvedOpenClawToken) {
+    if (engine.deprecatedAliasesUsed.length > 0) {
       process.stderr.write(
-        "Error: OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD.\n"
+        `Warning: OpenClaw flags are deprecated (${engine.deprecatedAliasesUsed.join(", ")}). Use --engine-* equivalents.
+`
       );
-      process.exit(1);
     }
     {
       const sessionResult = await startSession(credentials.token, {
@@ -5433,9 +6432,9 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
         if (!runFailureMessage && Object.keys(endpointRoots).length > 0) {
           cloudTwinUrls = endpointRoots;
         }
-        if (!runFailureMessage && opts.openclawUrl && !opts.openclawTwinUrls) {
-          generatedTwinUrlMapPath = resolve6(
-            `.archal-session-${backendSessionId}-openclaw-twin-urls.json`
+        if (!runFailureMessage && engine.mode === "api" && !engine.twinUrlsPath) {
+          generatedTwinUrlMapPath = resolve8(
+            `.archal-session-${backendSessionId}-engine-twin-urls.json`
           );
           writeFileSync9(
             generatedTwinUrlMapPath,
@@ -5444,7 +6443,7 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
           );
         }
         if (!runFailureMessage && !opts.apiBaseUrls && apiBaseUrls && Object.keys(apiBaseUrls).length > 0) {
-          generatedApiBaseUrlMapPath = resolve6(
+          generatedApiBaseUrlMapPath = resolve8(
             `.archal-session-${backendSessionId}-api-base-urls.json`
           );
           writeFileSync9(
@@ -5454,15 +6453,34 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
           );
         }
         if (!runFailureMessage) {
-          const [statusResult, healthResult] = await Promise.all([
-            getSessionStatus(credentials.token, backendSessionId),
-            getSessionHealth(credentials.token, backendSessionId)
-          ]);
-          if (!statusResult.ok || !statusResult.data.alive) {
-            runFailureMessage = `session not ready (${statusResult.ok ? statusResult.data.status : statusResult.error})`;
+          const SESSION_READY_TIMEOUT_MS = 12e4;
+          const SESSION_POLL_INTERVAL_MS = 3e3;
+          const readyDeadline = Date.now() + SESSION_READY_TIMEOUT_MS;
+          let sessionReady = false;
+          while (Date.now() < readyDeadline) {
+            const [statusResult, healthResult] = await Promise.all([
+              getSessionStatus(credentials.token, backendSessionId),
+              getSessionHealth(credentials.token, backendSessionId)
+            ]);
+            if (!statusResult.ok) {
+              runFailureMessage = `session status check failed (${statusResult.error})`;
+              break;
+            }
+            const status = statusResult.data.status;
+            if (status === "failed" || status === "expired" || status === "ended") {
+              runFailureMessage = `session ${status}`;
+              break;
+            }
+            const healthAlive = healthResult.ok && healthResult.data.alive;
+            const statusAlive = statusResult.data.alive || statusResult.data.status === "ready";
+            if (statusAlive && healthAlive) {
+              sessionReady = true;
+              break;
+            }
+            await new Promise((resolve13) => setTimeout(resolve13, SESSION_POLL_INTERVAL_MS));
           }
-          if (!runFailureMessage && (!healthResult.ok || !healthResult.data.alive)) {
-            runFailureMessage = `session health check failed (${healthResult.ok ? "dead" : healthResult.error})`;
+          if (!sessionReady && !runFailureMessage) {
+            runFailureMessage = "session timed out waiting for twins to become ready";
           }
         }
       } else if (!sessionResult.offline) {
@@ -5482,17 +6500,26 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
           output: outputFormat,
           seed: opts.seed,
           rateLimit,
+          engineEndpoint: engine.endpoint,
+          engineToken: engine.token,
+          engineModel: engine.model,
+          engineTwinUrls: generatedTwinUrlMapPath ?? engine.twinUrlsPath,
+          engineTimeout: engine.timeoutSeconds,
+          harnessDir: engine.harnessDir,
           openclawUrl: opts.openclawUrl,
-          openclawToken: resolvedOpenClawToken,
+          openclawToken: engine.token,
           openclawAgent: opts.openclawAgent,
           openclawTwinUrls: generatedTwinUrlMapPath ?? opts.openclawTwinUrls,
-          openclawTimeout,
+          openclawTimeout: engine.timeoutSeconds,
           apiBaseUrls: generatedApiBaseUrlMapPath ?? opts.apiBaseUrls,
           apiProxyUrl: opts.apiProxyUrl,
           preflightOnly: opts.preflightOnly,
           cloudTwinUrls,
           noDynamicSeed: !opts.dynamicSeed,
-          noSeedCache: !opts.seedCache
+          noSeedCache: !opts.seedCache,
+          apiBearerToken: credentials.token,
+          apiAdminToken: process.env["ARCHAL_RUNTIME_ADMIN_TOKEN"],
+          apiAdminUserId: process.env["ARCHAL_RUNTIME_USER_ID"]
         });
         if (!opts.preflightOnly && report.satisfactionScore < passThreshold) {
           runFailureMessage = `Satisfaction score ${report.satisfactionScore.toFixed(1)} is below pass threshold ${passThreshold}`;
@@ -5502,10 +6529,10 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
       const message = err instanceof Error ? err.message : String(err);
       runFailureMessage = message;
     } finally {
-      if (generatedTwinUrlMapPath && existsSync11(generatedTwinUrlMapPath)) {
+      if (generatedTwinUrlMapPath && existsSync12(generatedTwinUrlMapPath)) {
         unlinkSync7(generatedTwinUrlMapPath);
       }
-      if (generatedApiBaseUrlMapPath && existsSync11(generatedApiBaseUrlMapPath)) {
+      if (generatedApiBaseUrlMapPath && existsSync12(generatedApiBaseUrlMapPath)) {
         unlinkSync7(generatedApiBaseUrlMapPath);
       }
       if (backendSessionId) {
@@ -5566,10 +6593,90 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
   });
   return cmd;
 }
-function resolveOpenClawGatewayToken(rawToken) {
+function resolveEngineConfig(opts, runTimeoutSeconds) {
+  const deprecatedAliasesUsed = collectDeprecatedAliases(opts);
+  const mode = resolveEngineMode(opts);
+  const openclawEndpointAlias = firstNonEmpty(opts.openclawUrl, process.env["OPENCLAW_URL"]);
+  const endpoint = firstNonEmpty(
+    opts.engineEndpoint,
+    openclawEndpointAlias,
+    process.env["ARCHAL_ENGINE_ENDPOINT"]
+  );
+  const token = resolveEngineToken(firstNonEmpty(opts.engineToken, opts.openclawToken));
+  const openclawModel = resolveOpenClawModel(firstNonEmpty(opts.openclawAgent, process.env["OPENCLAW_AGENT_ID"]));
+  const model = firstNonEmpty(
+    opts.engineModel,
+    process.env["ARCHAL_ENGINE_MODEL"],
+    openclawModel,
+    // Legacy OpenClaw alias path keeps the historical default model for compatibility.
+    openclawEndpointAlias ? "openclaw:main" : void 0
+  );
+  const timeoutInput = firstNonEmpty(
+    opts.engineTimeout,
+    opts.openclawTimeout,
+    process.env["ARCHAL_ENGINE_TIMEOUT"]
+  );
+  const timeoutSeconds = mode === "api" ? parsePositiveInteger(timeoutInput, "--engine-timeout") ?? runTimeoutSeconds : runTimeoutSeconds;
+  const twinUrlsPath = firstNonEmpty(
+    opts.engineTwinUrls,
+    opts.openclawTwinUrls,
+    process.env["ARCHAL_ENGINE_TWIN_URLS"]
+  );
+  const harnessDir = firstNonEmpty(opts.harnessDir, process.env["ARCHAL_HARNESS_DIR"]);
+  if (mode === "api") {
+    if (!model) {
+      throw new Error(
+        "--engine-model is required for API mode (or use --openclaw-agent/OPENCLAW_AGENT_ID)."
+      );
+    }
+    if (openclawEndpointAlias && !token) {
+      throw new Error(
+        "OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD."
+      );
+    }
+  }
+  return {
+    mode,
+    endpoint,
+    token,
+    model,
+    twinUrlsPath,
+    timeoutSeconds,
+    harnessDir,
+    deprecatedAliasesUsed
+  };
+}
+function resolveEngineMode(opts) {
+  if (firstNonEmpty(
+    opts.engineEndpoint,
+    opts.openclawUrl,
+    process.env["ARCHAL_ENGINE_ENDPOINT"],
+    process.env["OPENCLAW_URL"]
+  )) {
+    return "api";
+  }
+  if (firstNonEmpty(opts.harnessDir, process.env["ARCHAL_HARNESS_DIR"])) {
+    return "local";
+  }
+  throw new Error(
+    "No agent execution mode configured. Provide --engine-endpoint for remote agent execution, or --harness-dir for local agent execution."
+  );
+}
+function resolveOpenClawModel(raw) {
+  if (!raw || !raw.trim()) {
+    return void 0;
+  }
+  const value = raw.trim();
+  return value.includes(":") ? value : `openclaw:${value}`;
+}
+function resolveEngineToken(rawToken) {
   if (rawToken && rawToken.trim()) {
     return rawToken.trim();
   }
+  const engineToken = process.env["ARCHAL_ENGINE_TOKEN"]?.trim();
+  if (engineToken) {
+    return engineToken;
+  }
   const token = process.env["OPENCLAW_GATEWAY_TOKEN"]?.trim();
   if (token) {
     return token;
@@ -5580,11 +6687,36 @@ function resolveOpenClawGatewayToken(rawToken) {
   }
   return void 0;
 }
+function firstNonEmpty(...values) {
+  for (const value of values) {
+    if (value && value.trim()) {
+      return value.trim();
+    }
+  }
+  return void 0;
+}
+function parsePositiveInteger(raw, flagName) {
+  if (!raw) return void 0;
+  const parsed = parseInt(raw, 10);
+  if (Number.isNaN(parsed) || parsed <= 0) {
+    throw new Error(`${flagName} must be a positive integer`);
+  }
+  return parsed;
+}
+function collectDeprecatedAliases(opts) {
+  const aliases = [];
+  if (opts.openclawUrl) aliases.push("--openclaw-url");
+  if (opts.openclawToken) aliases.push("--openclaw-token");
+  if (opts.openclawAgent) aliases.push("--openclaw-agent");
+  if (opts.openclawTwinUrls) aliases.push("--openclaw-twin-urls");
+  if (opts.openclawTimeout) aliases.push("--openclaw-timeout");
+  return aliases;
+}
 // src/commands/init.ts
 import { Command as Command4 } from "commander";
-import { existsSync as existsSync12, mkdirSync as mkdirSync6, writeFileSync as writeFileSync10 } from "fs";
-import { join as join9, resolve as resolve7 } from "path";
+import { existsSync as existsSync13, mkdirSync as mkdirSync6, writeFileSync as writeFileSync10 } from "fs";
+import { join as join9, resolve as resolve9 } from "path";
 var SAMPLE_SCENARIO = `# Close Stale Issues
 ## Setup
@@ -5759,7 +6891,7 @@ var SAMPLE_PACKAGE_JSON = `{
 }
 `;
 function writeIfMissing(filePath, content) {
-  if (!existsSync12(filePath)) {
+  if (!existsSync13(filePath)) {
     writeFileSync10(filePath, content);
     info(`Created ${filePath}`);
   } else {
@@ -5768,8 +6900,8 @@ function writeIfMissing(filePath, content) {
 }
 function createInitCommand() {
   const cmd = new Command4("init").description("Initialize an Archal test directory with sample scenario and agent").argument("[directory]", "Directory to initialize", "archal").action((directory) => {
-    const targetDir = resolve7(directory);
-    if (existsSync12(targetDir)) {
+    const targetDir = resolve9(directory);
+    if (existsSync13(targetDir)) {
       warn(`Directory already exists: ${targetDir}`);
       warn("Skipping files that already exist.");
     } else {
@@ -5792,23 +6924,10 @@ function createInitCommand() {
   return cmd;
 }
-// src/commands/twins.ts
-import { Command as Command5 } from "commander";
-function createTwinsCommand() {
-  const cmd = new Command5("twins").description("Manage twin catalog entitlements");
-  cmd.command("list").description("List available twins and entitlement status").action(async () => {
-    await listTwinCatalog();
-  });
-  cmd.command("select").description("Choose which twins to use on your free plan").action(async () => {
-    await selectTwinsForPlan();
-  });
-  return cmd;
-}
 // src/commands/scenario.ts
-import { Command as Command6 } from "commander";
-import { existsSync as existsSync13, readdirSync as readdirSync3, writeFileSync as writeFileSync11, mkdirSync as mkdirSync7 } from "fs";
-import { resolve as resolve8, join as join10, extname } from "path";
+import { Command as Command5 } from "commander";
+import { existsSync as existsSync14, readdirSync as readdirSync3, writeFileSync as writeFileSync11, mkdirSync as mkdirSync7 } from "fs";
+import { resolve as resolve10, join as join10, extname, relative } from "path";
 var SCENARIO_TEMPLATE = `# {{NAME}}
 ## Setup
@@ -5834,15 +6953,15 @@ timeout: 120
 runs: 5
 `;
 var SCENARIO_DIR_CANDIDATES = [
-  resolve8("scenarios"),
-  resolve8("scenario"),
-  resolve8("test", "scenarios"),
-  resolve8("tests", "scenarios"),
-  resolve8(".archal", "scenarios")
+  resolve10("scenarios"),
+  resolve10("scenario"),
+  resolve10("test", "scenarios"),
+  resolve10("tests", "scenarios"),
+  resolve10(".archal", "scenarios")
 ];
 function findScenarioFiles(dir) {
   const files = [];
-  if (!existsSync13(dir)) return files;
+  if (!existsSync14(dir)) return files;
   const entries = readdirSync3(dir, { withFileTypes: true });
   for (const entry of entries) {
     const fullPath = join10(dir, entry.name);
@@ -5856,22 +6975,19 @@ function findScenarioFiles(dir) {
 }
 function findLocalScenariosDir() {
   for (const candidate of SCENARIO_DIR_CANDIDATES) {
-    if (existsSync13(candidate)) {
+    if (existsSync14(candidate)) {
       return { dir: candidate, candidates: SCENARIO_DIR_CANDIDATES };
     }
   }
   return {
-    dir: resolve8("scenarios"),
+    dir: resolve10("scenarios"),
     candidates: SCENARIO_DIR_CANDIDATES
   };
 }
 function toDisplayPath(path) {
-  const cwd = resolve8(".");
-  if (path === cwd) return ".";
-  if (path.startsWith(`${cwd}/`)) {
-    return `.${path.slice(cwd.length)}`;
-  }
-  return path;
+  const rel = relative(resolve10("."), path);
+  if (!rel) return ".";
+  return rel.startsWith("..") ? path : rel;
 }
 function getCachedScenariosDir() {
   return join10(ensureArchalDir(), "scenarios");
@@ -5897,14 +7013,14 @@ async function syncRemoteScenarios(token) {
   return scenarios;
 }
 function createScenarioCommand() {
-  const cmd = new Command6("scenario").description("Manage test scenarios");
+  const cmd = new Command5("scenario").description("Manage test scenarios");
   cmd.command("list").description("List available scenarios").option("-d, --dir <directory>", "Scenario directory to search").option("--local", "Only show local scenarios (skip remote fetch)").action(async (opts) => {
     const creds = getCredentials();
     const headers = ["Scenario", "Source", "Criteria", "Twins"];
     const rows = [];
-    const localResolution = opts.dir ? { dir: resolve8(opts.dir), candidates: [resolve8(opts.dir)] } : findLocalScenariosDir();
+    const localResolution = opts.dir ? { dir: resolve10(opts.dir), candidates: [resolve10(opts.dir)] } : findLocalScenariosDir();
     const localDir = localResolution.dir;
-    if (existsSync13(localDir)) {
+    if (existsSync14(localDir)) {
       const localFiles = findScenarioFiles(localDir);
       let hiddenCount = 0;
       for (const file of localFiles) {
@@ -5917,7 +7033,7 @@ function createScenarioCommand() {
               continue;
             }
           }
-          const relativePath = file.replace(resolve8(".") + "\\", "").replace(resolve8(".") + "/", "");
+          const relativePath = relative(resolve10("."), file);
           rows.push([
             scenario.title,
             relativePath,
@@ -5926,7 +7042,7 @@ function createScenarioCommand() {
           ]);
         } catch (err) {
           const message = err instanceof Error ? err.message : String(err);
-          const relativePath = file.replace(resolve8(".") + "\\", "").replace(resolve8(".") + "/", "");
+          const relativePath = relative(resolve10("."), file);
           rows.push([`(parse error)`, relativePath, "-", message]);
         }
       }
@@ -5971,8 +7087,8 @@ function createScenarioCommand() {
 Found ${rows.length} scenario(s)`);
   });
   cmd.command("validate").description("Parse and validate a scenario file").argument("<file>", "Path to scenario markdown file").action((file) => {
-    const filePath = resolve8(file);
-    if (!existsSync13(filePath)) {
+    const filePath = resolve10(file);
+    if (!existsSync14(filePath)) {
       error(`File not found: ${filePath}`);
       process.exit(1);
     }
@@ -6014,14 +7130,14 @@ Found ${rows.length} scenario(s)`);
       info("Run `archal twins select` to change your selection or `archal upgrade` to unlock all twins.");
       process.exit(1);
     }
-    const scenariosDir = opts.dir ? resolve8(opts.dir) : findLocalScenariosDir().dir;
-    if (!existsSync13(scenariosDir)) {
+    const scenariosDir = opts.dir ? resolve10(opts.dir) : findLocalScenariosDir().dir;
+    if (!existsSync14(scenariosDir)) {
       mkdirSync7(scenariosDir, { recursive: true });
       info(`Created scenarios directory: ${scenariosDir}`);
     }
     const fileName = name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "") + ".md";
     const filePath = join10(scenariosDir, fileName);
-    if (existsSync13(filePath)) {
+    if (existsSync14(filePath)) {
       error(`Scenario file already exists: ${filePath}`);
       process.exit(1);
     }
@@ -6038,9 +7154,9 @@ Found ${rows.length} scenario(s)`);
 // src/commands/trace.ts
 import { writeFileSync as writeFileSync12 } from "fs";
-import { resolve as resolve9 } from "path";
+import { resolve as resolve11 } from "path";
 import { createInterface as createInterface3 } from "readline";
-import { Command as Command7 } from "commander";
+import { Command as Command6 } from "commander";
 function formatTimestamp2(iso) {
   try {
     return new Date(iso).toLocaleString();
@@ -6063,10 +7179,10 @@ var TRACE_HEADERS = ["ID", "Scenario", "Score", "Runs", "Entries", "Timestamp"];
 function confirmPrompt(message) {
   if (!process.stdin.isTTY) return Promise.resolve(false);
   const rl = createInterface3({ input: process.stdin, output: process.stderr });
-  return new Promise((resolve11) => {
+  return new Promise((resolve13) => {
     rl.question(`${message} [y/N] `, (answer) => {
       rl.close();
-      resolve11(answer.trim().toLowerCase() === "y");
+      resolve13(answer.trim().toLowerCase() === "y");
     });
   });
 }
@@ -6079,7 +7195,7 @@ function parsePositiveInt(val, flag) {
   return n;
 }
 function createTraceCommand() {
-  const cmd = new Command7("trace").description("Inspect, search, and manage run traces");
+  const cmd = new Command6("trace").description("Inspect, search, and manage run traces");
   cmd.command("list").description("List recent traces").option("-n, --limit <count>", "Number of traces to show", "20").action((opts) => {
     const traces = listTraces(parsePositiveInt(opts.limit, "--limit"));
     if (traces.length === 0) {
@@ -6183,7 +7299,7 @@ ${traces.length} trace(s) found`);
       process.exit(1);
     }
     if (opts.output) {
-      const outPath = resolve9(opts.output);
+      const outPath = resolve11(opts.output);
       writeFileSync12(outPath, json, "utf-8");
       info(`Trace exported to: ${outPath}`);
     } else {
@@ -6260,10 +7376,10 @@ ${traces.length} trace(s) found`);
 }
 // src/commands/config.ts
-import { existsSync as existsSync14, unlinkSync as unlinkSync8 } from "fs";
-import { Command as Command8 } from "commander";
+import { existsSync as existsSync15, unlinkSync as unlinkSync8 } from "fs";
+import { Command as Command7 } from "commander";
 function createConfigCommand() {
-  const cmd = new Command8("config").description("Manage Archal configuration");
+  const cmd = new Command7("config").description("Manage Archal configuration");
   cmd.command("show").description("Print current configuration").option("--json", "Output as JSON").action((opts) => {
     const display = getConfigDisplay();
     if (opts.json) {
@@ -6279,6 +7395,11 @@ function createConfigCommand() {
       model: evaluator["model"] ?? "(not set)",
       apiKey: evaluator["apiKey"] ?? "(not set)"
     });
+    const seedGen = display["seedGeneration"];
+    printConfigSection("Seed Generation", {
+      model: seedGen["model"] ?? "(not set)",
+      geminiApiKey: seedGen["geminiApiKey"] ?? "(not set)"
+    });
     const defaults = display["defaults"];
     printConfigSection("Defaults", {
       runs: String(defaults["runs"]),
@@ -6291,12 +7412,16 @@ function createConfigCommand() {
     });
     process.stdout.write("\n");
     info("Set values with: archal config set <key> <value>");
-    info("Valid keys: telemetry, evaluator.model, evaluator.apiKey, defaults.runs, defaults.timeout");
+    info("Valid keys: telemetry, evaluator.model, evaluator.apiKey, seedGeneration.model, seedGeneration.geminiApiKey, defaults.runs, defaults.timeout");
   });
   cmd.command("set").description("Set a configuration value").argument("<key>", "Configuration key (e.g., evaluator.model, defaults.runs)").argument("<value>", "Value to set").action((key, value) => {
     try {
       setConfigValue(key, value);
       success(`Set ${key} = ${key.includes("apiKey") ? "***" : value}`);
+      if (key.includes("apiKey") && !value.startsWith("env:")) {
+        warn("API key stored in plaintext in config file. Consider using env: prefix instead:");
+        info(`  archal config set ${key} env:YOUR_ENV_VAR_NAME`);
+      }
     } catch (err) {
       const message = err instanceof Error ? err.message : String(err);
       error(message);
@@ -6306,7 +7431,7 @@ function createConfigCommand() {
   cmd.command("init").description("Create default configuration file").option("--force", "Overwrite existing config").action((opts) => {
     const configPath = getConfigPath();
     if (opts.force) {
-      if (existsSync14(configPath)) {
+      if (existsSync15(configPath)) {
         unlinkSync8(configPath);
       }
     }
@@ -6316,7 +7441,7 @@ function createConfigCommand() {
       info("\nNext steps:");
       info("  1. Set your API key:");
       info("     archal config set evaluator.apiKey your-key-here");
-      info("     or set ANTHROPIC_API_KEY environment variable");
+      info("     or set GEMINI_API_KEY environment variable (default provider)");
       info("");
       info("  2. Create a scenario:");
       info("     archal scenario create my-first-test");
@@ -6345,31 +7470,33 @@ function printConfigSection(name, values) {
 }
 // src/commands/demo.ts
-import { Command as Command9 } from "commander";
-import { existsSync as existsSync15 } from "fs";
-import { resolve as resolve10, dirname as dirname4 } from "path";
-import { fileURLToPath as fileURLToPath4 } from "url";
+import { Command as Command8 } from "commander";
+import { existsSync as existsSync16 } from "fs";
+import { resolve as resolve12, dirname as dirname4 } from "path";
+import { fileURLToPath as fileURLToPath5 } from "url";
 import { createRequire as createRequire4 } from "module";
-var __dirname4 = fileURLToPath4(new URL(".", import.meta.url));
+var __dirname5 = fileURLToPath5(new URL(".", import.meta.url));
 function resolveDemoDir() {
-  const monorepoDemoDir = resolve10(__dirname4, "..", "demo");
-  if (existsSync15(resolve10(monorepoDemoDir, "scenario.md"))) {
-    return monorepoDemoDir;
+  const demoDir = resolve12(__dirname5, "..", "demo");
+  if (existsSync16(resolve12(demoDir, "scenario.md"))) {
+    return demoDir;
   }
   try {
     const require2 = createRequire4(import.meta.url);
     const cliMain = require2.resolve("@archal/cli");
     const pkgDir = dirname4(dirname4(cliMain));
-    const npmDemoDir = resolve10(pkgDir, "demo");
-    if (existsSync15(resolve10(npmDemoDir, "scenario.md"))) {
+    const npmDemoDir = resolve12(pkgDir, "demo");
+    if (existsSync16(resolve12(npmDemoDir, "scenario.md"))) {
       return npmDemoDir;
     }
   } catch {
   }
-  throw new Error("Demo files not found. Ensure @archal/cli is installed correctly.");
+  throw new Error(
+    "Demo files not found. Ensure @archal/cli is installed correctly.\nIf installed globally, try reinstalling: npm install -g @archal/cli"
+  );
 }
 function createDemoCommand() {
-  const cmd = new Command9("demo").description("Run a built-in demo: good agent vs bad agent on the same scenario").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (opts) => {
+  const cmd = new Command8("demo").description("Run a built-in demo: good agent vs bad agent on the same scenario").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (opts) => {
     if (opts.quiet) {
       configureLogger({ quiet: true });
     }
@@ -6377,9 +7504,9 @@ function createDemoCommand() {
       configureLogger({ verbose: true, level: "debug" });
     }
     const demoDir = resolveDemoDir();
-    const scenarioPath = resolve10(demoDir, "scenario.md");
-    const goodAgentPath = resolve10(demoDir, "good-agent.mjs");
-    const badAgentPath = resolve10(demoDir, "bad-agent.mjs");
+    const scenarioPath = resolve12(demoDir, "scenario.md");
+    const goodAgentPath = resolve12(demoDir, "good-agent.mjs");
+    const badAgentPath = resolve12(demoDir, "bad-agent.mjs");
     process.stderr.write("\n\x1B[36m\x1B[1marchal demo\x1B[0m \x1B[2m\u2014 same scenario, two agents\x1B[0m\n\n");
     process.stderr.write("\x1B[1m\x1B[32m\u25B8 Good agent\x1B[0m \x1B[2m(checks labels, skips keep-open)\x1B[0m\n");
     const goodReport = await runScenario({
@@ -6412,100 +7539,194 @@ function createDemoCommand() {
 }
 // src/commands/login.ts
-import { Command as Command10 } from "commander";
+import { Command as Command9 } from "commander";
 import { exec } from "child_process";
-import { randomBytes } from "crypto";
+import { createHash as createHash3, randomBytes } from "crypto";
 import { createServer } from "http";
-var AUTH_BASE_URL3 = process.env["ARCHAL_AUTH_URL"] ?? "https://archal.ai";
+function normalizeAuthUrl2(value) {
+  const trimmed = value.trim().replace(/\/+$/, "");
+  return trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
+}
+var AUTH_BASE_URL3 = normalizeAuthUrl2(process.env["ARCHAL_AUTH_URL"] ?? "https://www.archal.ai");
 var START_PORT = 51423;
 var LOGIN_TIMEOUT_MS = 5 * 60 * 1e3;
+var TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
+function escapeHtml(value) {
+  return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
+}
 function openBrowser(url) {
   const platform = process.platform;
   const command = platform === "darwin" ? `open "${url}"` : platform === "win32" ? `start "" "${url}"` : `xdg-open "${url}"`;
-  exec(command, () => {
+  exec(command, (err) => {
+    if (err) {
+      info("Could not open browser automatically.");
+      info(`Please visit the URL above manually to complete login.`);
+    }
   });
 }
+function createPkcePair() {
+  const codeVerifier = randomBytes(32).toString("base64url");
+  const codeChallenge = createHash3("sha256").update(codeVerifier).digest("base64url");
+  return { codeVerifier, codeChallenge };
+}
+function isPlan2(value) {
+  return value === "free" || value === "pro" || value === "enterprise";
+}
+function credentialsFromApiToken(token) {
+  const nowSeconds = Math.floor(Date.now() / 1e3);
+  return {
+    token,
+    refreshToken: "",
+    email: "(from token)",
+    plan: "free",
+    selectedTwins: [],
+    expiresAt: getJwtExpiry(token) ?? nowSeconds + TOKEN_FALLBACK_TTL_SECONDS
+  };
+}
+function credentialsFromLegacyCallback(requestUrl) {
+  const token = requestUrl.searchParams.get("token") ?? requestUrl.searchParams.get("access_token");
+  const refreshToken = requestUrl.searchParams.get("refresh_token") ?? requestUrl.searchParams.get("refreshToken") ?? "";
+  const email = requestUrl.searchParams.get("email");
+  const planParam = requestUrl.searchParams.get("plan");
+  const twins = requestUrl.searchParams.get("twins");
+  if (!token || !email || !isPlan2(planParam)) {
+    return null;
+  }
+  const nowSeconds = Math.floor(Date.now() / 1e3);
+  return {
+    token,
+    refreshToken,
+    email,
+    plan: planParam,
+    selectedTwins: twins ? twins.split(",").filter(Boolean) : [],
+    expiresAt: getJwtExpiry(token) ?? nowSeconds + TOKEN_FALLBACK_TTL_SECONDS
+  };
+}
 function findFreePort(startPort) {
-  return new Promise((resolve11, reject) => {
+  return new Promise((resolve13, reject) => {
     const server = createServer();
     server.listen(startPort, "127.0.0.1", () => {
       const address = server.address();
       const port = typeof address === "object" && address ? address.port : startPort;
-      server.close(() => resolve11(port));
+      server.close(() => resolve13(port));
     });
     server.on("error", () => {
       if (startPort < START_PORT + 100) {
-        findFreePort(startPort + 1).then(resolve11).catch(reject);
+        findFreePort(startPort + 1).then(resolve13).catch(reject);
       } else {
-        reject(new Error("Could not find a free localhost callback port"));
+        reject(new Error(
+          "Could not find a free localhost callback port (tried ports 51423-51523).\nTry closing other services, or use token login: archal login --token <your-token>"
+        ));
       }
     });
   });
 }
 function createLoginCommand() {
-  return new Command10("login").description("Log in via archal.ai browser auth").action(async () => {
+  return new Command9("login").description("Log in via archal.ai browser auth").option("--no-browser", "Do not automatically open the login URL in a browser").option("--token <token>", "Use an API key/token directly (CI/service fallback)").action(async (opts) => {
+    const directToken = opts.token?.trim();
+    if (directToken) {
+      let credentials = credentialsFromApiToken(directToken);
+      credentials = await refreshAuthFromServer(credentials);
+      saveCredentials(credentials);
+      success(`Logged in as ${credentials.email} (${credentials.plan} plan)`);
+      return;
+    }
     const port = await findFreePort(START_PORT);
     const state = randomBytes(16).toString("hex");
     const redirectUrl = `http://localhost:${port}/callback`;
-    const authUrl = `${AUTH_BASE_URL3}/cli-auth?redirect=${encodeURIComponent(redirectUrl)}&state=${encodeURIComponent(state)}`;
+    const { codeVerifier, codeChallenge } = createPkcePair();
+    const authUrl = `${AUTH_BASE_URL3}/cli-auth?redirect=${encodeURIComponent(redirectUrl)}&state=${encodeURIComponent(state)}&code_challenge=${encodeURIComponent(codeChallenge)}&code_challenge_method=S256`;
     info("Opening browser for authentication...");
     info(`If your browser does not open, visit:
   ${authUrl}`);
-    openBrowser(authUrl);
-    await new Promise((resolve11, reject) => {
-      const server = createServer((req, res) => {
-        const requestUrl = new URL(req.url ?? "/", `http://localhost:${port}`);
-        if (requestUrl.pathname !== "/callback") {
-          res.writeHead(404);
-          res.end("Not found");
-          return;
-        }
-        const returnedState = requestUrl.searchParams.get("state");
-        if (returnedState !== state) {
-          res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
-          res.end("<h1>Login failed</h1><p>State mismatch.</p>");
-          server.close();
-          reject(new Error("State mismatch in callback"));
+    if (opts.browser !== false) {
+      openBrowser(authUrl);
+    }
+    await new Promise((resolve13, reject) => {
+      let settled = false;
+      const settleResolve = () => {
+        if (settled) return;
+        settled = true;
+        resolve13();
+      };
+      const settleReject = (error2) => {
+        if (settled) return;
+        settled = true;
+        reject(error2);
+      };
+      function closeAndResolve() {
+        if (!server.listening) {
+          settleResolve();
           return;
         }
-        const token = requestUrl.searchParams.get("token");
-        const refreshToken = requestUrl.searchParams.get("refresh_token") ?? requestUrl.searchParams.get("refreshToken") ?? "";
-        const email = requestUrl.searchParams.get("email");
-        const plan = requestUrl.searchParams.get("plan");
-        const twins = requestUrl.searchParams.get("twins");
-        if (!token || !email || !plan) {
-          res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
-          res.end("<h1>Login failed</h1><p>Missing callback parameters.</p>");
-          server.close();
-          reject(new Error("Missing token/email/plan in callback"));
+        server.close(() => settleResolve());
+      }
+      function closeAndReject(error2) {
+        if (!server.listening) {
+          settleReject(error2);
           return;
         }
-        const expiresAt = getJwtExpiry(token) ?? Math.floor(Date.now() / 1e3) + 30 * 24 * 60 * 60;
-        const credentials = {
-          token,
-          refreshToken,
-          email,
-          plan,
-          selectedTwins: twins ? twins.split(",").filter(Boolean) : [],
-          expiresAt
-        };
-        saveCredentials(credentials);
-        res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
-        res.end("<h1>Login successful</h1><p>You can close this tab.</p>");
-        success(`Logged in as ${email} (${plan})`);
-        if (plan === "free" && credentials.selectedTwins.length === 0) {
-          info(
-            "You haven't selected any twins yet.\n  Run `archal twins select` to choose up to 5 twins for your free plan."
-          );
-        }
-        server.close(() => resolve11());
+        server.close(() => settleReject(error2));
+      }
+      const server = createServer((req, res) => {
+        void (async () => {
+          try {
+            const requestUrl = new URL(req.url ?? "/", `http://localhost:${port}`);
+            if (requestUrl.pathname !== "/callback") {
+              res.writeHead(404);
+              res.end("Not found");
+              return;
+            }
+            const returnedState = requestUrl.searchParams.get("state");
+            if (returnedState !== state) {
+              res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
+              res.end("<h1>Login failed</h1><p>State mismatch.</p>");
+              closeAndReject(new Error("State mismatch in callback"));
+              return;
+            }
+            const code = requestUrl.searchParams.get("code");
+            const credentials = code ? await exchangeCliAuthCode({
+              code,
+              codeVerifier,
+              redirectUri: redirectUrl
+            }) : credentialsFromLegacyCallback(requestUrl);
+            if (!credentials) {
+              res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
+              res.end("<h1>Login failed</h1><p>Missing auth code.</p>");
+              closeAndReject(new Error("Missing code in callback"));
+              return;
+            }
+            saveCredentials(credentials);
+            res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
+            res.end("<h1>Login successful</h1><p>You can close this tab.</p>");
+            success(`Logged in as ${credentials.email} (${credentials.plan} plan)`);
+            if (credentials.plan === "free" && credentials.selectedTwins.length === 0) {
+              info(
+                "You haven't selected any twins yet.\n  Run `archal twins select` to choose up to 5 twins for your free plan."
+              );
+            }
+            closeAndResolve();
+          } catch (error2) {
+            const message = error2 instanceof Error ? error2.message : String(error2);
+            if (!res.headersSent) {
+              res.writeHead(500, { "content-type": "text/html; charset=utf-8" });
+              res.end(`<h1>Login failed</h1><p>${escapeHtml(message)}</p>`);
+            }
+            closeAndReject(error2);
+          }
+        })().catch((error2) => {
+          closeAndReject(error2);
+        });
       });
-      server.listen(port, "127.0.0.1");
       const timeout = setTimeout(() => {
-        server.close();
-        reject(new Error("Login timed out. Run archal login again."));
+        closeAndReject(new Error("Login timed out. Run archal login again."));
       }, LOGIN_TIMEOUT_MS);
       server.on("close", () => clearTimeout(timeout));
+      server.once("error", (error2) => {
+        clearTimeout(timeout);
+        closeAndReject(error2);
+      });
+      server.listen(port, "127.0.0.1");
     }).catch((error2) => {
       const message = error2 instanceof Error ? error2.message : String(error2);
       error(message);
@@ -6515,9 +7736,9 @@ function createLoginCommand() {
 }
 // src/commands/logout.ts
-import { Command as Command11 } from "commander";
+import { Command as Command10 } from "commander";
 function createLogoutCommand() {
-  return new Command11("logout").description("Log out and remove stored credentials").action(() => {
+  return new Command10("logout").description("Log out and remove stored credentials").action(() => {
     const creds = getCredentials();
     if (!creds) {
       info("Not currently logged in.");
@@ -6535,7 +7756,7 @@ function createLogoutCommand() {
 }
 // src/commands/whoami.ts
-import { Command as Command12 } from "commander";
+import { Command as Command11 } from "commander";
 var RESET2 = "\x1B[0m";
 var BOLD2 = "\x1B[1m";
 var DIM2 = "\x1B[2m";
@@ -6543,11 +7764,12 @@ var CYAN2 = "\x1B[36m";
 var GREEN2 = "\x1B[32m";
 var YELLOW2 = "\x1B[33m";
 function createWhoamiCommand() {
-  return new Command12("whoami").description("Show current login status, plan, and entitlements").option("--refresh", "Force refresh from server").action(async (opts) => {
-    let current = requireAuth({
-      action: "show account status",
-      nextCommand: "archal whoami"
-    });
+  return new Command11("whoami").description("Show current login status, plan, and entitlements").option("--refresh", "Force refresh from server").action(async (opts) => {
+    let current = getCredentials();
+    if (!current) {
+      info("Not logged in. Run: archal login");
+      return;
+    }
     if (opts.refresh) {
       current = await refreshAuthFromServer(current);
       saveCredentials(current);
@@ -6611,7 +7833,7 @@ function planBadge(plan) {
 }
 // src/commands/upgrade.ts
-import { Command as Command13 } from "commander";
+import { Command as Command12 } from "commander";
 import { exec as exec2 } from "child_process";
 var BILLING_URL = "https://archal.ai/dashboard/billing";
 function openBrowser2(url) {
@@ -6621,7 +7843,7 @@ function openBrowser2(url) {
   });
 }
 function createUpgradeCommand() {
-  return new Command13("upgrade").description("Open the Archal billing page to upgrade your plan").action(() => {
+  return new Command12("upgrade").description("Open the Archal billing page to upgrade your plan").action(() => {
     const creds = getCredentials();
     if (creds?.plan === "enterprise") {
       info("You are already on the enterprise plan.");
@@ -6640,7 +7862,7 @@ function createUpgradeCommand() {
 }
 // src/commands/help.ts
-import { Command as Command14 } from "commander";
+import { Command as Command13 } from "commander";
 var RESET3 = "\x1B[0m";
 var BOLD3 = "\x1B[1m";
 var DIM3 = "\x1B[2m";
@@ -6668,15 +7890,7 @@ var COMMAND_GROUPS = [
     ]
   },
   {
-    heading: "Twin Processes",
-    commands: [
-      { name: "twin start <name>", description: "Start a local twin process (debug/local only)" },
-      { name: "twin stop <name>", description: "Stop a running local twin process" },
-      { name: "twin status", description: "Show running local twin processes" }
-    ]
-  },
-  {
-    heading: "Twin Catalog",
+    heading: "Twins",
     commands: [
       { name: "twins list", description: "List available twins and entitlement status" },
       { name: "twins select", description: "Choose which twins to use on your free plan" }
@@ -6700,7 +7914,7 @@ var COMMAND_GROUPS = [
 ];
 function showHelp() {
   process.stderr.write(`
-${CYAN3}${BOLD3}Archal CLI${RESET3}  ${DIM3}v0.1.0${RESET3}
+${CYAN3}${BOLD3}Archal CLI${RESET3}  ${DIM3}v${CLI_VERSION}${RESET3}
 `);
   process.stderr.write(`${DIM3}The QA layer for the software factory era${RESET3}
@@ -6722,21 +7936,21 @@ ${CYAN3}${BOLD3}Archal CLI${RESET3}  ${DIM3}v0.1.0${RESET3}
 `);
 }
 function createHelpCommand() {
-  return new Command14("help").description("Show all available commands").action(() => {
+  return new Command13("help").description("Show all available commands").action(() => {
     showHelp();
   });
 }
 // src/commands/setup.ts
-import { Command as Command15 } from "commander";
-import { existsSync as existsSync16 } from "fs";
+import { Command as Command14 } from "commander";
+import { existsSync as existsSync17 } from "fs";
 var RESET4 = "\x1B[0m";
 var BOLD4 = "\x1B[1m";
 var DIM4 = "\x1B[2m";
 var CYAN4 = "\x1B[36m";
 var GREEN3 = "\x1B[32m";
 function createSetupCommand() {
-  return new Command15("setup").description("Guided onboarding wizard for first-time setup").action(async () => {
+  return new Command14("setup").description("Guided onboarding wizard for first-time setup").action(async () => {
     process.stderr.write(`
 ${CYAN4}${BOLD4}Archal Setup${RESET4}
 `);
@@ -6758,7 +7972,7 @@ ${CYAN4}${BOLD4}Archal Setup${RESET4}
 ${BOLD4}Step 2: Configuration${RESET4}
 `);
     const configPath = getConfigPath();
-    if (existsSync16(configPath)) {
+    if (existsSync17(configPath)) {
       success(`Config file exists: ${configPath}`);
     } else {
       const create = await askConfirm("Create a default config file?");
@@ -6823,7 +8037,7 @@ ${DIM4}${"\u2500".repeat(45)}${RESET4}
 `);
     process.stderr.write(`  ${CYAN4}archal scenario create my-first-test${RESET4}  ${DIM4}Create a scenario${RESET4}
 `);
-    process.stderr.write(`  ${CYAN4}archal run scenario.md --openclaw-url "..."${RESET4}  ${DIM4}Run a scenario${RESET4}
+    process.stderr.write(`  ${CYAN4}archal run scenario.md --engine-endpoint "..." --engine-model "..."${RESET4}  ${DIM4}Run a scenario${RESET4}
 `);
     process.stderr.write(`  ${CYAN4}archal help${RESET4}                          ${DIM4}See all commands${RESET4}
@@ -6832,8 +8046,8 @@ ${DIM4}${"\u2500".repeat(45)}${RESET4}
 }
 // src/index.ts
-var program = new Command16();
-program.name("archal").description("The QA layer for the software factory era \u2014 test AI agents against digital twins").version("0.1.0").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").hook("preAction", (_thisCommand) => {
+var program = new Command15();
+program.name("archal").description("The QA layer for the software factory era \u2014 test AI agents against digital twins").version(CLI_VERSION).option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").hook("preAction", (_thisCommand) => {
   const opts = program.opts();
   if (opts.quiet) {
     configureLogger({ quiet: true });
@@ -6848,7 +8062,6 @@ program.addCommand(createWhoamiCommand());
 program.addCommand(createSetupCommand());
 program.addCommand(createRunCommand());
 program.addCommand(createInitCommand());
-program.addCommand(createTwinCommand());
 program.addCommand(createTwinsCommand());
 program.addCommand(createScenarioCommand());
 program.addCommand(createTraceCommand());
@@ -6864,6 +8077,14 @@ program.action(() => {
     process.stderr.write("\x1B[33mNot logged in.\x1B[0m Get started with: \x1B[36marchal login\x1B[0m\n\n");
   }
 });
+function handleShutdown(signal) {
+  process.stderr.write(`
+Received ${signal}, shutting down...
+`);
+  process.exit(128 + (signal === "SIGINT" ? 2 : 15));
+}
+process.on("SIGINT", () => handleShutdown("SIGINT"));
+process.on("SIGTERM", () => handleShutdown("SIGTERM"));
 program.parseAsync(process.argv).catch((err) => {
   const message = err instanceof Error ? err.message : String(err);
   process.stderr.write(`Error: ${message}