npm - @archal/cli - Versions diffs - 0.7.5 → 0.7.7 - Mend

@archal/cli 0.7.5 → 0.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/dist/index.js CHANGED Viewed

@@ -5,13 +5,13 @@ import { Command as Command17 } from "commander";
 // src/commands/run.ts
 import { Command as Command2, Option } from "commander";
-import { existsSync as existsSync13, mkdirSync as mkdirSync6, readFileSync as readFileSync14, unlinkSync as unlinkSync7, writeFileSync as writeFileSync10 } from "fs";
-import { dirname as dirname4, resolve as resolve7 } from "path";
+import { existsSync as existsSync12, mkdirSync as mkdirSync6, readFileSync as readFileSync13, unlinkSync as unlinkSync7, writeFileSync as writeFileSync9 } from "fs";
+import { dirname as dirname3, resolve as resolve6 } from "path";
 // src/runner/orchestrator.ts
-import { existsSync as existsSync11, readFileSync as readFileSync13, renameSync as renameSync2, unlinkSync as unlinkSync6, writeFileSync as writeFileSync8 } from "fs";
-import { resolve as resolve5, dirname as dirname3, join as join8, basename as basename2 } from "path";
-import { createRequire as createRequire2 } from "module";
+import { existsSync as existsSync10, readFileSync as readFileSync12, renameSync as renameSync2, unlinkSync as unlinkSync6, writeFileSync as writeFileSync7 } from "fs";
+import { resolve as resolve4, dirname as dirname2, join as join8, basename as basename2 } from "path";
+import { createRequire } from "module";
 import { tmpdir as tmpdir3 } from "os";
 // src/runner/scenario-parser.ts
@@ -156,7 +156,7 @@ function table(headers, rows) {
     const extra = Math.max(0, available - minTotal);
     const naturalExtra = naturalWidths.map((w, i) => w - minWidths[i]);
     const naturalExtraTotal = naturalExtra.reduce((sum, w) => sum + Math.max(0, w), 0);
-    colWidths = naturalWidths.map((w, i) => {
+    colWidths = naturalWidths.map((_w, i) => {
       if (naturalExtraTotal === 0) return minWidths[i];
       const share = Math.max(0, naturalExtra[i]) / naturalExtraTotal;
       return minWidths[i] + Math.floor(share * extra);
@@ -874,160 +874,6 @@ function overrideSeedSelection(selections, overrides) {
 import { readFileSync as readFileSync2, existsSync, unlinkSync } from "fs";
 import { join } from "path";
 import { tmpdir } from "os";
-import { randomUUID } from "crypto";
-// ../twins/core/dist/index.js
-import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
-import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
-import { z } from "zod";
-var MAX_BODY_BYTES = 50 * 1024 * 1024;
-var MAX_BODY_BYTES2 = 50 * 1024 * 1024;
-function normalizeSpanId(entry) {
-  return entry.spanId ?? entry.id;
-}
-function normalizeTraceId(entry) {
-  if (typeof entry.traceId === "string" && entry.traceId.trim().length > 0) {
-    return entry.traceId;
-  }
-  return void 0;
-}
-function toSortableTimestamp(entry) {
-  const candidates = [entry.startedAt, entry.startTimestamp, entry.timestamp, entry.endedAt, entry.endTimestamp];
-  for (const candidate of candidates) {
-    if (typeof candidate !== "string") {
-      continue;
-    }
-    const value = Date.parse(candidate);
-    if (Number.isFinite(value)) {
-      return value;
-    }
-  }
-  return Number.POSITIVE_INFINITY;
-}
-function stableSortEntries(entries) {
-  return [...entries].sort((left, right) => {
-    const leftSeq = typeof left.sequenceIndex === "number" ? left.sequenceIndex : Number.POSITIVE_INFINITY;
-    const rightSeq = typeof right.sequenceIndex === "number" ? right.sequenceIndex : Number.POSITIVE_INFINITY;
-    if (leftSeq !== rightSeq) {
-      return leftSeq - rightSeq;
-    }
-    const leftTs = toSortableTimestamp(left);
-    const rightTs = toSortableTimestamp(right);
-    if (leftTs !== rightTs) {
-      return leftTs - rightTs;
-    }
-    return normalizeSpanId(left).localeCompare(normalizeSpanId(right));
-  });
-}
-function validateTraceGraph(entries) {
-  const issues = [];
-  const byTrace = /* @__PURE__ */ new Map();
-  for (const entry of entries) {
-    const traceId = normalizeTraceId(entry);
-    if (!traceId) {
-      issues.push({
-        code: "missing_trace_id",
-        traceId: "",
-        spanId: normalizeSpanId(entry),
-        message: `Entry ${entry.id} is missing traceId`
-      });
-      continue;
-    }
-    const existing = byTrace.get(traceId);
-    if (existing) {
-      existing.push(entry);
-    } else {
-      byTrace.set(traceId, [entry]);
-    }
-  }
-  const traces = [];
-  for (const [traceId, traceEntries] of byTrace.entries()) {
-    const ordered = stableSortEntries(traceEntries);
-    const spanById = /* @__PURE__ */ new Map();
-    const parentBySpan = /* @__PURE__ */ new Map();
-    for (const entry of ordered) {
-      const spanId = normalizeSpanId(entry);
-      if (spanById.has(spanId)) {
-        issues.push({
-          code: "duplicate_span_id",
-          traceId,
-          spanId,
-          message: `Trace ${traceId} has duplicate spanId ${spanId}`
-        });
-      } else {
-        spanById.set(spanId, entry);
-      }
-      parentBySpan.set(spanId, entry.parentSpanId ?? null);
-    }
-    const rootSpanIds = ordered.filter((entry) => !entry.parentSpanId).map((entry) => normalizeSpanId(entry));
-    if (rootSpanIds.length !== 1) {
-      issues.push({
-        code: "invalid_root_count",
-        traceId,
-        message: `Trace ${traceId} has ${rootSpanIds.length} roots (expected 1)`
-      });
-    }
-    for (const entry of ordered) {
-      const spanId = normalizeSpanId(entry);
-      const parent = entry.parentSpanId ?? null;
-      if (parent && !spanById.has(parent)) {
-        issues.push({
-          code: "orphan_span",
-          traceId,
-          spanId,
-          message: `Span ${spanId} references missing parent ${parent}`
-        });
-      }
-      for (const link of entry.links ?? []) {
-        if (link.traceId === traceId && !spanById.has(link.spanId)) {
-          issues.push({
-            code: "broken_link",
-            traceId,
-            spanId,
-            message: `Span ${spanId} has link to missing span ${link.spanId}`
-          });
-        }
-      }
-    }
-    for (const spanId of spanById.keys()) {
-      const seen = /* @__PURE__ */ new Set();
-      let cursor = spanId;
-      while (cursor) {
-        if (seen.has(cursor)) {
-          issues.push({
-            code: "cycle_detected",
-            traceId,
-            spanId,
-            message: `Span ${spanId} is in a parent cycle`
-          });
-          break;
-        }
-        seen.add(cursor);
-        cursor = parentBySpan.get(cursor) ?? null;
-      }
-    }
-    traces.push({
-      traceId,
-      rootSpanId: rootSpanIds[0] ?? null,
-      spanCount: ordered.length,
-      orderedSpanIds: ordered.map((entry) => normalizeSpanId(entry))
-    });
-  }
-  return { valid: issues.length === 0, issues, traces };
-}
-var successCriterionSchema = z.object({
-  id: z.string(),
-  description: z.string(),
-  type: z.enum(["deterministic", "probabilistic"])
-});
-var scenarioConfigSchema = z.object({
-  twins: z.array(z.string()).default([]),
-  timeout: z.number().default(120),
-  runs: z.number().default(5),
-  evaluatorModel: z.string().optional(),
-  difficulty: z.enum(["easy", "medium", "hard"]).optional(),
-  tags: z.array(z.string()).default([])
-});
 // src/utils/process.ts
 import { spawn } from "child_process";
@@ -1087,7 +933,7 @@ function spawnWithTimeout(options) {
     onStdout,
     onStderr
   } = options;
-  return new Promise((resolve13, reject) => {
+  return new Promise((resolve12, reject) => {
     const startTime = Date.now();
     let timedOut = false;
     let stdoutBuf = "";
@@ -1143,7 +989,7 @@ function spawnWithTimeout(options) {
       clearTimeout(timer);
       const durationMs = Date.now() - startTime;
       debug("Process exited", { command, exitCode, durationMs, timedOut });
-      resolve13({
+      resolve12({
         exitCode,
         stdout: stdoutBuf,
         stderr: stderrBuf,
@@ -1254,24 +1100,55 @@ ${stderrPreview}`);
     agentTrace
   };
 }
-var HTTP_COLLECT_TIMEOUT_MS = 1e4;
-var HTTP_COLLECT_MAX_RETRIES = 2;
-var HTTP_COLLECT_BACKOFF_MS = [1e3, 3e3];
-async function fetchWithRetry(url, options, retries = HTTP_COLLECT_MAX_RETRIES) {
+var HTTP_COLLECT_TIMEOUT_MS = 3e4;
+var HTTP_COLLECT_MAX_RETRIES = 5;
+var HTTP_COLLECT_BACKOFF_MS = [2e3, 3e3, 5e3, 5e3, 5e3];
+var HTTP_RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([408, 425, 429, 500, 502, 503, 504]);
+var HTTP_PUSH_TIMEOUT_MS = 2e4;
+var HTTP_PUSH_MAX_RETRIES = 6;
+var HTTP_PUSH_BACKOFF_MS = [1e3, 2e3, 3e3, 5e3, 5e3, 5e3];
+function resolveRetryDelay(backoffMs, attempt, fallbackMs) {
+  const indexed = backoffMs[attempt];
+  if (typeof indexed === "number" && Number.isFinite(indexed) && indexed >= 0) {
+    return indexed;
+  }
+  const last = backoffMs.length > 0 ? backoffMs[backoffMs.length - 1] : void 0;
+  if (typeof last === "number" && Number.isFinite(last) && last >= 0) {
+    return last;
+  }
+  return fallbackMs;
+}
+async function fetchWithRetry(url, options, retryOptions) {
+  const retries = retryOptions?.retries ?? HTTP_COLLECT_MAX_RETRIES;
+  const timeoutMs = retryOptions?.timeoutMs ?? HTTP_COLLECT_TIMEOUT_MS;
+  const backoffMs = retryOptions?.backoffMs ?? HTTP_COLLECT_BACKOFF_MS;
   let lastError;
   for (let attempt = 0; attempt <= retries; attempt++) {
     try {
       const response = await fetch(url, {
         ...options,
-        signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
+        signal: AbortSignal.timeout(timeoutMs)
       });
+      if (!response.ok && HTTP_RETRYABLE_STATUS_CODES.has(response.status) && attempt < retries) {
+        const delay = resolveRetryDelay(backoffMs, attempt, 3e3);
+        let bodyPreview = "";
+        try {
+          bodyPreview = (await response.clone().text()).slice(0, 180);
+        } catch {
+        }
+        debug(
+          `HTTP fetch got ${response.status} (attempt ${attempt + 1}/${retries + 1}), retrying in ${delay}ms${bodyPreview ? `: ${bodyPreview}` : ""}`
+        );
+        await new Promise((resolve12) => setTimeout(resolve12, delay));
+        continue;
+      }
       return response;
     } catch (err) {
       lastError = err;
       if (attempt < retries) {
-        const delay = HTTP_COLLECT_BACKOFF_MS[attempt] ?? 3e3;
+        const delay = resolveRetryDelay(backoffMs, attempt, 3e3);
         debug(`HTTP fetch failed (attempt ${attempt + 1}/${retries + 1}), retrying in ${delay}ms: ${err instanceof Error ? err.message : String(err)}`);
-        await new Promise((resolve13) => setTimeout(resolve13, delay));
+        await new Promise((resolve12) => setTimeout(resolve12, delay));
       }
     }
   }
@@ -1309,7 +1186,6 @@ Cannot proceed \u2014 evaluator would receive empty state and produce unreliable
   }
   return state;
 }
-var HTTP_PUSH_TIMEOUT_MS = 2e4;
 async function pushStateToCloud(twinUrls, seedSelections, bearerToken, adminAuth) {
   const headers = adminAuth ? {
     "x-archal-admin-token": adminAuth.token,
@@ -1325,12 +1201,19 @@ async function pushStateToCloud(twinUrls, seedSelections, bearerToken, adminAuth
     }
     const url = `${twinBasePath(baseUrl)}/state`;
     debug(`Pushing dynamic seed to ${sel.twinName}`, { url });
-    const response = await fetch(url, {
-      method: "PUT",
-      headers,
-      body: JSON.stringify(sel.seedData),
-      signal: AbortSignal.timeout(HTTP_PUSH_TIMEOUT_MS)
-    });
+    const response = await fetchWithRetry(
+      url,
+      {
+        method: "PUT",
+        headers,
+        body: JSON.stringify(sel.seedData)
+      },
+      {
+        retries: HTTP_PUSH_MAX_RETRIES,
+        timeoutMs: HTTP_PUSH_TIMEOUT_MS,
+        backoffMs: HTTP_PUSH_BACKOFF_MS
+      }
+    );
     if (!response.ok) {
       const text = await response.text().catch(() => "");
       throw new Error(
@@ -1385,7 +1268,10 @@ Evaluator would receive incomplete trace data and produce unreliable results.`
     return leftValue - rightValue;
   });
   for (let i = 0; i < allTraces.length; i++) {
-    allTraces[i].sequenceIndex = i;
+    const entry = allTraces[i];
+    if (entry) {
+      entry.sequenceIndex = i;
+    }
   }
   return allTraces;
 }
@@ -1454,24 +1340,44 @@ function resolveAgentConfig(agentCommand, projectConfigPath) {
 }
 // src/runner/openclaw-adapter.ts
-import { existsSync as existsSync2, readFileSync as readFileSync3, mkdirSync, writeFileSync as writeFileSync2, rmSync } from "fs";
+import { existsSync as existsSync2, readFileSync as readFileSync3, mkdirSync, writeFileSync, rmSync } from "fs";
 import { join as join2, resolve } from "path";
 import { tmpdir as tmpdir2 } from "os";
+function buildEnvironmentPreamble(twinNames) {
+  if (twinNames.length === 0) return "";
+  const serviceMap = {
+    slack: "Slack (channels, messages, user profiles)",
+    stripe: "Stripe (payments, balances, customers, payment links)",
+    jira: "Jira (issues, comments, approvals, project boards)",
+    github: "GitHub (repositories, issues, pull requests, code)",
+    linear: "Linear (issues, projects, cycles)",
+    supabase: "Supabase (database tables, SQL queries, row-level access)",
+    "google-workspace": "Google Workspace (calendar events, drive files, sharing permissions)"
+  };
+  const serviceList = twinNames.map((name) => serviceMap[name] ?? name).join(", ");
+  return `You have full access to the following internal systems: ${serviceList}.`;
+}
 function generateTaskFromScenario(scenario, apiRouting) {
-  const baseTask = scenario.prompt ? scenario.prompt : scenario.task ? scenario.task : (() => {
+  const baseTask = scenario.prompt ? scenario.setup ? `${scenario.setup}
+${scenario.prompt}` : scenario.prompt : scenario.task ? scenario.task : (() => {
     const lines2 = [];
     lines2.push(scenario.title);
     lines2.push("");
     lines2.push(scenario.setup);
     return lines2.join("\n");
   })();
+  const preamble = buildEnvironmentPreamble(scenario.config.twins);
+  const taskWithPreamble = preamble ? `${preamble}
+${baseTask}` : baseTask;
   const baseUrls = apiRouting?.baseUrls ?? {};
   const hasBaseUrls = Object.keys(baseUrls).length > 0;
   const hasProxy = Boolean(apiRouting?.proxyUrl);
   if (!hasBaseUrls && !hasProxy) {
-    return baseTask;
+    return taskWithPreamble;
   }
-  const lines = [baseTask, "", "---", "", "## API Routing Context", ""];
+  const lines = [taskWithPreamble, "", "---", "", "## API Routing Context", ""];
   lines.push("When writing or executing raw API code, route traffic to these clone endpoints.");
   lines.push("Prefer explicit base URLs; use proxy settings only when needed.");
   lines.push("");
@@ -1482,19 +1388,14 @@ function generateTaskFromScenario(scenario, apiRouting) {
     }
     lines.push("");
   }
-  if (apiRouting?.adminToken) {
+  if (apiRouting?.adminToken || apiRouting?.bearerToken) {
     lines.push("Authentication:");
-    lines.push("Include these headers with every request to the base URLs above:");
-    lines.push(`  x-archal-admin-token: ${apiRouting.adminToken}`);
-    if (apiRouting.adminUserId) {
-      lines.push(`  x-archal-user-id: ${apiRouting.adminUserId}`);
+    lines.push("Use runtime-provided auth headers for clone endpoints.");
+    lines.push("Do not print or persist credentials in output artifacts.");
+    if (apiRouting?.adminUserId) {
+      lines.push(`Auth context user: ${apiRouting.adminUserId}`);
     }
     lines.push("");
-  } else if (apiRouting?.bearerToken) {
-    lines.push("Authentication:");
-    lines.push("Include this header with every request to the base URLs above:");
-    lines.push(`  Authorization: Bearer ${apiRouting.bearerToken}`);
-    lines.push("");
   }
   if (hasProxy && apiRouting?.proxyUrl) {
     lines.push(`Proxy URL: ${apiRouting.proxyUrl}`);
@@ -1744,39 +1645,39 @@ ${rawBody}${hint}`.trim(),
 import { existsSync as existsSync4, readFileSync as readFileSync5, readdirSync } from "fs";
 import { dirname, resolve as resolve2 } from "path";
 import { fileURLToPath } from "url";
-import { z as z3 } from "zod";
+import { z as z2 } from "zod";
 // src/config/config.ts
-import { readFileSync as readFileSync4, writeFileSync as writeFileSync3, mkdirSync as mkdirSync2, existsSync as existsSync3 } from "fs";
+import { readFileSync as readFileSync4, writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, existsSync as existsSync3 } from "fs";
 import { join as join3 } from "path";
 import { homedir } from "os";
-import { z as z2 } from "zod";
+import { z } from "zod";
 var ARCHAL_DIR_NAME = ".archal";
 var CONFIG_FILE_NAME = "config.json";
-var llmProviderModeSchema = z2.enum(["archal", "direct", "auto"]).default("auto");
-var evaluatorConfigSchema = z2.object({
-  model: z2.string().default("claude-sonnet-4-6"),
-  apiKey: z2.string().default("env:ANTHROPIC_API_KEY"),
-  baseUrl: z2.string().optional(),
+var llmProviderModeSchema = z.enum(["archal", "direct", "auto"]).default("auto");
+var evaluatorConfigSchema = z.object({
+  model: z.string().default("claude-sonnet-4-6"),
+  apiKey: z.string().default("env:ANTHROPIC_API_KEY"),
+  baseUrl: z.string().optional(),
   provider: llmProviderModeSchema
 });
-var seedGenerationConfigSchema = z2.object({
-  model: z2.string().default("claude-sonnet-4-6"),
+var seedGenerationConfigSchema = z.object({
+  model: z.string().default("claude-sonnet-4-6"),
   provider: llmProviderModeSchema,
   // Legacy: geminiApiKey is accepted for backward compat but ignored — evaluator.apiKey is used for both.
-  geminiApiKey: z2.string().optional()
+  geminiApiKey: z.string().optional()
 });
-var defaultsConfigSchema = z2.object({
-  runs: z2.number().int().positive().default(5),
-  timeout: z2.number().int().positive().default(120)
+var defaultsConfigSchema = z.object({
+  runs: z.number().int().positive().default(5),
+  timeout: z.number().int().positive().default(180)
 });
-var engineConfigSchema = z2.object({
-  apiKey: z2.string().default(""),
-  defaultHarness: z2.string().optional()
+var engineConfigSchema = z.object({
+  apiKey: z.string().default(""),
+  defaultHarness: z.string().optional()
 });
-var configFileSchema = z2.object({
-  telemetry: z2.boolean().default(true),
-  traceFidelity: z2.enum(["standard", "full"]).default("full"),
+var configFileSchema = z.object({
+  telemetry: z.boolean().default(true),
+  traceFidelity: z.enum(["standard", "full"]).default("full"),
   evaluator: evaluatorConfigSchema.default({}),
   seedGeneration: seedGenerationConfigSchema.default({}),
   defaults: defaultsConfigSchema.default({}),
@@ -1901,7 +1802,7 @@ function saveConfig(config) {
       ...config.engine
     }
   };
-  writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
+  writeFileSync2(configPath, JSON.stringify(merged, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
   debug("Saved config file", { path: configPath });
 }
 function initConfig() {
@@ -1912,7 +1813,7 @@ function initConfig() {
   }
   const defaultConfig = configFileSchema.parse({});
   ensureArchalDir();
-  writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
+  writeFileSync2(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
   return configPath;
 }
 function setConfigValue(key, value) {
@@ -2008,15 +1909,15 @@ function getConfigDisplay() {
 }
 // src/runner/harness.ts
-var harnessLocalSchema = z3.object({
-  command: z3.string().min(1, "local.command must be a non-empty string"),
-  args: z3.array(z3.string()).default([]),
-  env: z3.record(z3.string()).optional()
+var harnessLocalSchema = z2.object({
+  command: z2.string().min(1, "local.command must be a non-empty string"),
+  args: z2.array(z2.string()).default([]),
+  env: z2.record(z2.string()).optional()
 });
-var harnessManifestSchema = z3.object({
-  version: z3.literal(1),
-  defaultModel: z3.string().optional(),
-  promptFiles: z3.array(z3.string()).default([]),
+var harnessManifestSchema = z2.object({
+  version: z2.literal(1),
+  defaultModel: z2.string().optional(),
+  promptFiles: z2.array(z2.string()).default([]),
   local: harnessLocalSchema.optional()
 });
 var MANIFEST_FILE = "archal-harness.json";
@@ -2214,12 +2115,6 @@ function resolveMarkdownPromptOrder(markdownFiles) {
   return [...ordered, ...remaining];
 }
-// src/runner/reporter.ts
-import { readFileSync as readFileSync8, existsSync as existsSync6 } from "fs";
-import { createRequire } from "module";
-import { dirname as dirname2, resolve as resolve4 } from "path";
-import { fileURLToPath as fileURLToPath3 } from "url";
 // src/utils/version.ts
 import { readFileSync as readFileSync6 } from "fs";
 import { resolve as resolve3 } from "path";
@@ -2239,7 +2134,7 @@ var CLI_USER_AGENT = `archal-cli/${CLI_VERSION}`;
 // src/auth.ts
 import { spawnSync } from "child_process";
-import { existsSync as existsSync5, readFileSync as readFileSync7, renameSync, unlinkSync as unlinkSync2, writeFileSync as writeFileSync4 } from "fs";
+import { existsSync as existsSync5, readFileSync as readFileSync7, renameSync, unlinkSync as unlinkSync2, writeFileSync as writeFileSync3 } from "fs";
 import { join as join4 } from "path";
 import { createCipheriv, createDecipheriv, createHash, randomBytes } from "crypto";
 var CREDENTIALS_FILE = "credentials.json";
@@ -2291,6 +2186,30 @@ function getConfiguredApiBaseUrl() {
   return explicit ?? getConfiguredAuthBaseUrl();
 }
 var REQUEST_TIMEOUT_MS = 8e3;
+var AUTH_MAX_RETRIES = 2;
+var AUTH_RETRY_BACKOFF_MS = [500, 1500];
+var AUTH_RETRYABLE_CODES = /* @__PURE__ */ new Set([502, 503, 504, 429]);
+async function fetchAuthWithRetry(url, options) {
+  let lastError;
+  for (let attempt = 0; attempt <= AUTH_MAX_RETRIES; attempt++) {
+    try {
+      const response = await fetch(url, {
+        ...options,
+        signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
+      });
+      if (response.ok || !AUTH_RETRYABLE_CODES.has(response.status) || attempt >= AUTH_MAX_RETRIES) {
+        return response;
+      }
+      lastError = new Error(`HTTP ${response.status}`);
+    } catch (err) {
+      lastError = err;
+      if (attempt >= AUTH_MAX_RETRIES) break;
+    }
+    const delay = AUTH_RETRY_BACKOFF_MS[attempt] ?? 1500;
+    await new Promise((resolve12) => setTimeout(resolve12, delay));
+  }
+  throw lastError;
+}
 var ENV_TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
 function getCredentialsPath() {
   return join4(ensureArchalDir(), CREDENTIALS_FILE);
@@ -2380,6 +2299,22 @@ function resolveStoredToken(parsed) {
   }
   return { token: null, source: "legacy" };
 }
+function resolveStoredRefreshToken(parsed) {
+  if (typeof parsed.refreshTokenEncrypted === "string") {
+    const refreshToken = decryptToken(parsed.refreshTokenEncrypted)?.trim() ?? null;
+    if (refreshToken !== null) {
+      return { refreshToken, source: "encrypted" };
+    }
+    if (typeof parsed.refreshToken === "string") {
+      return { refreshToken: parsed.refreshToken.trim(), source: "legacy" };
+    }
+    return { refreshToken: null, source: "encrypted" };
+  }
+  if (typeof parsed.refreshToken === "string") {
+    return { refreshToken: parsed.refreshToken.trim(), source: "legacy" };
+  }
+  return { refreshToken: "", source: "none" };
+}
 function getOrCreateCredentialsKey() {
   const envKey = readCredentialsKeyFromEnv();
   if (envKey) {
@@ -2404,7 +2339,7 @@ function getOrCreateCredentialsKey() {
   const generated = randomBytes(32);
   const wroteToKeychain = writeCredentialsKeyToMacKeychain(generated);
   if (!wroteToKeychain) {
-    writeFileSync4(keyPath, generated.toString("hex") + "\n", { encoding: "utf-8", mode: 384 });
+    writeFileSync3(keyPath, generated.toString("hex") + "\n", { encoding: "utf-8", mode: 384 });
   }
   return generated;
 }
@@ -2459,7 +2394,8 @@ function readCredentialsFile() {
     const raw = readFileSync7(path, "utf-8");
     const parsed = JSON.parse(raw);
     const { token, source: tokenSource } = resolveStoredToken(parsed);
-    if (token === null || parsed.refreshToken !== void 0 && typeof parsed.refreshToken !== "string" || typeof parsed.email !== "string" || !isPlan(parsed.plan) || typeof parsed.expiresAt !== "number") {
+    const { refreshToken, source: refreshTokenSource } = resolveStoredRefreshToken(parsed);
+    if (token === null || refreshToken === null || parsed.refreshToken !== void 0 && typeof parsed.refreshToken !== "string" || parsed.refreshTokenEncrypted !== void 0 && typeof parsed.refreshTokenEncrypted !== "string" || typeof parsed.email !== "string" || !isPlan(parsed.plan) || typeof parsed.expiresAt !== "number") {
       warn(
         `Credentials file at ${path} has missing or invalid fields. Run \`archal login\` to re-authenticate.`
       );
@@ -2467,13 +2403,13 @@ function readCredentialsFile() {
     }
     const creds = {
       token,
-      refreshToken: typeof parsed.refreshToken === "string" ? parsed.refreshToken : "",
+      refreshToken,
       email: parsed.email,
       plan: parsed.plan,
       selectedTwins: Array.isArray(parsed.selectedTwins) ? parsed.selectedTwins : [],
       expiresAt: parsed.expiresAt
     };
-    if (tokenSource === "legacy") {
+    if (tokenSource === "legacy" || refreshTokenSource === "legacy") {
       try {
         saveCredentials(creds);
       } catch {
@@ -2538,16 +2474,17 @@ function getStoredCredentials() {
 function saveCredentials(creds) {
   const credPath = getCredentialsPath();
   const trimmedToken = creds.token.trim();
+  const trimmedRefreshToken = creds.refreshToken.trim();
   const payload = {
-    refreshToken: creds.refreshToken,
     email: creds.email,
     plan: creds.plan,
     selectedTwins: creds.selectedTwins,
     expiresAt: creds.expiresAt,
-    tokenEncrypted: encryptToken(trimmedToken)
+    tokenEncrypted: encryptToken(trimmedToken),
+    refreshTokenEncrypted: trimmedRefreshToken.length > 0 ? encryptToken(trimmedRefreshToken) : void 0
   };
   const tmpPath = `${credPath}.${randomBytes(4).toString("hex")}.tmp`;
-  writeFileSync4(tmpPath, JSON.stringify(payload, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
+  writeFileSync3(tmpPath, JSON.stringify(payload, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
   renameSync(tmpPath, credPath);
 }
 function deleteCredentials() {
@@ -2636,15 +2573,14 @@ async function exchangeCliAuthCode(input) {
       "ARCHAL_AUTH_URL is required for browser login when ARCHAL_STRICT_ENDPOINTS=1. Set ARCHAL_AUTH_URL and run `archal login` again."
     );
   }
-  const response = await fetch(`${authBaseUrl}/auth/cli/token`, {
+  const response = await fetchAuthWithRetry(`${authBaseUrl}/auth/cli/token`, {
     method: "POST",
     headers: {
       "content-type": "application/json",
       "user-agent": CLI_USER_AGENT,
       "x-archal-cli-version": CLI_VERSION
     },
-    body: JSON.stringify(input),
-    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
+    body: JSON.stringify(input)
   });
   if (!response.ok) {
     throw new Error(`Login failed during code exchange (${response.status})`);
@@ -2653,7 +2589,7 @@ async function exchangeCliAuthCode(input) {
   if (!isCliTokenExchangeResponse(payload)) {
     throw new Error("Login failed: invalid token exchange response");
   }
-  const rawTwins = payload["selectedTwinIds"];
+  const rawTwins = payload.selectedTwinIds;
   const selectedTwins = Array.isArray(rawTwins) ? rawTwins.filter((id) => typeof id === "string") : [];
   return {
     token: payload.accessToken,
@@ -2672,15 +2608,14 @@ async function refreshCliSession(creds) {
   if (!authBaseUrl) {
     return null;
   }
-  const response = await fetch(`${authBaseUrl}/auth/cli/refresh`, {
+  const response = await fetchAuthWithRetry(`${authBaseUrl}/auth/cli/refresh`, {
     method: "POST",
     headers: {
       "content-type": "application/json",
       "user-agent": CLI_USER_AGENT,
       "x-archal-cli-version": CLI_VERSION
     },
-    body: JSON.stringify({ refreshToken: creds.refreshToken }),
-    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
+    body: JSON.stringify({ refreshToken: creds.refreshToken })
   });
   if (!response.ok) {
     return null;
@@ -2770,11 +2705,11 @@ function parseBoundedInt(value, fallback, min, max) {
   }
   return parsed;
 }
-var MAX_RETRIES = parseBoundedInt(process.env["ARCHAL_API_MAX_RETRIES"], 3, 0, 10);
-var RETRY_BASE_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_BASE_MS"], 250, 25, 1e4);
-var RETRY_MAX_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_MAX_MS"], 3e3, RETRY_BASE_DELAY_MS, 2e4);
+var MAX_RETRIES = parseBoundedInt(process.env["ARCHAL_API_MAX_RETRIES"], 6, 0, 10);
+var RETRY_BASE_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_BASE_MS"], 2e3, 25, 1e4);
+var RETRY_MAX_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_MAX_MS"], 1e4, RETRY_BASE_DELAY_MS, 3e4);
 function sleep(ms) {
-  return new Promise((resolve13) => setTimeout(resolve13, ms));
+  return new Promise((resolve12) => setTimeout(resolve12, ms));
 }
 function retryDelayMs(attempt, retryAfter) {
   if (retryAfter) {
@@ -3033,6 +2968,7 @@ function requestLlmCompletion(token, body) {
 // src/evaluator/llm-provider.ts
 var lastKnownRemaining = null;
+var modelMismatchWarned = false;
 function getLastKnownRemaining() {
   return lastKnownRemaining;
 }
@@ -3121,6 +3057,13 @@ async function callLlmViaArchal(options) {
     throw new LlmApiError("Archal proxy", httpStatus, result.error ?? "unknown error");
   }
   lastKnownRemaining = result.data.remaining ?? null;
+  const actualModel = result.data.model;
+  debug("Archal backend response", { model: actualModel, remaining: String(result.data.remaining ?? "unknown") });
+  const isSeedGen = options.intent === "seed-generate";
+  if (!modelMismatchWarned && !isSeedGen && options.model && actualModel && !actualModel.includes(options.model) && !options.model.includes(actualModel)) {
+    warn(`Requested model "${options.model}" but Archal backend used "${actualModel}". To use a specific model, set provider to "direct" with your own API key.`);
+    modelMismatchWarned = true;
+  }
   return result.data.text;
 }
 function resolveArchalProxyByok(options) {
@@ -3162,12 +3105,13 @@ async function callLlm(options) {
     return callLlmViaArchal(options);
   }
   if (mode === "auto") {
-    if (options.apiKey) {
-      debug("Auto mode: using direct LLM call (BYOK available)", {
+    const envKey = options.apiKey || process.env[PROVIDER_ENV_VARS[options.provider]] || "";
+    if (envKey) {
+      debug("Auto mode: using direct LLM call (API key available)", {
         provider: options.provider,
         model: options.model
       });
-      return callLlmDirect(options);
+      return callLlmDirect({ ...options, apiKey: envKey });
     }
     const creds = getCredentials();
     if (creds?.token) {
@@ -3307,7 +3251,6 @@ async function callOpenAiCompatible(options) {
 }
 // src/runner/reporter.ts
-var __dirname2 = fileURLToPath3(new URL(".", import.meta.url));
 var MAX_ERROR_PREVIEW_CHARS = 60;
 var MAX_AGENT_LOG_LINES = 30;
 var MAX_LLM_LINE_CHARS = 200;
@@ -3344,9 +3287,9 @@ function printRunProgress(runIndex, totalRuns, score, error2) {
 }
 function formatTraceSummary(report) {
   const lines = [];
-  const firstRun = report.runs[0];
-  if (!firstRun || firstRun.trace.length === 0) return lines;
-  const trace = firstRun.trace;
+  const representativeRun = report.runs.find((r) => r.trace.length > 0);
+  if (!representativeRun) return lines;
+  const trace = representativeRun.trace;
   const toolCounts = /* @__PURE__ */ new Map();
   for (const entry of trace) {
     const count = toolCounts.get(entry.toolName) ?? 0;
@@ -3396,10 +3339,6 @@ function generateReport(report, format) {
       return formatJunit(report);
   }
 }
-var TWIN_ASSET_DIR_CANDIDATES = [
-  resolve4(__dirname2, "..", "twin-assets"),
-  resolve4(__dirname2, "..", "..", "twin-assets")
-];
 function formatTerminal(report) {
   const lines = [];
   const totalRuns = report.runs.length;
@@ -3460,6 +3399,38 @@ function formatTerminal(report) {
       }
     }
   }
+  if (totalRuns >= 3) {
+    const flakyLines = [];
+    const consistentPass = [];
+    const consistentFail = [];
+    for (const criterionId of criterionIds) {
+      let passCount = 0;
+      for (const run of report.runs) {
+        const ev = run.evaluations.find((e) => e.criterionId === criterionId);
+        if (ev && ev.status === "pass") passCount++;
+      }
+      const desc = report.criterionDescriptions?.[criterionId] ?? criterionId;
+      const short = desc.length > 40 ? desc.slice(0, 39) + "\u2026" : desc;
+      if (passCount === totalRuns) {
+        consistentPass.push(short);
+      } else if (passCount === 0) {
+        consistentFail.push(short);
+      } else {
+        flakyLines.push(`    ${YELLOW}\u26A0${RESET} ${short} ${DIM}(${passCount}/${totalRuns} runs)${RESET}`);
+      }
+    }
+    if (flakyLines.length > 0) {
+      lines.push("");
+      lines.push(`  ${BOLD}flaky criteria:${RESET}`);
+      lines.push(...flakyLines);
+      if (consistentPass.length > 0) {
+        lines.push(`  ${DIM}consistently passing: ${consistentPass.length} criteria${RESET}`);
+      }
+      if (consistentFail.length > 0) {
+        lines.push(`  ${DIM}consistently failing: ${consistentFail.length} criteria${RESET}`);
+      }
+    }
+  }
   lines.push("");
   const sc = report.satisfactionScore >= 80 ? GREEN : report.satisfactionScore >= 50 ? YELLOW : RED;
   lines.push(`  ${BOLD}satisfaction:${RESET} ${sc}${BOLD}${report.satisfactionScore.toFixed(1)}%${RESET} ${DIM}(${totalRuns} runs)${RESET}`);
@@ -3599,7 +3570,7 @@ function formatJunit(report) {
   let totalTime = 0;
   for (const run of report.runs) {
     totalTests += run.evaluations.length;
-    totalFailures += run.evaluations.filter((e) => e.status === "fail").length;
+    totalFailures += run.evaluations.filter((e) => e.status === "fail" || e.status === "partial").length;
     totalTime += run.durationMs;
   }
   lines.push('<?xml version="1.0" encoding="UTF-8"?>');
@@ -3608,7 +3579,7 @@ function formatJunit(report) {
   );
   for (const run of report.runs) {
     const runTests = run.evaluations.length;
-    const runFailures = run.evaluations.filter((e) => e.status === "fail").length;
+    const runFailures = run.evaluations.filter((e) => e.status === "fail" || e.status === "partial").length;
     const runTime = (run.durationMs / 1e3).toFixed(3);
     lines.push(
       `  <testsuite name="Run ${run.runIndex + 1}" tests="${runTests}" failures="${runFailures}" time="${runTime}">`
@@ -3631,7 +3602,7 @@ function formatJunit(report) {
         );
       } else if (evaluation.status === "partial") {
         lines.push(
-          `      <system-out>PARTIAL: ${escapeXml(evaluation.explanation)} (confidence: ${(evaluation.confidence * 100).toFixed(0)}%)</system-out>`
+          `      <failure message="PARTIAL: ${escapeXml(evaluation.explanation)}" type="CriterionPartial">PARTIAL (confidence: ${(evaluation.confidence * 100).toFixed(0)}%): ${escapeXml(evaluation.explanation)}</failure>`
         );
       }
       lines.push("    </testcase>");
@@ -3745,10 +3716,6 @@ function parseAssertion(description) {
   const remainMatch = lower.match(/^(.+?)\s+remain\s+(open|closed|active|inactive|pending|completed|resolved|unresolved|enabled|disabled|merged|unmerged|locked|unlocked|archived|draft|published|assigned|unassigned|blocked|unblocked|approved|rejected|private|public)$/);
   if (remainMatch) {
     const remainSubject = remainMatch[1]?.trim() ?? "";
-    const SEMANTIC_QUALIFIERS = /\b(?:recently|stale|inactive|active|unresolved|old|new|fresh|updated|untouched)\b/i;
-    if (SEMANTIC_QUALIFIERS.test(remainSubject)) {
-      return null;
-    }
     return {
       type: "state_check",
       subject: remainSubject,
@@ -4015,6 +3982,17 @@ function parseAssertion(description) {
       labelFilter: receivedLabelMatch[2]?.trim()
     };
   }
+  const exclusionMatch = lower.match(
+    /^no\s+(.+?)\s+(?:were|are|have been)\s+modified\s+(?:other\s+than|except|besides|excluding)\s+(?:the\s+)?(\d+)\s+(?:that|which)\s+(?:were|are|have been)\s+(\w+)$/
+  );
+  if (exclusionMatch) {
+    return {
+      type: "exclusive_modification",
+      subject: exclusionMatch[1]?.trim() ?? "",
+      value: parseInt(exclusionMatch[2] ?? "0", 10),
+      predicate: exclusionMatch[3]?.trim()
+    };
+  }
   if (/\b(?:other\s+than|except|besides|excluding|apart\s+from|beyond)\b/.test(lower)) {
     return null;
   }
@@ -4062,6 +4040,23 @@ function parseAssertion(description) {
 }
 // src/evaluator/deterministic.ts
+function deepEqual(a, b) {
+  if (a === b) return true;
+  if (a === null || b === null || typeof a !== typeof b) return false;
+  if (Array.isArray(a)) {
+    if (!Array.isArray(b) || a.length !== b.length) return false;
+    return a.every((item, i) => deepEqual(item, b[i]));
+  }
+  if (typeof a === "object") {
+    const aObj = a;
+    const bObj = b;
+    const aKeys = Object.keys(aObj);
+    const bKeys = Object.keys(bObj);
+    if (aKeys.length !== bKeys.length) return false;
+    return aKeys.every((key) => key in bObj && deepEqual(aObj[key], bObj[key]));
+  }
+  return false;
+}
 function flattenTwinState(state) {
   const flattened = {};
   for (const [twinName, value] of Object.entries(state)) {
@@ -4422,7 +4417,14 @@ function evaluateDeterministic(criterion, stateView) {
           assertion.targetService,
           flatBeforeState
         );
-        const newCount = scopedAfterItems2.length - scopedBeforeItems2.length;
+        const scopedBeforeIds = new Set(
+          scopedBeforeItems2.filter((item) => !!item && typeof item === "object").map((item) => item["id"] ?? item["number"] ?? JSON.stringify(item))
+        );
+        const newCount = scopedAfterItems2.filter((item) => {
+          if (!item || typeof item !== "object") return true;
+          const id = item["id"] ?? item["number"] ?? JSON.stringify(item);
+          return !scopedBeforeIds.has(id);
+        }).length;
         return evaluateCount(
           criterion.id,
           assertion.type,
@@ -4505,8 +4507,8 @@ function evaluateDeterministic(criterion, stateView) {
       );
     }
     case "no_matching": {
-      const items = resolveSubjectInState(assertion.subject, stateView.after);
-      if (!items) {
+      const afterItems = resolveSubjectInState(assertion.subject, stateView.after);
+      if (!afterItems) {
         return {
           criterionId: criterion.id,
           status: "fail",
@@ -4515,25 +4517,64 @@ function evaluateDeterministic(criterion, stateView) {
           fallbackRecommended: true
         };
       }
-      const labelFiltered = assertion.labelFilter ? items.filter((item) => {
-        if (typeof item !== "object" || item === null) return false;
-        const obj = item;
-        const labels = obj["labels"];
-        if (Array.isArray(labels)) {
-          return labels.some((l) => {
-            const labelName = typeof l === "string" ? l : l?.["name"];
-            return String(labelName).toLowerCase() === assertion.labelFilter?.toLowerCase();
-          });
+      const applyLabelFilter = (items) => {
+        if (!assertion.labelFilter) return items;
+        return items.filter((item) => {
+          if (typeof item !== "object" || item === null) return false;
+          const obj = item;
+          const labels = obj["labels"];
+          if (Array.isArray(labels)) {
+            return labels.some((l) => {
+              const labelName = typeof l === "string" ? l : l?.["name"];
+              return String(labelName).toLowerCase() === assertion.labelFilter?.toLowerCase();
+            });
+          }
+          return false;
+        });
+      };
+      const afterLabelFiltered = applyLabelFilter(afterItems);
+      let afterMatching;
+      if (assertion.predicate) {
+        const filtered = filterByPredicate(afterLabelFiltered, assertion.predicate);
+        if (!filtered.recognized) {
+          return {
+            criterionId: criterion.id,
+            status: "fail",
+            confidence: 0.3,
+            explanation: `Unrecognized predicate "${assertion.predicate}" for no_matching check on "${assertion.subject}"`,
+            fallbackRecommended: true
+          };
         }
-        return false;
-      }) : items;
-      const matching = assertion.predicate ? filterByPredicate(labelFiltered, assertion.predicate).items : labelFiltered;
-      const passed = matching.length === 0;
+        afterMatching = filtered.items;
+      } else {
+        afterMatching = afterLabelFiltered;
+      }
+      const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
+      let newlyMatching = afterMatching;
+      if (beforeItems && afterMatching.length > 0) {
+        const beforeLabelFiltered = applyLabelFilter(beforeItems);
+        let beforeMatching;
+        if (assertion.predicate) {
+          const filtered = filterByPredicate(beforeLabelFiltered, assertion.predicate);
+          beforeMatching = filtered.recognized ? filtered.items : [];
+        } else {
+          beforeMatching = beforeLabelFiltered;
+        }
+        const beforeIds = new Set(
+          beforeMatching.filter((item) => !!item && typeof item === "object").map((item) => item["id"] ?? item["number"] ?? JSON.stringify(item))
+        );
+        newlyMatching = afterMatching.filter((item) => {
+          if (!item || typeof item !== "object") return true;
+          const id = item["id"] ?? item["number"] ?? JSON.stringify(item);
+          return !beforeIds.has(id);
+        });
+      }
+      const passed = newlyMatching.length === 0;
       return {
         criterionId: criterion.id,
         status: passed ? "pass" : "fail",
         confidence: 1,
-        explanation: passed ? `No ${assertion.subject} labeled "${assertion.labelFilter}" are ${assertion.predicate}` : `Found ${matching.length} ${assertion.subject} labeled "${assertion.labelFilter}" that are ${assertion.predicate}`
+        explanation: passed ? `No ${assertion.subject} labeled "${assertion.labelFilter}" became ${assertion.predicate} during the run` : `${newlyMatching.length} ${assertion.subject} labeled "${assertion.labelFilter}" became ${assertion.predicate} during the run`
       };
     }
     case "exists": {
@@ -4595,14 +4636,31 @@ function evaluateDeterministic(criterion, stateView) {
             flatBeforeState
           );
         }
-        const afterMatching = filterByPredicate(filteredItems, assertion.predicate).items;
-        const beforeMatching = beforeItems ? filterByPredicate(beforeItems, assertion.predicate).items : [];
-        const newlyTransitioned = afterMatching.length - beforeMatching.length;
-        const passed = newlyTransitioned <= 0;
-        return {
-          criterionId: criterion.id,
-          status: passed ? "pass" : "fail",
-          confidence: 1,
+        const afterResult = filterByPredicate(filteredItems, assertion.predicate);
+        if (!afterResult.recognized) {
+          return {
+            criterionId: criterion.id,
+            status: "fail",
+            confidence: 0.3,
+            explanation: `Unrecognized predicate "${assertion.predicate}" for not_exists transition check on "${assertion.subject}"`,
+            fallbackRecommended: true
+          };
+        }
+        const afterMatching = afterResult.items;
+        const beforeMatching = beforeItems ? filterByPredicate(beforeItems, assertion.predicate).items : [];
+        const beforeMatchIds = new Set(
+          beforeMatching.filter((item) => !!item && typeof item === "object").map((item) => item["id"] ?? item["number"] ?? JSON.stringify(item))
+        );
+        const newlyTransitioned = afterMatching.filter((item) => {
+          if (!item || typeof item !== "object") return true;
+          const id = item["id"] ?? item["number"] ?? JSON.stringify(item);
+          return !beforeMatchIds.has(id);
+        }).length;
+        const passed = newlyTransitioned <= 0;
+        return {
+          criterionId: criterion.id,
+          status: passed ? "pass" : "fail",
+          confidence: 1,
           explanation: passed ? `"${assertion.subject}" was NOT ${assertion.predicate} (no state transition)` : `"${assertion.subject}" was ${assertion.predicate} (${newlyTransitioned} new transition(s))`
         };
       }
@@ -4626,7 +4684,22 @@ function evaluateDeterministic(criterion, stateView) {
           fallbackRecommended: true
         };
       }
-      const matching = assertion.predicate ? filterByPredicate(items, assertion.predicate).items : items;
+      let matching;
+      if (assertion.predicate) {
+        const filtered = filterByPredicate(items, assertion.predicate);
+        if (!filtered.recognized) {
+          return {
+            criterionId: criterion.id,
+            status: "fail",
+            confidence: 0.3,
+            explanation: `Unrecognized predicate "${assertion.predicate}" for state_check on "${assertion.subject}"`,
+            fallbackRecommended: true
+          };
+        }
+        matching = filtered.items;
+      } else {
+        matching = items;
+      }
       const passed = assertion.allMustMatch ? matching.length === items.length : matching.length > 0;
       return {
         criterionId: criterion.id,
@@ -4818,29 +4891,78 @@ function evaluateDeterministic(criterion, stateView) {
       }
     }
     case "content_check": {
-      const flat = flattenTwinState(stateView.after);
+      const flatAfter = flattenTwinState(stateView.after);
+      const flatBefore = flattenTwinState(stateView.before);
       const negated = assertion.negated ?? false;
       const patterns = assertion.contentPatterns ?? [];
       const subjectWords = assertion.subject.toLowerCase().split(/\s+/);
+      const getNewOrModifiedItems = (afterItems, beforeItems) => {
+        const beforeById = /* @__PURE__ */ new Map();
+        for (const item of beforeItems) {
+          if (item && typeof item === "object") {
+            const obj = item;
+            const id = obj["id"] ?? obj["number"];
+            if (id !== void 0) beforeById.set(id, obj);
+          }
+        }
+        return afterItems.filter((item) => {
+          if (!item || typeof item !== "object") return true;
+          const obj = item;
+          const id = obj["id"] ?? obj["number"];
+          if (id === void 0) return true;
+          if (!beforeById.has(id)) return true;
+          return !deepEqual(beforeById.get(id), obj);
+        });
+      };
       let contentToCheck = "";
-      const issues = flat["issues"] ?? [];
       if (subjectWords.includes("issue") || subjectWords.includes("jira") || subjectWords.includes("ticket")) {
-        for (const issue of issues) {
+        const afterIssues = flatAfter["issues"] ?? [];
+        const beforeIssues = flatBefore["issues"] ?? [];
+        const relevantIssues = getNewOrModifiedItems(afterIssues, beforeIssues);
+        const toCheck = relevantIssues.length > 0 ? relevantIssues : afterIssues;
+        for (const issue of toCheck) {
           if (typeof issue === "object" && issue !== null) {
             const obj = issue;
             contentToCheck += String(obj["body"] ?? "") + " " + String(obj["title"] ?? "") + " " + String(obj["description"] ?? "") + " ";
           }
         }
       }
-      const messages = flat["messages"] ?? [];
       if (subjectWords.includes("message") || subjectWords.includes("reply")) {
-        for (const msg of messages) {
+        const afterMsgs = flatAfter["messages"] ?? [];
+        const beforeMsgs = flatBefore["messages"] ?? [];
+        const relevantMsgs = getNewOrModifiedItems(afterMsgs, beforeMsgs);
+        const toCheck = relevantMsgs.length > 0 ? relevantMsgs : afterMsgs;
+        for (const msg of toCheck) {
           if (typeof msg === "object" && msg !== null) {
             const obj = msg;
             contentToCheck += String(obj["text"] ?? "") + " ";
           }
         }
       }
+      if (subjectWords.includes("pr") || subjectWords.includes("pull") || subjectWords.includes("request")) {
+        const afterPrs = flatAfter["pullRequests"] ?? [];
+        const beforePrs = flatBefore["pullRequests"] ?? [];
+        const relevantPrs = getNewOrModifiedItems(afterPrs, beforePrs);
+        const toCheck = relevantPrs.length > 0 ? relevantPrs : afterPrs;
+        for (const pr of toCheck) {
+          if (typeof pr === "object" && pr !== null) {
+            const obj = pr;
+            contentToCheck += String(obj["body"] ?? "") + " " + String(obj["title"] ?? "") + " ";
+          }
+        }
+      }
+      if (subjectWords.includes("comment") || subjectWords.includes("comments")) {
+        const afterComments = flatAfter["comments"] ?? flatAfter["issueComments"] ?? [];
+        const beforeComments = flatBefore["comments"] ?? flatBefore["issueComments"] ?? [];
+        const relevantComments = getNewOrModifiedItems(afterComments, beforeComments);
+        const toCheck = relevantComments.length > 0 ? relevantComments : afterComments;
+        for (const comment of toCheck) {
+          if (typeof comment === "object" && comment !== null) {
+            const obj = comment;
+            contentToCheck += String(obj["body"] ?? "") + " " + String(obj["text"] ?? "") + " ";
+          }
+        }
+      }
       if (!contentToCheck.trim()) {
         return {
           criterionId: criterion.id,
@@ -4870,6 +4992,51 @@ function evaluateDeterministic(criterion, stateView) {
         };
       }
     }
+    case "exclusive_modification": {
+      const flatBefore = flattenTwinState(stateView.before);
+      const flatAfter = flattenTwinState(stateView.after);
+      const resolved = resolveSubjectInState(assertion.subject, flatAfter);
+      if (!resolved) {
+        return {
+          criterionId: criterion.id,
+          status: "pass",
+          confidence: 0.5,
+          explanation: `Could not find "${assertion.subject}" in twin state \u2014 assuming no modifications`,
+          fallbackRecommended: true
+        };
+      }
+      const beforeItems = resolveSubjectInState(assertion.subject, flatBefore) ?? [];
+      const afterItems = resolved;
+      const beforeById = /* @__PURE__ */ new Map();
+      for (const item of beforeItems) {
+        if (item && typeof item === "object") {
+          const rec = item;
+          const id = rec["id"] ?? rec["number"];
+          if (id !== void 0) beforeById.set(id, rec);
+        }
+      }
+      let modifiedNonMatching = 0;
+      for (const item of afterItems) {
+        if (!item || typeof item !== "object") continue;
+        const rec = item;
+        const id = rec["id"] ?? rec["number"];
+        if (id === void 0) continue;
+        const beforeItem = beforeById.get(id);
+        if (!beforeItem) continue;
+        if (deepEqual(beforeItem, rec)) continue;
+        const predicate = assertion.predicate?.toLowerCase() ?? "";
+        const state = String(rec["state"] ?? "").toLowerCase();
+        if (state === predicate) continue;
+        modifiedNonMatching++;
+      }
+      const passed = modifiedNonMatching === 0;
+      return {
+        criterionId: criterion.id,
+        status: passed ? "pass" : "fail",
+        confidence: 0.9,
+        explanation: passed ? `Only items matching "${assertion.predicate}" were modified` : `${modifiedNonMatching} item(s) were modified that don't match "${assertion.predicate}"`
+      };
+    }
   }
 }
 function evaluateCount(criterionId, type, expected, actual, subject, predicate) {
@@ -4907,7 +5074,7 @@ function evaluateCount(criterionId, type, expected, actual, subject, predicate)
 // src/evaluator/trace-evidence.ts
 var DEFAULT_MAX_SPANS = 60;
-var DEFAULT_BUDGET_CHARS = 24e3;
+var DEFAULT_BUDGET_CHARS = 36e3;
 var IO_SNIPPET_LIMIT = 1200;
 var MAX_REFERENCES = 12;
 var DEPENDENCY_LINK_TYPES = /* @__PURE__ */ new Set(["retry", "read_after_write", "write_after_write"]);
@@ -5101,10 +5268,10 @@ function buildTraceEvidence(context, options = {}) {
     packet = makePacket();
   }
   const IO_SNIPPET_CHARS = 600;
-  const MAX_IO_SPANS = 10;
+  const MAX_IO_SPANS = 20;
   const rankedForIo = [...ranked].sort(byRelevance).slice(0, MAX_IO_SPANS);
   for (const candidate of rankedForIo) {
-    if (candidate.mandatory || candidate.score >= 40) {
+    if (candidate.mandatory || candidate.score >= 20) {
       const entry = ordered.find((o) => o.id === candidate.id)?.entry;
       if (entry?.input) {
         candidate.span.inputSnippet = safeJson(entry.input, IO_SNIPPET_CHARS);
@@ -5160,13 +5327,101 @@ Your job is to determine if the criterion was met. Respond ONLY with valid JSON
 }
 Rules:
-- "pass" means the criterion is clearly satisfied
-- "fail" means the criterion is clearly not satisfied
-- "partial" means the criterion is partially satisfied or the evidence is ambiguous
-- confidence is how certain you are in your assessment (1.0 = completely certain, 0.5 = uncertain)
+- "pass" means the criterion is clearly and fully satisfied based on state and trace evidence
+- "fail" means the criterion is clearly not satisfied \u2014 no meaningful progress toward it
+- "partial" means the agent made meaningful progress but did not fully satisfy the criterion
+- Use "partial" when: the agent completed some but not all required actions, or the outcome is close but not exact, or the approach was correct but execution was incomplete
+- Use "fail" (not "partial") when: the agent took no relevant action, or the agent's actions moved state in the wrong direction, or there is zero evidence of progress
+- confidence reflects how certain you are in your chosen status (1.0 = unambiguous evidence, 0.7 = strong evidence with minor gaps, 0.5 = evidence is unclear or incomplete, 0.3 = mostly guessing)
 - Keep explanations concise (1-2 sentences)
 - Focus on observable evidence in the state and trace, not assumptions
-- If the criterion is about quality or helpfulness, assess based on content present in the state`;
+- If the criterion is about quality or helpfulness, assess based on content present in the state
+- When arrays are summarized with _count/_first/_last, the full data exists but is truncated for prompt size \u2014 do not penalize the agent for items you cannot see`;
+function mapStatus(value) {
+  if (typeof value !== "string") return null;
+  const normalized = value.trim().toLowerCase();
+  if (normalized === "pass" || normalized === "passed") return "pass";
+  if (normalized === "fail" || normalized === "failed") return "fail";
+  if (normalized === "partial" || normalized === "partially_passed" || normalized === "partially passed") return "partial";
+  return null;
+}
+function parseConfidence(value) {
+  if (typeof value === "number") return Math.max(0, Math.min(1, value));
+  if (typeof value === "string") {
+    const parsed = Number(value.trim());
+    if (!Number.isNaN(parsed)) return Math.max(0, Math.min(1, parsed));
+  }
+  return 0.5;
+}
+function toJudgeResponse(parsed) {
+  const directStatus = mapStatus(parsed["status"]);
+  if (directStatus) {
+    const explanation = typeof parsed["explanation"] === "string" ? parsed["explanation"] : "No explanation provided";
+    return {
+      status: directStatus,
+      confidence: parseConfidence(parsed["confidence"]),
+      explanation
+    };
+  }
+  for (const key of ["result", "evaluation", "judge", "output"]) {
+    const nested = parsed[key];
+    if (!nested || typeof nested !== "object" || Array.isArray(nested)) continue;
+    const candidate = toJudgeResponse(nested);
+    if (candidate) return candidate;
+  }
+  return null;
+}
+function extractBalancedJsonObjects(text) {
+  const candidates = [];
+  let depth = 0;
+  let start = -1;
+  let inString = false;
+  let escaped = false;
+  for (let i = 0; i < text.length; i++) {
+    const ch = text[i];
+    if (inString) {
+      if (escaped) {
+        escaped = false;
+      } else if (ch === "\\") {
+        escaped = true;
+      } else if (ch === '"') {
+        inString = false;
+      }
+      continue;
+    }
+    if (ch === '"') {
+      inString = true;
+      continue;
+    }
+    if (ch === "{") {
+      if (depth === 0) start = i;
+      depth++;
+      continue;
+    }
+    if (ch === "}") {
+      if (depth === 0) continue;
+      depth--;
+      if (depth === 0 && start >= 0) {
+        candidates.push(text.slice(start, i + 1));
+        start = -1;
+      }
+    }
+  }
+  return candidates;
+}
+function parseLooseKeyValueFallback(text) {
+  const statusMatch = text.match(/\bstatus\s*[:=]\s*(pass(?:ed)?|fail(?:ed)?|partial(?:ly[_\s-]?passed)?)\b/i);
+  if (!statusMatch) return null;
+  const confidenceMatch = text.match(/\bconfidence\s*[:=]\s*([01](?:\.\d+)?)\b/i);
+  const explanationMatch = text.match(/\bexplanation\s*[:=]\s*(.+)$/im);
+  const status = mapStatus(statusMatch[1]);
+  if (!status) return null;
+  return {
+    status,
+    confidence: parseConfidence(confidenceMatch?.[1]),
+    explanation: explanationMatch?.[1]?.trim() || "No explanation provided"
+  };
+}
 function buildUserPrompt(context) {
   const traceEvidencePacket = buildTraceEvidence({
     trace: context.trace,
@@ -5201,16 +5456,17 @@ ${JSON.stringify(context.stateDiff, null, 2)}
 ${traceEvidence}`;
 }
 function summarizeState(state) {
+  const flat = flattenTwinState(state);
   const summary = {};
-  for (const [key, value] of Object.entries(state)) {
+  for (const [key, value] of Object.entries(flat)) {
     if (Array.isArray(value)) {
-      if (value.length <= 30) {
+      if (value.length <= 100) {
         summary[key] = value;
       } else {
         summary[key] = {
           _count: value.length,
-          _first5: value.slice(0, 5),
-          _last5: value.slice(-5)
+          _first20: value.slice(0, 20),
+          _last20: value.slice(-20)
         };
       }
     } else {
@@ -5220,55 +5476,31 @@ function summarizeState(state) {
   return summary;
 }
 function parseJudgeResponse(text) {
-  const strategies = [
-    // 1. Non-greedy: smallest valid JSON object
-    () => text.match(/\{[\s\S]*?\}/),
-    // 2. Greedy: largest JSON object (original behavior, handles nested braces)
-    () => text.match(/\{[\s\S]*\}/),
-    // 3. Markdown code block extraction
-    () => text.match(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/)
-  ];
-  let jsonStr = null;
-  for (const strategy of strategies) {
-    const match = strategy();
-    if (!match) continue;
-    const candidate = match[1] ?? match[0];
+  const candidates = [];
+  candidates.push(text.trim());
+  const codeBlocks = Array.from(text.matchAll(/```(?:json)?\s*([\s\S]*?)\s*```/gi)).map((m) => m[1]).filter((m) => Boolean(m));
+  candidates.push(...codeBlocks);
+  candidates.push(...extractBalancedJsonObjects(text));
+  for (const candidate of candidates) {
+    if (!candidate) continue;
     try {
-      JSON.parse(candidate);
-      jsonStr = candidate;
-      break;
+      const parsed = JSON.parse(candidate);
+      const normalized = toJudgeResponse(parsed);
+      if (normalized) return normalized;
     } catch {
     }
   }
-  if (!jsonStr) {
-    warn("LLM judge did not return valid JSON, defaulting to fail");
-    return {
-      status: "fail",
-      confidence: 0.3,
-      explanation: "Could not parse evaluator response"
-    };
-  }
-  try {
-    const parsed = JSON.parse(jsonStr);
-    const status = parsed["status"];
-    if (status !== "pass" && status !== "fail" && status !== "partial") {
-      return {
-        status: "fail",
-        confidence: 0.3,
-        explanation: `Invalid status from evaluator: ${String(status)}`
-      };
-    }
-    const confidence = typeof parsed["confidence"] === "number" ? Math.max(0, Math.min(1, parsed["confidence"])) : 0.5;
-    const explanation = typeof parsed["explanation"] === "string" ? parsed["explanation"] : "No explanation provided";
-    return { status, confidence, explanation };
-  } catch {
-    warn("Failed to parse LLM judge JSON response");
-    return {
-      status: "fail",
-      confidence: 0.3,
-      explanation: "Could not parse evaluator response JSON"
-    };
+  const loose = parseLooseKeyValueFallback(text);
+  if (loose) {
+    warn("LLM judge response parsed via loose key-value fallback");
+    return loose;
   }
+  warn("LLM judge did not return parseable JSON, defaulting to fail");
+  return {
+    status: "fail",
+    confidence: 0.3,
+    explanation: "Could not parse evaluator response"
+  };
 }
 async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAfter, stateDiff, trace, options) {
   const context = {
@@ -5311,10 +5543,11 @@ async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAf
       apiKey,
       systemPrompt: SYSTEM_PROMPT,
       userPrompt: buildUserPrompt(context),
-      maxTokens: 512,
+      maxTokens: 1024,
       baseUrl: options.baseUrl,
       providerMode: options.providerMode,
-      intent: "evaluate"
+      intent: "evaluate",
+      responseFormat: "json"
     });
     const judgeResult = parseJudgeResponse(text);
     debug("LLM judge result", {
@@ -5359,7 +5592,7 @@ function getCriterionScore(evaluation) {
     case "pass":
       return 100;
     case "partial":
-      return 50 * evaluation.confidence;
+      return 25 + 50 * evaluation.confidence;
     case "fail":
       return 0;
   }
@@ -5639,9 +5872,9 @@ async function generateFailureAnalysis(input, config) {
 }
 // src/telemetry/recorder.ts
-import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync5, readFileSync as readFileSync9, readdirSync as readdirSync2, existsSync as existsSync7, unlinkSync as unlinkSync3, statSync } from "fs";
+import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as readFileSync8, readdirSync as readdirSync2, existsSync as existsSync6, unlinkSync as unlinkSync3, statSync } from "fs";
 import { join as join5 } from "path";
-import { randomUUID as randomUUID2 } from "crypto";
+import { randomUUID } from "crypto";
 var TRACES_DIR = "traces";
 var MAX_STORED_TRACES = 100;
 var TOOL_TO_TWIN = {
@@ -5688,7 +5921,7 @@ function getTracesDir() {
 }
 function ensureTracesDir() {
   const dir = getTracesDir();
-  if (!existsSync7(dir)) {
+  if (!existsSync6(dir)) {
     ensureArchalDir();
     mkdirSync3(dir, { recursive: true });
   }
@@ -5698,7 +5931,7 @@ function traceFilePath(id) {
   return join5(getTracesDir(), `${id}.json`);
 }
 function traceJsonFiles(dir) {
-  if (!existsSync7(dir)) return [];
+  if (!existsSync6(dir)) return [];
   const files = readdirSync2(dir).filter((f) => f.endsWith(".json") && !f.endsWith(".full.json"));
   files.sort((a, b) => {
     try {
@@ -5714,7 +5947,7 @@ function toMetadata(s) {
 }
 function loadTraceByPath(filePath) {
   try {
-    return JSON.parse(readFileSync9(filePath, "utf-8"));
+    return JSON.parse(readFileSync8(filePath, "utf-8"));
   } catch (err) {
     warn(`Failed to load trace: ${err instanceof Error ? err.message : String(err)}`);
     return null;
@@ -5722,12 +5955,12 @@ function loadTraceByPath(filePath) {
 }
 function findTraceByPrefix(prefix) {
   const dir = getTracesDir();
-  if (!existsSync7(dir)) return null;
+  if (!existsSync6(dir)) return null;
   const file = readdirSync2(dir).find((f) => f.endsWith(".json") && !f.endsWith(".full.json") && f.replace(".json", "").startsWith(prefix));
   return file ? file.replace(".json", "") : null;
 }
 function recordTrace(report) {
-  const traceId = randomUUID2();
+  const traceId = randomUUID();
   const dir = ensureTracesDir();
   const entries = report.runs.flatMap((run) => run.trace);
   const stored = {
@@ -5740,7 +5973,7 @@ function recordTrace(report) {
     report
   };
   const filePath = traceFilePath(traceId);
-  writeFileSync5(filePath, JSON.stringify(stored, null, 2), "utf-8");
+  writeFileSync4(filePath, JSON.stringify(stored, null, 2), "utf-8");
   debug("Recorded trace", { id: traceId, path: filePath, entries: String(entries.length) });
   try {
     const files = traceJsonFiles(dir);
@@ -5772,10 +6005,10 @@ function recordFullFidelityTrace(report, scenario, runData, traceId) {
     runs: runData
   };
   const filePath = join5(getTracesDir(), `${traceId}.full.json`);
-  writeFileSync5(filePath, JSON.stringify(stored, null, 2), "utf-8");
+  writeFileSync4(filePath, JSON.stringify(stored, null, 2), "utf-8");
   debug("Recorded full-fidelity trace", { id: traceId, path: filePath, entries: String(entries.length) });
   try {
-    const fullFiles = existsSync7(dir) ? readdirSync2(dir).filter((f) => f.endsWith(".full.json")).sort((a, b) => {
+    const fullFiles = existsSync6(dir) ? readdirSync2(dir).filter((f) => f.endsWith(".full.json")).sort((a, b) => {
       try {
         return statSync(join5(dir, b)).mtimeMs - statSync(join5(dir, a)).mtimeMs;
       } catch {
@@ -5795,7 +6028,7 @@ function recordFullFidelityTrace(report, scenario, runData, traceId) {
 }
 function findFullTraceByPrefix(prefix) {
   const dir = getTracesDir();
-  if (!existsSync7(dir)) return null;
+  if (!existsSync6(dir)) return null;
   const file = readdirSync2(dir).find(
     (f) => f.endsWith(".full.json") && f.replace(".full.json", "").startsWith(prefix)
   );
@@ -5803,9 +6036,9 @@ function findFullTraceByPrefix(prefix) {
 }
 function loadTrace(traceId) {
   const filePath = traceFilePath(traceId);
-  if (existsSync7(filePath)) return loadTraceByPath(filePath);
+  if (existsSync6(filePath)) return loadTraceByPath(filePath);
   const fullPath = join5(getTracesDir(), `${traceId}.full.json`);
-  if (existsSync7(fullPath)) return loadTraceByPath(fullPath);
+  if (existsSync6(fullPath)) return loadTraceByPath(fullPath);
   const match = findTraceByPrefix(traceId);
   if (match) return loadTraceByPath(traceFilePath(match));
   const fullMatch = findFullTraceByPrefix(traceId);
@@ -5813,7 +6046,7 @@ function loadTrace(traceId) {
   return null;
 }
 function allTraceJsonFiles(dir) {
-  if (!existsSync7(dir)) return [];
+  if (!existsSync6(dir)) return [];
   const allFiles = readdirSync2(dir).filter((f) => f.endsWith(".json")).sort().reverse();
   const seen = /* @__PURE__ */ new Set();
   const deduped = [];
@@ -5831,7 +6064,7 @@ function listTraces(limit = 20) {
   const results = [];
   for (const file of allTraceJsonFiles(dir).slice(0, limit)) {
     try {
-      results.push(toMetadata(JSON.parse(readFileSync9(join5(dir, file), "utf-8"))));
+      results.push(toMetadata(JSON.parse(readFileSync8(join5(dir, file), "utf-8"))));
     } catch {
       debug(`Skipping corrupted trace file: ${file}`);
     }
@@ -5845,7 +6078,7 @@ function searchTraces(options) {
   for (const file of allTraceJsonFiles(dir)) {
     if (results.length >= limit) break;
     try {
-      const stored = JSON.parse(readFileSync9(join5(dir, file), "utf-8"));
+      const stored = JSON.parse(readFileSync8(join5(dir, file), "utf-8"));
       if (options.scenario && !stored.scenarioTitle.toLowerCase().includes(options.scenario.toLowerCase())) continue;
       if (options.minScore !== void 0 && stored.satisfactionScore < options.minScore) continue;
       if (options.maxScore !== void 0 && stored.satisfactionScore > options.maxScore) continue;
@@ -5861,7 +6094,7 @@ function searchTraces(options) {
 function deleteTrace(traceId) {
   let resolvedId = traceId;
   let filePath = traceFilePath(traceId);
-  if (!existsSync7(filePath)) {
+  if (!existsSync6(filePath)) {
     const match = findTraceByPrefix(traceId);
     if (!match) return false;
     resolvedId = match;
@@ -5870,7 +6103,7 @@ function deleteTrace(traceId) {
   try {
     unlinkSync3(filePath);
     const fullPath = join5(getTracesDir(), `${resolvedId}.full.json`);
-    if (existsSync7(fullPath)) {
+    if (existsSync6(fullPath)) {
       try {
         unlinkSync3(fullPath);
       } catch {
@@ -5885,7 +6118,7 @@ function deleteTrace(traceId) {
 }
 function deleteAllTraces() {
   const dir = getTracesDir();
-  if (!existsSync7(dir)) return 0;
+  if (!existsSync6(dir)) return 0;
   let deleted = 0;
   for (const file of readdirSync2(dir).filter((f) => f.endsWith(".json"))) {
     try {
@@ -5897,7 +6130,7 @@ function deleteAllTraces() {
   debug("Deleted all traces", { count: String(deleted) });
   return deleted;
 }
-function getTraceStats() {
+function getTraceStats(options) {
   const dir = getTracesDir();
   const empty = {
     totalTraces: 0,
@@ -5913,6 +6146,7 @@ function getTraceStats() {
   };
   const files = traceJsonFiles(dir);
   if (files.length === 0) return empty;
+  const sinceTs = options?.since ? new Date(options.since).toISOString() : void 0;
   const scores = [];
   const scenarioMap = /* @__PURE__ */ new Map();
   const twinUsage = {};
@@ -5922,7 +6156,8 @@ function getTraceStats() {
     const filePath = join5(dir, file);
     try {
       diskUsageBytes += statSync(filePath).size;
-      const stored = JSON.parse(readFileSync9(filePath, "utf-8"));
+      const stored = JSON.parse(readFileSync8(filePath, "utf-8"));
+      if (sinceTs && stored.timestamp < sinceTs) continue;
       scores.push(stored.satisfactionScore);
       totalRuns += stored.runCount;
       totalEntries += stored.entries.length;
@@ -5968,11 +6203,30 @@ function getTraceStats() {
     newestTrace: newestTs || null
   };
 }
+function pruneTracesBefore(beforeIso) {
+  const dir = getTracesDir();
+  const files = traceJsonFiles(dir);
+  let deleted = 0;
+  for (const file of files) {
+    const filePath = join5(dir, file);
+    try {
+      const stored = JSON.parse(readFileSync8(filePath, "utf-8"));
+      if (stored.timestamp < beforeIso) {
+        unlinkSync3(filePath);
+        const fullPath = filePath.replace(/\.json$/, ".full.json");
+        if (existsSync6(fullPath)) unlinkSync3(fullPath);
+        deleted++;
+      }
+    } catch {
+    }
+  }
+  return deleted;
+}
 function exportTraceForEnterprise(traceId, cliVersion) {
   const fullPath = join5(getTracesDir(), `${traceId}.full.json`);
-  if (existsSync7(fullPath)) {
+  if (existsSync6(fullPath)) {
     try {
-      const stored = JSON.parse(readFileSync9(fullPath, "utf-8"));
+      const stored = JSON.parse(readFileSync8(fullPath, "utf-8"));
       const exportData2 = {
         metadata: {
           exportVersion: 1,
@@ -6029,8 +6283,161 @@ function exportTraceForEnterprise(traceId, cliVersion) {
 // src/telemetry/uploader.ts
 import { createHash as createHash2 } from "crypto";
+// ../twins/core/dist/index.js
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
+import { z as z3 } from "zod";
+var MAX_BODY_BYTES = 50 * 1024 * 1024;
+var MAX_BODY_BYTES2 = 50 * 1024 * 1024;
+function normalizeSpanId(entry) {
+  return entry.spanId ?? entry.id;
+}
+function normalizeTraceId(entry) {
+  if (typeof entry.traceId === "string" && entry.traceId.trim().length > 0) {
+    return entry.traceId;
+  }
+  return void 0;
+}
+function toSortableTimestamp(entry) {
+  const candidates = [entry.startedAt, entry.startTimestamp, entry.timestamp, entry.endedAt, entry.endTimestamp];
+  for (const candidate of candidates) {
+    if (typeof candidate !== "string") {
+      continue;
+    }
+    const value = Date.parse(candidate);
+    if (Number.isFinite(value)) {
+      return value;
+    }
+  }
+  return Number.POSITIVE_INFINITY;
+}
+function stableSortEntries(entries) {
+  return [...entries].sort((left, right) => {
+    const leftSeq = typeof left.sequenceIndex === "number" ? left.sequenceIndex : Number.POSITIVE_INFINITY;
+    const rightSeq = typeof right.sequenceIndex === "number" ? right.sequenceIndex : Number.POSITIVE_INFINITY;
+    if (leftSeq !== rightSeq) {
+      return leftSeq - rightSeq;
+    }
+    const leftTs = toSortableTimestamp(left);
+    const rightTs = toSortableTimestamp(right);
+    if (leftTs !== rightTs) {
+      return leftTs - rightTs;
+    }
+    return normalizeSpanId(left).localeCompare(normalizeSpanId(right));
+  });
+}
+function validateTraceGraph(entries) {
+  const issues = [];
+  const byTrace = /* @__PURE__ */ new Map();
+  for (const entry of entries) {
+    const traceId = normalizeTraceId(entry);
+    if (!traceId) {
+      issues.push({
+        code: "missing_trace_id",
+        traceId: "",
+        spanId: normalizeSpanId(entry),
+        message: `Entry ${entry.id} is missing traceId`
+      });
+      continue;
+    }
+    const existing = byTrace.get(traceId);
+    if (existing) {
+      existing.push(entry);
+    } else {
+      byTrace.set(traceId, [entry]);
+    }
+  }
+  const traces = [];
+  for (const [traceId, traceEntries] of byTrace.entries()) {
+    const ordered = stableSortEntries(traceEntries);
+    const spanById = /* @__PURE__ */ new Map();
+    const parentBySpan = /* @__PURE__ */ new Map();
+    for (const entry of ordered) {
+      const spanId = normalizeSpanId(entry);
+      if (spanById.has(spanId)) {
+        issues.push({
+          code: "duplicate_span_id",
+          traceId,
+          spanId,
+          message: `Trace ${traceId} has duplicate spanId ${spanId}`
+        });
+      } else {
+        spanById.set(spanId, entry);
+      }
+      parentBySpan.set(spanId, entry.parentSpanId ?? null);
+    }
+    const rootSpanIds = ordered.filter((entry) => !entry.parentSpanId).map((entry) => normalizeSpanId(entry));
+    if (rootSpanIds.length !== 1) {
+      issues.push({
+        code: "invalid_root_count",
+        traceId,
+        message: `Trace ${traceId} has ${rootSpanIds.length} roots (expected 1)`
+      });
+    }
+    for (const entry of ordered) {
+      const spanId = normalizeSpanId(entry);
+      const parent = entry.parentSpanId ?? null;
+      if (parent && !spanById.has(parent)) {
+        issues.push({
+          code: "orphan_span",
+          traceId,
+          spanId,
+          message: `Span ${spanId} references missing parent ${parent}`
+        });
+      }
+      for (const link of entry.links ?? []) {
+        if (link.traceId === traceId && !spanById.has(link.spanId)) {
+          issues.push({
+            code: "broken_link",
+            traceId,
+            spanId,
+            message: `Span ${spanId} has link to missing span ${link.spanId}`
+          });
+        }
+      }
+    }
+    for (const spanId of spanById.keys()) {
+      const seen = /* @__PURE__ */ new Set();
+      let cursor = spanId;
+      while (cursor) {
+        if (seen.has(cursor)) {
+          issues.push({
+            code: "cycle_detected",
+            traceId,
+            spanId,
+            message: `Span ${spanId} is in a parent cycle`
+          });
+          break;
+        }
+        seen.add(cursor);
+        cursor = parentBySpan.get(cursor) ?? null;
+      }
+    }
+    traces.push({
+      traceId,
+      rootSpanId: rootSpanIds[0] ?? null,
+      spanCount: ordered.length,
+      orderedSpanIds: ordered.map((entry) => normalizeSpanId(entry))
+    });
+  }
+  return { valid: issues.length === 0, issues, traces };
+}
+var successCriterionSchema = z3.object({
+  id: z3.string(),
+  description: z3.string(),
+  type: z3.enum(["deterministic", "probabilistic"])
+});
+var scenarioConfigSchema = z3.object({
+  twins: z3.array(z3.string()).default([]),
+  timeout: z3.number().default(120),
+  runs: z3.number().default(5),
+  evaluatorModel: z3.string().optional(),
+  difficulty: z3.enum(["easy", "medium", "hard"]).optional(),
+  tags: z3.array(z3.string()).default([])
+});
 // src/telemetry/consent.ts
-import { existsSync as existsSync8, readFileSync as readFileSync10, writeFileSync as writeFileSync6, unlinkSync as unlinkSync4 } from "fs";
+import { existsSync as existsSync7, readFileSync as readFileSync9, writeFileSync as writeFileSync5, unlinkSync as unlinkSync4 } from "fs";
 import { join as join6 } from "path";
 import { createInterface } from "readline";
 var CONSENT_FILE = ".telemetry-consent";
@@ -6058,7 +6465,7 @@ function getConsentStatus() {
   const env = process.env["ARCHAL_TELEMETRY"];
   if (env !== void 0) return env === "true" ? "granted" : "denied";
   try {
-    const record = JSON.parse(readFileSync10(consentPath(), "utf-8"));
+    const record = JSON.parse(readFileSync9(consentPath(), "utf-8"));
     return record.status;
   } catch {
     return "pending";
@@ -6067,7 +6474,7 @@ function getConsentStatus() {
 function saveConsent(status) {
   const dir = ensureArchalDir();
   const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version: CLI_VERSION };
-  writeFileSync6(join6(dir, CONSENT_FILE), JSON.stringify(record, null, 2) + "\n", "utf-8");
+  writeFileSync5(join6(dir, CONSENT_FILE), JSON.stringify(record, null, 2) + "\n", "utf-8");
   debug("Saved telemetry consent", { status });
 }
 function grantConsent() {
@@ -6084,12 +6491,12 @@ async function promptForConsent() {
   }
   process.stderr.write(TELEMETRY_NOTICE);
   const rl = createInterface({ input: process.stdin, output: process.stderr });
-  return new Promise((resolve13) => {
+  return new Promise((resolve12) => {
     const timeout = setTimeout(() => {
       rl.close();
       denyConsent();
       process.stderr.write("\nTelemetry consent timed out. Defaulting to disabled.\n\n");
-      resolve13(false);
+      resolve12(false);
     }, 3e4);
     rl.question("\nEnable anonymous telemetry? [y/N] ", (answer) => {
       clearTimeout(timeout);
@@ -6102,7 +6509,7 @@ async function promptForConsent() {
         denyConsent();
         process.stderr.write("\nTelemetry disabled.\n\n");
       }
-      resolve13(enabled);
+      resolve12(enabled);
     });
   });
 }
@@ -6890,14 +7297,17 @@ var SLACK_OVERRIDES = {
   channels: {
     required: ["channel_id", "name", "creator"],
     fields: {
-      channel_id: { description: "Format: CXXXXXXXX", aliases: ["channelId", "id"] },
-      members: { description: "Array of user_id strings. A user must be in members to post." }
+      channel_id: { description: "Format: CXXXXXXXX", aliases: ["channelId"] },
+      members: {
+        type: "string[]",
+        description: "Array of user_id strings. A user must be in members to post."
+      }
     }
   },
   users: {
     required: ["user_id", "team_id", "name", "real_name", "display_name", "email"],
     fields: {
-      user_id: { description: "Format: UXXXXXXXX", aliases: ["userId", "id"] },
+      user_id: { description: "Format: UXXXXXXXX", aliases: ["userId"] },
       team_id: { aliases: ["teamId"] },
       timezone: { default: "America/Los_Angeles" },
       tz_label: { default: "Pacific Daylight Time" },
@@ -8312,19 +8722,120 @@ function validateSeedCoverage(intent, mergedSeed) {
       }
     }
   }
-  const errors = [...entityIssues, ...quoteErrors];
-  return {
-    valid: errors.length === 0,
-    issues: errors,
-    warnings: quoteWarnings
-  };
+  const errors = [...entityIssues, ...quoteErrors];
+  return {
+    valid: errors.length === 0,
+    issues: errors,
+    warnings: quoteWarnings
+  };
+}
+// src/runner/seed-cache.ts
+import { createHash as createHash3 } from "crypto";
+import { existsSync as existsSync8, mkdirSync as mkdirSync4, readFileSync as readFileSync10, writeFileSync as writeFileSync6, readdirSync as readdirSync3, unlinkSync as unlinkSync5, statSync as statSync2 } from "fs";
+import { join as join7 } from "path";
+import { homedir as homedir2 } from "os";
+// src/evaluator/seed-verifier.ts
+var NON_COUNT_SUBJECTS = /* @__PURE__ */ new Set([
+  "minutes",
+  "minute",
+  "hours",
+  "hour",
+  "days",
+  "day",
+  "weeks",
+  "week",
+  "months",
+  "month",
+  "years",
+  "year",
+  "seconds",
+  "second",
+  "ms",
+  "am",
+  "pm",
+  "st",
+  "nd",
+  "rd",
+  "th",
+  "usd",
+  "eur",
+  "gbp",
+  "percent",
+  "kb",
+  "mb",
+  "gb",
+  "tb"
+]);
+var MAX_REASONABLE_COUNT = 200;
+var NON_SUBJECT_STARTS = /* @__PURE__ */ new Set([
+  "of",
+  "and",
+  "or",
+  "the",
+  "that",
+  "which",
+  "who",
+  "have",
+  "has",
+  "had",
+  "were",
+  "was",
+  "are",
+  "is",
+  "been",
+  "being",
+  "not",
+  "no",
+  "should",
+  "will",
+  "can",
+  "could",
+  "would",
+  "may",
+  "might"
+]);
+function isReasonableCountSubject(subject, expected) {
+  if (expected > MAX_REASONABLE_COUNT) return false;
+  const firstWord = subject.split(/\s+/)[0]?.toLowerCase() ?? "";
+  if (NON_COUNT_SUBJECTS.has(firstWord)) return false;
+  if (NON_SUBJECT_STARTS.has(firstWord)) return false;
+  if (/^\d+$/.test(subject) || subject.length < 3) return false;
+  if (/\b(?:have|has|had|were|was|are|is|been|being|do|does|did|can|could|should|will|would|may|might)\b/.test(subject.toLowerCase())) return false;
+  return true;
+}
+function verifySeedCounts(setupText, seedState) {
+  const mismatches = [];
+  const flat = flattenTwinState(seedState);
+  const countPattern = /\b(\d+)\s+([\w\s]+?)(?:\s+(?:that|which|are|with|in|labeled|assigned)\b)/gi;
+  for (const match of setupText.matchAll(countPattern)) {
+    const expected = parseInt(match[1], 10);
+    const subject = match[2].trim();
+    if (!subject || expected <= 0) continue;
+    if (!isReasonableCountSubject(subject, expected)) continue;
+    const resolved = resolveSubjectInState(subject, flat);
+    if (resolved && resolved.length !== expected) {
+      mismatches.push({ subject, expected, actual: resolved.length });
+    }
+  }
+  const simplePattern = /\b(\d+)\s+([\w\s]+?)(?:[.,;:)]|$)/gm;
+  const seenSubjects = new Set(mismatches.map((m) => m.subject.toLowerCase()));
+  for (const match of setupText.matchAll(simplePattern)) {
+    const expected = parseInt(match[1], 10);
+    const subject = match[2].trim();
+    if (!subject || expected <= 0 || seenSubjects.has(subject.toLowerCase())) continue;
+    if (!isReasonableCountSubject(subject, expected)) continue;
+    const resolved = resolveSubjectInState(subject, flat);
+    if (resolved && resolved.length !== expected) {
+      mismatches.push({ subject, expected, actual: resolved.length });
+      seenSubjects.add(subject.toLowerCase());
+    }
+  }
+  return mismatches;
 }
 // src/runner/seed-cache.ts
-import { createHash as createHash3 } from "crypto";
-import { existsSync as existsSync9, mkdirSync as mkdirSync4, readFileSync as readFileSync11, writeFileSync as writeFileSync7, readdirSync as readdirSync3, unlinkSync as unlinkSync5, statSync as statSync2 } from "fs";
-import { join as join7 } from "path";
-import { homedir as homedir2 } from "os";
 var CACHE_VERSION = 3;
 var NEGATIVE_CACHE_VERSION = 2;
 var NEGATIVE_PREFIX = "neg-";
@@ -8386,13 +8897,13 @@ function negativeCacheFilePath(twinName, baseSeedName, setupText, scope) {
   };
 }
 function ensureCacheDir() {
-  if (!existsSync9(CACHE_DIR)) {
+  if (!existsSync8(CACHE_DIR)) {
     mkdirSync4(CACHE_DIR, { recursive: true });
   }
 }
 function evictStaleEntries() {
   try {
-    if (!existsSync9(CACHE_DIR)) return;
+    if (!existsSync8(CACHE_DIR)) return;
     const now = Date.now();
     for (const file of readdirSync3(CACHE_DIR)) {
       if (!file.endsWith(".json")) continue;
@@ -8412,7 +8923,7 @@ function getCachedSeed(twinName, baseSeedName, setupText, scope) {
     const { path: filePath, key } = cacheFilePathScoped(twinName, baseSeedName, setupText, scope);
     let raw;
     try {
-      raw = readFileSync11(filePath, "utf-8");
+      raw = readFileSync10(filePath, "utf-8");
     } catch {
       return null;
     }
@@ -8421,6 +8932,17 @@ function getCachedSeed(twinName, baseSeedName, setupText, scope) {
       debug("Seed cache version mismatch, ignoring cached entry");
       return null;
     }
+    const mismatches = verifySeedCounts(setupText, entry.seed);
+    if (mismatches.length > 0) {
+      warn(
+        `Cached seed failed count verification, evicting: ${mismatches.map((m) => `${m.subject}: expected ${m.expected}, got ${m.actual}`).join("; ")}`
+      );
+      try {
+        unlinkSync5(filePath);
+      } catch {
+      }
+      return null;
+    }
     debug("Seed cache hit", { twin: twinName, baseSeed: baseSeedName, key });
     return { seed: entry.seed, patch: entry.patch };
   } catch {
@@ -8440,6 +8962,14 @@ function cacheSeed(twinName, baseSeedName, setupText, seed, patch, scope) {
       contextHash,
       baseSeedHash
     } = cacheFilePathScoped(twinName, baseSeedName, setupText, scope);
+    const mismatches = verifySeedCounts(setupText, seed);
+    if (mismatches.length > 0) {
+      debug("Skipping cache write \u2014 seed failed count verification", {
+        twin: twinName,
+        mismatches: mismatches.map((m) => `${m.subject}: ${m.expected} vs ${m.actual}`).join("; ")
+      });
+      return;
+    }
     const entry = {
       version: CACHE_VERSION,
       twinName,
@@ -8453,7 +8983,7 @@ function cacheSeed(twinName, baseSeedName, setupText, seed, patch, scope) {
       patch,
       createdAt: (/* @__PURE__ */ new Date()).toISOString()
     };
-    writeFileSync7(filePath, JSON.stringify(entry));
+    writeFileSync6(filePath, JSON.stringify(entry));
     debug("Seed cached", { twin: twinName, baseSeed: baseSeedName, key });
   } catch {
     warn("Failed to write seed cache entry");
@@ -8465,7 +8995,7 @@ function getNegativeSeed(twinName, baseSeedName, setupText, scope) {
     const { path: filePath, key } = negativeCacheFilePath(twinName, baseSeedName, setupText, scope);
     let raw;
     try {
-      raw = readFileSync11(filePath, "utf-8");
+      raw = readFileSync10(filePath, "utf-8");
     } catch {
       return null;
     }
@@ -8502,7 +9032,7 @@ function cacheNegativeSeed(twinName, baseSeedName, setupText, missingSlots, scop
       missingSlots,
       createdAt: (/* @__PURE__ */ new Date()).toISOString()
     };
-    writeFileSync7(filePath, JSON.stringify(entry));
+    writeFileSync6(filePath, JSON.stringify(entry));
     debug("Negative seed cached", { twin: twinName, baseSeed: baseSeedName, key });
   } catch {
     warn("Failed to write negative seed cache entry");
@@ -8853,6 +9383,93 @@ function createDeferredSeedPayload(baseSeed, twinName, generate) {
   }];
   return payload;
 }
+function ensureSlackScenarioChannelAccess(mergedSeed, intent) {
+  if (!intent || intent.twinName !== "slack") return mergedSeed;
+  const channels = mergedSeed["channels"];
+  const users = mergedSeed["users"];
+  if (!Array.isArray(channels) || channels.length === 0) return mergedSeed;
+  if (!Array.isArray(users) || users.length === 0) return mergedSeed;
+  const knownUserIds = Array.from(new Set(
+    users.map((user) => {
+      if (!user || typeof user !== "object") return null;
+      const record = user;
+      const userId = typeof record["user_id"] === "string" ? record["user_id"].trim() : typeof record["id"] === "string" ? record["id"].trim() : null;
+      return userId && userId.length > 0 ? userId : null;
+    }).filter((userId) => Boolean(userId))
+  ));
+  const primaryUserId = knownUserIds[0] ?? null;
+  if (!primaryUserId) return mergedSeed;
+  const scenarioChannels = new Set(
+    intent.entities.filter((entity) => entity.kind === "channel" && entity.key === "name" && typeof entity.value === "string").map((entity) => String(entity.value).toLowerCase().trim())
+  );
+  if (scenarioChannels.size === 0) return mergedSeed;
+  const visibilityByChannel = /* @__PURE__ */ new Map();
+  for (const [key, value] of Object.entries(intent.extractedSlots)) {
+    const parsedKey = key.match(/^channel\.visibility\.([a-z0-9._-]+)$/i);
+    if (!parsedKey) continue;
+    if (typeof value !== "string") continue;
+    const normalizedVisibility = value.trim().toLowerCase();
+    if (normalizedVisibility !== "private" && normalizedVisibility !== "public") continue;
+    visibilityByChannel.set(parsedKey[1].toLowerCase(), normalizedVisibility === "private");
+  }
+  const nextChannelId = (() => {
+    let maxNumeric = 0;
+    for (const channel of channels) {
+      if (!channel || typeof channel !== "object") continue;
+      const record = channel;
+      const channelId = typeof record["channel_id"] === "string" ? record["channel_id"] : "";
+      if (!channelId) continue;
+      const numeric = Number.parseInt(channelId.match(/^C0*(\d+)/)?.[1] ?? "", 10);
+      if (Number.isFinite(numeric) && numeric > maxNumeric) maxNumeric = numeric;
+    }
+    return () => {
+      maxNumeric += 1;
+      return `C${String(maxNumeric).padStart(10, "0")}`;
+    };
+  })();
+  const nextEntityId = (() => {
+    let maxNumericId = 0;
+    for (const channel of channels) {
+      if (!channel || typeof channel !== "object") continue;
+      const record = channel;
+      const numericId = record["id"];
+      if (typeof numericId === "number" && Number.isFinite(numericId) && numericId > maxNumericId) {
+        maxNumericId = numericId;
+      }
+    }
+    return () => {
+      maxNumericId += 1;
+      return maxNumericId;
+    };
+  })();
+  const existingChannelNames = /* @__PURE__ */ new Set();
+  for (const channel of channels) {
+    if (!channel || typeof channel !== "object") continue;
+    const record = channel;
+    const name = typeof record["name"] === "string" ? record["name"].toLowerCase().trim() : "";
+    if (!name) continue;
+    existingChannelNames.add(name);
+    if (!scenarioChannels.has(name)) continue;
+    if (typeof record["creator"] !== "string" || !record["creator"]) {
+      record["creator"] = primaryUserId;
+    }
+  }
+  for (const channelName of scenarioChannels) {
+    if (existingChannelNames.has(channelName)) continue;
+    channels.push({
+      id: nextEntityId(),
+      channel_id: nextChannelId(),
+      name: channelName,
+      topic: "",
+      purpose: "",
+      is_private: visibilityByChannel.get(channelName) ?? false,
+      is_archived: false,
+      members: [primaryUserId],
+      creator: primaryUserId
+    });
+  }
+  return mergedSeed;
+}
 function repairTruncatedJson(text) {
   let json = text.trim();
   json = json.replace(/,\s*$/, "");
@@ -9187,6 +9804,7 @@ Fix these issues:
       }
       mergedSeed = normalizeSeedData(mergedSeed, twinName);
       mergedSeed = autoFillMissingFKs(mergedSeed, twinName);
+      mergedSeed = ensureSlackScenarioChannelAccess(mergedSeed, intent);
       const baseEntityCounts = parsed.fullState ? {} : Object.fromEntries(Object.entries(baseSeedData).map(([col, ents]) => [col, ents.length]));
       const schemaValidation = validateSeedAgainstSchema(twinName, mergedSeed, baseEntityCounts);
       if (!schemaValidation.valid) {
@@ -9218,6 +9836,12 @@ Fix these issues:
         continue;
       }
       if (intent) {
+        debug("Seed intent coverage summary", {
+          twin: twinName,
+          entities: String(intent.entities.length),
+          quotedStrings: String(intent.quotedStrings.length),
+          channelEntities: String(intent.entities.filter((entity) => entity.kind === "channel").length)
+        });
         const coverage = validateSeedCoverage(intent, mergedSeed);
         if (coverage.warnings.length > 0) {
           debug(`Seed coverage warnings (attempt ${attempt + 1})`, {
@@ -9251,6 +9875,7 @@ Fix these issues:
     mergedSeed = normalizeSeedData(applySeedPatch(baseSeedData, patch), twinName);
   }
   mergedSeed = autoFillMissingFKs(mergedSeed, twinName);
+  mergedSeed = ensureSlackScenarioChannelAccess(mergedSeed, intent);
   if (!config.noCache) {
     cacheSeed(twinName, baseSeedName, setupDescription, mergedSeed, patch, cacheScope);
   }
@@ -9258,76 +9883,6 @@ Fix these issues:
   return { seed: mergedSeed, patch, fromCache: false, source: "llm" };
 }
-// src/evaluator/seed-verifier.ts
-var NON_COUNT_SUBJECTS = /* @__PURE__ */ new Set([
-  "minutes",
-  "minute",
-  "hours",
-  "hour",
-  "days",
-  "day",
-  "weeks",
-  "week",
-  "months",
-  "month",
-  "years",
-  "year",
-  "seconds",
-  "second",
-  "ms",
-  "am",
-  "pm",
-  "st",
-  "nd",
-  "rd",
-  "th",
-  "usd",
-  "eur",
-  "gbp",
-  "percent",
-  "kb",
-  "mb",
-  "gb",
-  "tb"
-]);
-var MAX_REASONABLE_COUNT = 200;
-function isReasonableCountSubject(subject, expected) {
-  if (expected > MAX_REASONABLE_COUNT) return false;
-  const firstWord = subject.split(/\s+/)[0]?.toLowerCase() ?? "";
-  if (NON_COUNT_SUBJECTS.has(firstWord)) return false;
-  if (/^\d+$/.test(subject) || subject.length < 3) return false;
-  return true;
-}
-function verifySeedCounts(setupText, seedState) {
-  const mismatches = [];
-  const flat = flattenTwinState(seedState);
-  const countPattern = /\b(\d+)\s+([\w\s]+?)(?:\s+(?:that|which|are|with|in|labeled|assigned)\b)/gi;
-  for (const match of setupText.matchAll(countPattern)) {
-    const expected = parseInt(match[1], 10);
-    const subject = match[2].trim();
-    if (!subject || expected <= 0) continue;
-    if (!isReasonableCountSubject(subject, expected)) continue;
-    const resolved = resolveSubjectInState(subject, flat);
-    if (resolved && resolved.length !== expected) {
-      mismatches.push({ subject, expected, actual: resolved.length });
-    }
-  }
-  const simplePattern = /\b(\d+)\s+([\w\s]+?)(?:[.,;:)]|$)/gm;
-  const seenSubjects = new Set(mismatches.map((m) => m.subject.toLowerCase()));
-  for (const match of setupText.matchAll(simplePattern)) {
-    const expected = parseInt(match[1], 10);
-    const subject = match[2].trim();
-    if (!subject || expected <= 0 || seenSubjects.has(subject.toLowerCase())) continue;
-    if (!isReasonableCountSubject(subject, expected)) continue;
-    const resolved = resolveSubjectInState(subject, flat);
-    if (resolved && resolved.length !== expected) {
-      mismatches.push({ subject, expected, actual: resolved.length });
-      seenSubjects.add(subject.toLowerCase());
-    }
-  }
-  return mismatches;
-}
 // src/runner/seed-intent.ts
 function formatMissingSlots(missingSlots) {
   return missingSlots.map((slot) => {
@@ -9535,9 +10090,30 @@ function slackIntent(setup) {
   const entities = [];
   const missingSlots = [];
   const requiredSlots = ["channel.name_or_dm.user"];
-  const hashChannel = setup.match(/#([a-z][a-z0-9._-]*)/i)?.[1];
-  const wordChannel = setup.match(/\bchannel\s+["']?([a-z0-9._-]+)["']?/i)?.[1];
-  let dmUser;
+  const seenChannels = /* @__PURE__ */ new Set();
+  const channelRegex = /#([a-z][a-z0-9._-]*)/gi;
+  let channelMatch;
+  while ((channelMatch = channelRegex.exec(setup)) !== null) {
+    const channel = channelMatch[1]?.replace(/[.,;:!?]+$/, "");
+    if (!channel) continue;
+    if (seenChannels.has(channel)) continue;
+    seenChannels.add(channel);
+    if (!extractedSlots["channel.name"]) extractedSlots["channel.name"] = channel;
+    entities.push({ kind: "channel", key: "name", value: channel });
+    const suffix = setup.slice(channelMatch.index + channelMatch[0].length, channelMatch.index + channelMatch[0].length + 32);
+    const visibility = suffix.match(/^\s*\((private|public)\)/i)?.[1]?.toLowerCase();
+    if (!visibility) continue;
+    extractedSlots[`channel.visibility.${channel}`] = visibility;
+  }
+  if (!extractedSlots["channel.name"]) {
+    const wordChannel = setup.match(/\bchannel\s+["']?([a-z0-9._-]+)["']?/i)?.[1];
+    if (wordChannel) {
+      extractedSlots["channel.name"] = wordChannel;
+      entities.push({ kind: "channel", key: "name", value: wordChannel });
+    }
+  }
+  const seenUsers = /* @__PURE__ */ new Set();
+  const dmUsers = [];
   const mentionRegex = /@([a-z0-9._-]+)/gi;
   let mentionMatch;
   while ((mentionMatch = mentionRegex.exec(setup)) !== null) {
@@ -9545,20 +10121,30 @@ function slackIntent(setup) {
     if (!mention) continue;
     const prevChar = mentionMatch.index > 0 ? setup[mentionMatch.index - 1] : "";
     if (prevChar && /[a-zA-Z0-9._%+-]/.test(prevChar)) continue;
-    dmUser = mention;
-    break;
-  }
+    if (seenUsers.has(mention)) continue;
+    seenUsers.add(mention);
+    dmUsers.push(mention);
+    entities.push({ kind: "user", key: "name", value: mention });
+  }
+  const backtickedUserRegex = /`@?([a-z0-9._-]{2,})`/gi;
+  let backtickedMatch;
+  while ((backtickedMatch = backtickedUserRegex.exec(setup)) !== null) {
+    const candidate = backtickedMatch[1];
+    if (!candidate) continue;
+    if (candidate.includes("@") || candidate.includes("/")) continue;
+    if (!/^[a-z][a-z0-9]*[._-][a-z][a-z0-9._-]*$/i.test(candidate)) continue;
+    const localContext = setup.slice(Math.max(0, backtickedMatch.index - 40), backtickedMatch.index).toLowerCase();
+    const likelyUserContext = /\b(user|username|display name|from|by|posts?|replies?|writes?)\b/.test(localContext);
+    if (!likelyUserContext) continue;
+    if (seenUsers.has(candidate)) continue;
+    seenUsers.add(candidate);
+    dmUsers.push(candidate);
+    entities.push({ kind: "user", key: "name", value: candidate });
+  }
+  const dmUser = dmUsers[0];
   const mentionsDm = /\bdirect message\b|\bdm\b/i.test(setup);
-  if (hashChannel || wordChannel) {
-    const channel = hashChannel ?? wordChannel;
-    if (channel) {
-      extractedSlots["channel.name"] = channel;
-      entities.push({ kind: "channel", key: "name", value: channel });
-    }
-  }
   if (dmUser) {
     extractedSlots["dm.user"] = dmUser;
-    entities.push({ kind: "user", key: "name", value: dmUser });
   } else if (mentionsDm && !extractedSlots["channel.name"]) {
     missingSlots.push({
       slot: "dm.user",
@@ -9576,7 +10162,7 @@ function slackIntent(setup) {
   const needsMessageTarget = /\b(message|reply|thread|react|history)\b/i.test(setup);
   if (needsMessageTarget) {
     const hasQuote = /"[^"\n]{1,2000}"/.test(setup);
-    const hasSender = /\b(from|by)\s+@?[a-z0-9._-]+\b/i.test(setup);
+    const hasSender = /\b(from|by)\s+`?@?[a-z0-9._-]+`?\b/i.test(setup);
     if (!hasQuote && !hasSender) {
       missingSlots.push({
         slot: "message.target",
@@ -9947,7 +10533,7 @@ function extractSeedIntent(twinName, setupDescription) {
 }
 // src/runner/routing.ts
-import { existsSync as existsSync10, readFileSync as readFileSync12 } from "fs";
+import { existsSync as existsSync9, readFileSync as readFileSync11 } from "fs";
 function isLoopbackUrl(rawUrl) {
   try {
     const parsed = new URL(rawUrl);
@@ -9962,10 +10548,10 @@ function isNonLocalEndpoint(rawUrl) {
 }
 function parseRemoteTwinUrlOverrides(path) {
   if (!path) return void 0;
-  if (!existsSync10(path)) {
+  if (!existsSync9(path)) {
     throw new Error(`Twin URL overrides file not found: ${path}`);
   }
-  const raw = readFileSync12(path, "utf-8");
+  const raw = readFileSync11(path, "utf-8");
   const parsed = JSON.parse(raw);
   const overrides = {};
   for (const [key, value] of Object.entries(parsed)) {
@@ -9987,10 +10573,10 @@ function parseRemoteTwinUrlOverrides(path) {
 }
 function parseApiBaseUrlOverrides(path) {
   if (!path) return void 0;
-  if (!existsSync10(path)) {
+  if (!existsSync9(path)) {
     throw new Error(`API base URL overrides file not found: ${path}`);
   }
-  const raw = readFileSync12(path, "utf-8");
+  const raw = readFileSync11(path, "utf-8");
   const parsed = JSON.parse(raw);
   const overrides = {};
   for (const [key, value] of Object.entries(parsed)) {
@@ -10076,6 +10662,23 @@ async function probeHttp(url, timeoutMs) {
 }
 // src/runner/orchestrator.ts
+function deepEqual2(a, b) {
+  if (a === b) return true;
+  if (a === null || b === null || typeof a !== typeof b) return false;
+  if (Array.isArray(a)) {
+    if (!Array.isArray(b) || a.length !== b.length) return false;
+    return a.every((item, i) => deepEqual2(item, b[i]));
+  }
+  if (typeof a === "object") {
+    const aObj = a;
+    const bObj = b;
+    const aKeys = Object.keys(aObj);
+    const bKeys = Object.keys(bObj);
+    if (aKeys.length !== bKeys.length) return false;
+    return aKeys.every((key) => key in bObj && deepEqual2(aObj[key], bObj[key]));
+  }
+  return false;
+}
 function computeStateDiff(before, after) {
   const diff = { added: {}, modified: {}, removed: {} };
   const allKeys = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
@@ -10088,7 +10691,7 @@ function computeStateDiff(before, after) {
       diff.removed[key] = Array.isArray(beforeVal) ? beforeVal.map(
         (item, idx) => item.id ?? item.number ?? -(idx + 1)
       ) : [-1];
-    } else if (JSON.stringify(beforeVal) !== JSON.stringify(afterVal)) {
+    } else if (!deepEqual2(beforeVal, afterVal)) {
       diff.modified[key] = Array.isArray(afterVal) ? afterVal : [afterVal];
     }
   }
@@ -10230,13 +10833,13 @@ function parseSqlSeed(sql) {
   return seed;
 }
 function loadSeedStateFromPath(seedRoot, seedName) {
-  const jsonPath = resolve5(seedRoot, `${seedName}.json`);
-  if (existsSync11(jsonPath)) {
-    return JSON.parse(readFileSync13(jsonPath, "utf-8"));
+  const jsonPath = resolve4(seedRoot, `${seedName}.json`);
+  if (existsSync10(jsonPath)) {
+    return JSON.parse(readFileSync12(jsonPath, "utf-8"));
   }
-  const sqlPath = resolve5(seedRoot, `${seedName}.sql`);
-  if (existsSync11(sqlPath)) {
-    return parseSqlSeed(readFileSync13(sqlPath, "utf-8"));
+  const sqlPath = resolve4(seedRoot, `${seedName}.sql`);
+  if (existsSync10(sqlPath)) {
+    return parseSqlSeed(readFileSync12(sqlPath, "utf-8"));
   }
   return null;
 }
@@ -10251,10 +10854,10 @@ function normalizeSeedState(raw) {
   return Object.keys(normalized).length > 0 ? normalized : null;
 }
 function loadBaseSeedFromDisk(twinName, seedName) {
-  const __dir = dirname3(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, "$1"));
+  const __dir = dirname2(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, "$1"));
   const bundledSeedRoots = [
-    resolve5(__dir, "..", "twin-assets", twinName, "seeds"),
-    resolve5(__dir, "..", "..", "twin-assets", twinName, "seeds")
+    resolve4(__dir, "..", "twin-assets", twinName, "seeds"),
+    resolve4(__dir, "..", "..", "twin-assets", twinName, "seeds")
   ];
   for (const bundledSeedRoot of bundledSeedRoots) {
     const bundledSeed = loadSeedStateFromPath(bundledSeedRoot, seedName);
@@ -10263,8 +10866,8 @@ function loadBaseSeedFromDisk(twinName, seedName) {
     }
   }
   const monorepoSeedRoots = [
-    resolve5(__dir, "..", "..", "twins", twinName, "seeds"),
-    resolve5(__dir, "..", "..", "..", "twins", twinName, "seeds")
+    resolve4(__dir, "..", "..", "twins", twinName, "seeds"),
+    resolve4(__dir, "..", "..", "..", "twins", twinName, "seeds")
   ];
   for (const monorepoSeedRoot of monorepoSeedRoots) {
     const monorepoSeed = loadSeedStateFromPath(monorepoSeedRoot, seedName);
@@ -10273,9 +10876,9 @@ function loadBaseSeedFromDisk(twinName, seedName) {
     }
   }
   try {
-    const req = createRequire2(import.meta.url);
+    const req = createRequire(import.meta.url);
     const twinMain = req.resolve(`@archal/twin-${twinName}`);
-    const seedRoot = resolve5(dirname3(twinMain), "..", "seeds");
+    const seedRoot = resolve4(dirname2(twinMain), "..", "seeds");
     const seedState = loadSeedStateFromPath(seedRoot, seedName);
     if (seedState) {
       return seedState;
@@ -10319,7 +10922,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
     const twinUrls = cloudTwinUrls;
     restConfigPath = join8(tmpdir3(), `${runId}-rest-config.json`);
     const restTmpPath = `${restConfigPath}.tmp`;
-    writeFileSync8(restTmpPath, JSON.stringify({ restEndpoints: twinUrls }, null, 2));
+    writeFileSync7(restTmpPath, JSON.stringify({ restEndpoints: twinUrls }, null, 2));
     renameSync2(restTmpPath, restConfigPath);
     const twinNames = seedSelections.map((s) => s.twinName);
     const mcpServers = {};
@@ -10330,7 +10933,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
     }
     mcpConfigPath = join8(tmpdir3(), `${runId}-mcp-config.json`);
     const mcpTmpPath = `${mcpConfigPath}.tmp`;
-    writeFileSync8(mcpTmpPath, JSON.stringify({ mcpServers }, null, 2));
+    writeFileSync7(mcpTmpPath, JSON.stringify({ mcpServers }, null, 2));
     renameSync2(mcpTmpPath, mcpConfigPath);
     const mcpServersJson = JSON.stringify(mcpServers);
     let effectiveRemoteTwinUrls;
@@ -10365,6 +10968,7 @@ ${baseTaskMessage}` : baseTaskMessage;
         ARCHAL_ENGINE_TASK: taskMessage
       }
     };
+    const agentBudgetMs = Math.max(timeoutSeconds * 1e3 - setupMs, 3e4);
     let agentResult = apiEngine ? await executeOpenClawRemote(
       apiEngine,
       scenario,
@@ -10377,7 +10981,7 @@ ${baseTaskMessage}` : baseTaskMessage;
       mcpConfigPath,
       mcpServersJson,
       twinNames,
-      timeoutSeconds * 1e3,
+      agentBudgetMs,
       { restConfigPath, twinUrls },
       apiBearerToken
     );
@@ -10527,7 +11131,7 @@ ${baseTaskMessage}` : baseTaskMessage;
     if (restConfigPath) {
       for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
         try {
-          if (existsSync11(file)) unlinkSync6(file);
+          if (existsSync10(file)) unlinkSync6(file);
         } catch {
         }
       }
@@ -10592,56 +11196,13 @@ function preflightCheck(scenario, apiKey, model, baseUrl, evaluatorProvider, see
     }
   }
   if (seedModel) {
-    const seedProvider = detectProvider(seedModel);
-    const seedMode = seedProviderMode ?? "direct";
-    const seedApiKey = resolveProviderApiKey(apiKey, seedProvider);
     const creds = getCredentials();
     const hasArchalAuth = Boolean(creds?.token);
-    if (seedProvider === "openai-compatible" && !baseUrl && seedMode === "direct") {
-      errors.push({
-        check: "seedGeneration.baseUrl",
-        message: `Seed model "${seedModel}" requires a base URL for the OpenAI-compatible endpoint`,
-        detail: "Set via: export ARCHAL_EVALUATOR_BASE_URL=<url> or archal config set evaluator.baseUrl <url>"
-      });
-    }
-    if (seedMode === "archal" && !hasArchalAuth) {
+    if (!hasArchalAuth) {
       errors.push({
         check: "archal-auth-seed",
-        message: 'Seed provider is "archal" but no Archal credentials found',
-        detail: "Run `archal login` or set ARCHAL_TOKEN to authenticate with Archal backend"
-      });
-    }
-    if (seedMode === "direct" && !seedApiKey) {
-      const envVar = getProviderEnvVar(seedProvider);
-      errors.push({
-        check: envVar,
-        message: `Dynamic seed generation requires ${seedProvider} API access for model "${seedModel}"`,
-        detail: `Set via: export ${envVar}=<your-key> or archal config set evaluator.apiKey <key>`
-      });
-    }
-    if (seedMode === "auto" && !seedApiKey && !hasArchalAuth) {
-      const envVar = getProviderEnvVar(seedProvider);
-      errors.push({
-        check: envVar,
-        message: `Dynamic seed generation has no configured LLM path for model "${seedModel}"`,
-        detail: `Set via: archal login, export ARCHAL_TOKEN=<token>, or export ${envVar}=<your-key>`
-      });
-    }
-    if (seedApiKey && (seedMode === "direct" || seedMode === "auto")) {
-      const mismatch = validateKeyForProvider(seedApiKey, seedProvider);
-      if (mismatch) {
-        errors.push({
-          check: "seed-key-provider-mismatch",
-          message: mismatch,
-          warning: true
-        });
-      }
-    }
-    if ((seedMode === "archal" || seedMode === "auto") && !seedApiKey && hasArchalAuth && seedProvider !== "gemini") {
-      errors.push({
-        check: "seedGeneration.model",
-        message: `Seed model "${seedModel}" will not run directly without a ${getProviderEnvVar(seedProvider)} key`,
-        detail: "In this configuration, Archal backend uses its server-default Gemini model for seed generation.",
+        message: "Dynamic seed generation requires Archal authentication",
+        detail: "Run `archal login` or set ARCHAL_TOKEN to authenticate with Archal backend",
         warning: true
       });
     }
@@ -10735,6 +11296,19 @@ Run 'archal doctor' for a full system check.`
     }
     seedSelections = overrideSeedSelection(seedSelections, overrides);
   }
+  if (options.staticSeed) {
+    progress("Loading static seed (no LLM mutation)...");
+    for (const sel of seedSelections) {
+      const baseSeedData = loadBaseSeedFromDisk(sel.twinName, sel.seedName);
+      if (!baseSeedData || Object.keys(baseSeedData).length === 0) {
+        throw new Error(
+          `Could not load static seed "${sel.seedName}" for twin "${sel.twinName}" from disk. Ensure the seed file exists at twins/${sel.twinName}/seeds/${sel.seedName}.json`
+        );
+      }
+      sel.seedData = baseSeedData;
+      debug("Using static seed as-is", { twin: sel.twinName, seed: sel.seedName });
+    }
+  }
   const generationTargets = [];
   const extractedIntentByTwin = /* @__PURE__ */ new Map();
   const cachedSeedTwins = [];
@@ -10744,44 +11318,47 @@ Run 'archal doctor' for a full system check.`
     expectedBehavior: scenario.expectedBehavior,
     successCriteria: scenario.successCriteria.map((criterion) => `${criterion.type}: ${criterion.description}`)
   };
-  for (const sel of seedSelections) {
-    const intentResult = extractSeedIntent(sel.twinName, scenario.setup);
-    extractedIntentByTwin.set(sel.twinName, intentResult.intent ?? void 0);
-    if (intentResult.missingSlots.length === 0) {
-      generationTargets.push(sel);
-      continue;
-    }
-    let missingSlots = intentResult.missingSlots;
-    if (!options.noSeedCache) {
-      const negative = getNegativeSeed(sel.twinName, sel.seedName, scenario.setup, { cacheContext: seedPromptContext });
-      if (negative && negative.missingSlots.length > 0) {
-        missingSlots = negative.missingSlots;
+  if (!options.staticSeed) {
+    for (const sel of seedSelections) {
+      const intentResult = extractSeedIntent(sel.twinName, scenario.setup);
+      extractedIntentByTwin.set(sel.twinName, intentResult.intent ?? void 0);
+      if (intentResult.missingSlots.length === 0) {
+        generationTargets.push(sel);
+        continue;
       }
-    }
-    const details = formatMissingSlots(missingSlots);
-    const message = `Setup is ambiguous for twin "${sel.twinName}" and cannot safely generate a dynamic seed.
+      let missingSlots = intentResult.missingSlots;
+      if (!options.noSeedCache) {
+        const negative = getNegativeSeed(sel.twinName, sel.seedName, scenario.setup, { cacheContext: seedPromptContext });
+        if (negative && negative.missingSlots.length > 0) {
+          missingSlots = negative.missingSlots;
+        }
+      }
+      const details = formatMissingSlots(missingSlots);
+      const message = `Setup is ambiguous for twin "${sel.twinName}" and cannot safely generate a dynamic seed.
 Missing details:
 ${details}
 Pass --allow-ambiguous-seed to opt into best-effort generation.`;
-    if (!options.allowAmbiguousSeed) {
-      if (!options.noSeedCache) {
-        cacheNegativeSeed(sel.twinName, sel.seedName, scenario.setup, missingSlots, {
-          cacheContext: seedPromptContext
-        });
+      if (!options.allowAmbiguousSeed) {
+        if (!options.noSeedCache) {
+          cacheNegativeSeed(sel.twinName, sel.seedName, scenario.setup, missingSlots, {
+            cacheContext: seedPromptContext
+          });
+        }
+        throw new Error(message);
       }
-      throw new Error(message);
+      warn(message);
+      generationTargets.push(sel);
     }
-    warn(message);
-    generationTargets.push(sel);
   }
   if (generationTargets.length > 0) {
     progress("Generating dynamic seeds from setup description...");
     const dynamicConfig = {
-      apiKey: config.apiKey,
+      apiKey: "",
+      // Seed gen always routes through Archal backend
       model: config.seedModel,
       baseUrl: config.baseUrl,
       noCache: options.noSeedCache,
-      providerMode: config.seedProvider
+      providerMode: "archal"
     };
     let cloudSeedSnapshotByTwin = null;
     const adminAuth = options.apiAdminToken ? { token: options.apiAdminToken, userId: options.apiAdminUserId } : void 0;
@@ -10839,11 +11416,11 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
       `Seed count mismatch for ${sel.twinName}: ${mismatches.map((m) => `${m.subject}: expected ${m.expected}, got ${m.actual}`).join("; ")}`
     );
   }
-  const scenarioDir = dirname3(resolve5(options.scenarioPath));
+  const scenarioDir = dirname2(resolve4(options.scenarioPath));
   let projectConfigPath;
   for (const dir of [scenarioDir, process.cwd()]) {
-    const candidate = resolve5(dir, ".archal.json");
-    if (existsSync11(candidate)) {
+    const candidate = resolve4(dir, ".archal.json");
+    if (existsSync10(candidate)) {
       projectConfigPath = candidate;
       break;
     }
@@ -11036,6 +11613,8 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
     providerMode: config.evaluatorProvider
   };
   const runs = [];
+  let consecutiveInfraErrors = 0;
+  const EARLY_ABORT_THRESHOLD = 2;
   for (let i = 0; i < numRuns; i++) {
     const adminAuth = options.apiAdminToken ? { token: options.apiAdminToken, userId: options.apiAdminUserId } : void 0;
     const result = await executeSingleRun(
@@ -11056,6 +11635,15 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
     );
     runs.push(result);
     printRunProgress(i, numRuns, result.overallScore, result.error);
+    if (result.error) {
+      consecutiveInfraErrors++;
+      if (consecutiveInfraErrors >= EARLY_ABORT_THRESHOLD && i < numRuns - 1) {
+        warn(`${consecutiveInfraErrors} consecutive run errors \u2014 aborting remaining ${numRuns - i - 1} run(s) to avoid wasting quota.`);
+        break;
+      }
+    } else {
+      consecutiveInfraErrors = 0;
+    }
   }
   const runScores = runs.map((r) => r.overallScore);
   const satisfactionScore = aggregateSatisfaction(runScores);
@@ -11147,10 +11735,10 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
 // src/commands/scenario.ts
 import { Command } from "commander";
-import { existsSync as existsSync12, readdirSync as readdirSync4, writeFileSync as writeFileSync9, mkdirSync as mkdirSync5 } from "fs";
-import { resolve as resolve6, join as join9, extname, relative } from "path";
-import { fileURLToPath as fileURLToPath4 } from "url";
-var __dirname3 = fileURLToPath4(new URL(".", import.meta.url));
+import { existsSync as existsSync11, readdirSync as readdirSync4, writeFileSync as writeFileSync8, mkdirSync as mkdirSync5 } from "fs";
+import { resolve as resolve5, join as join9, extname, relative, basename as basename3 } from "path";
+import { fileURLToPath as fileURLToPath3 } from "url";
+var __dirname2 = fileURLToPath3(new URL(".", import.meta.url));
 var SCENARIO_TEMPLATE = `# {{NAME}}
 ## Setup
@@ -11183,33 +11771,33 @@ timeout: 120
 runs: 5
 `;
 var SCENARIO_DIR_CANDIDATES = [
-  resolve6("scenarios"),
-  resolve6("scenario"),
-  resolve6("test", "scenarios"),
-  resolve6("tests", "scenarios"),
-  resolve6(".archal", "scenarios")
+  resolve5("scenarios"),
+  resolve5("scenario"),
+  resolve5("test", "scenarios"),
+  resolve5("tests", "scenarios"),
+  resolve5(".archal", "scenarios")
 ];
 var BUNDLED_SCENARIOS_CANDIDATES = [
-  resolve6(__dirname3, "..", "scenarios"),
+  resolve5(__dirname2, "..", "scenarios"),
   // __dirname = cli/dist/
-  resolve6(__dirname3, "..", "..", "scenarios"),
+  resolve5(__dirname2, "..", "..", "scenarios"),
   // __dirname = cli/src/commands/
-  resolve6(__dirname3, "..", "..", "..", "scenarios")
+  resolve5(__dirname2, "..", "..", "..", "scenarios")
   // monorepo root from cli/dist/
 ];
 function findBundledScenariosDir() {
   for (const candidate of BUNDLED_SCENARIOS_CANDIDATES) {
-    if (existsSync12(candidate)) return candidate;
+    if (existsSync11(candidate)) return candidate;
   }
   return null;
 }
 function resolveBundledScenario(nameOrPath) {
-  if (existsSync12(nameOrPath)) return nameOrPath;
+  if (existsSync11(nameOrPath)) return nameOrPath;
   const needle = nameOrPath.endsWith(".md") ? nameOrPath : `${nameOrPath}.md`;
   for (const dir of BUNDLED_SCENARIOS_CANDIDATES) {
-    if (!existsSync12(dir)) continue;
+    if (!existsSync11(dir)) continue;
     const rootCandidate = join9(dir, needle);
-    if (existsSync12(rootCandidate)) return rootCandidate;
+    if (existsSync11(rootCandidate)) return rootCandidate;
     const allFiles = findScenarioFiles(dir);
     const match = allFiles.find((f) => f.endsWith(`/${needle}`) || f.endsWith(`\\${needle}`));
     if (match) return match;
@@ -11219,7 +11807,7 @@ function resolveBundledScenario(nameOrPath) {
 var CRITICAL_PREFIX2 = /^\s*(?:\[critical\]|critical:)\s*/i;
 function findScenarioFiles(dir) {
   const files = [];
-  if (!existsSync12(dir)) return files;
+  if (!existsSync11(dir)) return files;
   const entries = readdirSync4(dir, { withFileTypes: true });
   for (const entry of entries) {
     const fullPath = join9(dir, entry.name);
@@ -11233,17 +11821,17 @@ function findScenarioFiles(dir) {
 }
 function findLocalScenariosDir() {
   for (const candidate of SCENARIO_DIR_CANDIDATES) {
-    if (existsSync12(candidate)) {
+    if (existsSync11(candidate)) {
       return { dir: candidate, candidates: SCENARIO_DIR_CANDIDATES };
     }
   }
   return {
-    dir: resolve6("scenarios"),
+    dir: resolve5("scenarios"),
     candidates: SCENARIO_DIR_CANDIDATES
   };
 }
 function toDisplayPath(path) {
-  const rel = relative(resolve6("."), path);
+  const rel = relative(resolve5("."), path);
   if (!rel) return ".";
   return rel.startsWith("..") ? path : rel;
 }
@@ -11253,8 +11841,8 @@ function lintSeedability(setup, twins) {
     const intentResult = extractSeedIntent(twinName, setup);
     if (intentResult.missingSlots.length === 0) continue;
     const details = formatMissingSlots(intentResult.missingSlots);
-    errors.push(`[${twinName}] missing seedability details:
-${details}`);
+    errors.push({ message: `[${twinName}] missing seedability details:
+${details}` });
   }
   return errors;
 }
@@ -11265,24 +11853,25 @@ function lintDeterministicCriteria(criteria) {
     const description = criterion.description.replace(CRITICAL_PREFIX2, "").trim();
     const parsed = parseAssertion(description);
     if (!parsed) {
-      errors.push(
-        `[${criterion.id}] deterministic criterion is not parser-safe: "${criterion.description}". Rewrite as deterministic parser-compatible syntax or tag as [P].`
-      );
+      errors.push({
+        message: `[${criterion.id}] deterministic criterion will fall back to LLM evaluation at runtime: "${criterion.description}". Consider rewriting or tagging as [P] for clarity.`,
+        warning: true
+      });
       continue;
     }
     if (parsed.type === "channel_check" || parsed.type === "channel_content_check") {
       const channels = parsed.channel?.split(",").map((c) => c.trim()).filter(Boolean) ?? [];
       const suspicious = channels.filter((channel) => channel !== "*" && !/[a-z]/i.test(channel));
       if (suspicious.length > 0) {
-        errors.push(
-          `[${criterion.id}] deterministic channel extraction looks lossy (${suspicious.join(", ")}): "${criterion.description}". Use explicit Slack channel names (for example, #security) or retag as [P].`
-        );
+        errors.push({
+          message: `[${criterion.id}] deterministic channel extraction looks lossy (${suspicious.join(", ")}): "${criterion.description}". Use explicit Slack channel names (for example, #security) or retag as [P].`
+        });
       }
     }
     if ((parsed.type === "content_check" || parsed.type === "channel_content_check") && (!parsed.contentPatterns || parsed.contentPatterns.length === 0)) {
-      errors.push(
-        `[${criterion.id}] deterministic content check has no extracted content pattern: "${criterion.description}". Add explicit quoted text or tag as [P].`
-      );
+      errors.push({
+        message: `[${criterion.id}] deterministic content check has no extracted content pattern: "${criterion.description}". Add explicit quoted text or tag as [P].`
+      });
     }
   }
   return errors;
@@ -11292,11 +11881,11 @@ function createScenarioCommand() {
   cmd.command("list").description("List available scenarios").option("-d, --dir <directory>", "Scenario directory to search").option("--local", "Only show local scenarios (skip remote fetch)").option("--runnable-only", "Deprecated no-op (scenarios are no longer entitlement-filtered)").option("--tag <tag>", "Filter scenarios by tag").option("--difficulty <level>", "Filter by difficulty (easy, medium, hard)").option("--json", "Output as JSON").action(async (opts) => {
     const tagFilter = opts.tag?.toLowerCase();
     const difficultyFilter = opts.difficulty?.toLowerCase();
-    const headers = ["Scenario", "Source", "Criteria", "Twins", "Tags", "Difficulty"];
+    const headers = ["Scenario", "Slug", "Twins"];
     const rows = [];
-    const localResolution = opts.dir ? { dir: resolve6(opts.dir), candidates: [resolve6(opts.dir)] } : findLocalScenariosDir();
+    const localResolution = opts.dir ? { dir: resolve5(opts.dir), candidates: [resolve5(opts.dir)] } : findLocalScenariosDir();
     const localDir = localResolution.dir;
-    if (existsSync12(localDir)) {
+    if (existsSync11(localDir)) {
       const localFiles = findScenarioFiles(localDir);
       for (const file of localFiles) {
         try {
@@ -11306,19 +11895,15 @@ function createScenarioCommand() {
             if (!scenarioTags.includes(tagFilter)) continue;
           }
           if (difficultyFilter && (scenario.config.difficulty ?? "") !== difficultyFilter) continue;
-          const relativePath = relative(resolve6("."), file);
+          const slug = basename3(file, ".md");
           rows.push([
             scenario.title,
-            relativePath,
-            String(scenario.successCriteria.length),
-            scenario.config.twins.join(", ") || "(auto)",
-            scenario.config.tags.length > 0 ? scenario.config.tags.join(", ") : "-",
-            scenario.config.difficulty ?? "-"
+            slug,
+            scenario.config.twins.join(", ") || "(auto)"
           ]);
-        } catch (err) {
-          const message = err instanceof Error ? err.message : String(err);
-          const relativePath = relative(resolve6("."), file);
-          rows.push([`(parse error)`, relativePath, "-", message, "-", "-"]);
+        } catch {
+          const slug = basename3(file, ".md");
+          rows.push([`(parse error)`, slug, "-"]);
         }
       }
     } else if (opts.dir) {
@@ -11343,14 +11928,11 @@ function createScenarioCommand() {
               if (!scenarioTags.includes(tagFilter)) continue;
             }
             if (difficultyFilter && (scenario.config.difficulty ?? "") !== difficultyFilter) continue;
-            const fileName = relative(bundledDir, file);
+            const slug = basename3(file, ".md");
             rows.push([
               scenario.title,
-              `(built-in) ${fileName}`,
-              String(scenario.successCriteria.length),
-              scenario.config.twins.join(", ") || "(auto)",
-              scenario.config.tags.length > 0 ? scenario.config.tags.join(", ") : "-",
-              scenario.config.difficulty ?? "-"
+              slug,
+              scenario.config.twins.join(", ") || "(auto)"
             ]);
           } catch {
           }
@@ -11366,11 +11948,8 @@ function createScenarioCommand() {
     if (opts.json) {
       const jsonRows = rows.map((r) => ({
         scenario: r[0],
-        source: r[1],
-        criteria: r[2],
-        twins: r[3],
-        tags: r[4],
-        difficulty: r[5]
+        slug: r[1],
+        twins: r[2]
       }));
       process.stdout.write(JSON.stringify(jsonRows, null, 2) + "\n");
       return;
@@ -11380,8 +11959,8 @@ function createScenarioCommand() {
 Found ${rows.length} scenario(s)`);
   });
   cmd.command("validate").description("Parse and validate a scenario file").argument("<file>", "Path to scenario markdown file").action((file) => {
-    const filePath = resolve6(file);
-    if (!existsSync12(filePath)) {
+    const filePath = resolve5(file);
+    if (!existsSync11(filePath)) {
       error(`File not found: ${filePath}`);
       process.exit(1);
     }
@@ -11429,48 +12008,61 @@ Found ${rows.length} scenario(s)`);
   });
   cmd.command("create").description("Scaffold a new scenario file").argument("<name>", "Scenario name (will be used as filename)").option("-d, --dir <directory>", "Directory to create scenario in").option("--twins <twins>", "Twins to configure, comma-separated (github, slack, etc.)", "github").option("--twin <twin>", "Alias for --twins").action((name, opts) => {
     if (opts.twin) opts.twins = opts.twin;
-    const scenariosDir = opts.dir ? resolve6(opts.dir) : findLocalScenariosDir().dir;
-    if (!existsSync12(scenariosDir)) {
+    const scenariosDir = opts.dir ? resolve5(opts.dir) : findLocalScenariosDir().dir;
+    if (!existsSync11(scenariosDir)) {
       mkdirSync5(scenariosDir, { recursive: true });
       info(`Created scenarios directory: ${scenariosDir}`);
     }
     const fileName = name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "") + ".md";
     const filePath = join9(scenariosDir, fileName);
-    if (existsSync12(filePath)) {
+    if (existsSync11(filePath)) {
       error(`Scenario file already exists: ${filePath}`);
       process.exit(1);
     }
     const displayName = name.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase());
     const content = SCENARIO_TEMPLATE.replace("{{NAME}}", displayName).replace("twins: github", `twins: ${opts.twins}`);
-    writeFileSync9(filePath, content, "utf-8");
+    writeFileSync8(filePath, content, "utf-8");
     success(`Created scenario: ${filePath}`);
     info(`Edit the file to define your test scenario, then run:`);
     info(`  archal scenario validate ${filePath}`);
     info(`  archal run ${filePath}`);
   });
   cmd.command("lint").description("Lint scenario quality checks before running").argument("<file>", "Path to scenario markdown file").option("--seedability", "Validate setup details needed for dynamic seed generation").action((file, opts) => {
-    const filePath = resolve6(file);
-    if (!existsSync12(filePath)) {
+    const filePath = resolve5(file);
+    if (!existsSync11(filePath)) {
       error(`File not found: ${filePath}`);
       process.exit(1);
     }
     try {
       const scenario = parseScenarioFile(filePath);
-      const errors = validateScenario(scenario);
-      const lintErrors = [...errors];
-      lintErrors.push(...lintDeterministicCriteria(scenario.successCriteria));
+      const validationErrors = validateScenario(scenario);
+      const lintResults = validationErrors.map((e) => ({ message: e }));
+      lintResults.push(...lintDeterministicCriteria(scenario.successCriteria));
       if (opts.seedability) {
-        lintErrors.push(...lintSeedability(scenario.setup, scenario.config.twins));
+        lintResults.push(...lintSeedability(scenario.setup, scenario.config.twins));
       }
-      if (lintErrors.length === 0) {
+      const hardErrors = lintResults.filter((r) => !r.warning);
+      const warnings = lintResults.filter((r) => r.warning);
+      if (hardErrors.length === 0 && warnings.length === 0) {
         success("Scenario lint passed");
         return;
       }
-      fail(`Scenario has ${lintErrors.length} lint error(s):`);
-      for (const lintError of lintErrors) {
-        error(`  - ${lintError}`);
+      if (warnings.length > 0) {
+        warn(`${warnings.length} warning(s):`);
+        for (const w of warnings) {
+          warn(`  - ${w.message}`);
+        }
+      }
+      if (hardErrors.length > 0) {
+        fail(`Scenario has ${hardErrors.length} lint error(s):`);
+        for (const e of hardErrors) {
+          error(`  - ${e.message}`);
+        }
+        process.exit(1);
+      }
+      if (warnings.length > 0) {
+        success("Scenario lint passed (with warnings)");
       }
-      process.exit(1);
     } catch (err) {
       const message = err instanceof Error ? err.message : String(err);
       error(`Failed to parse scenario: ${message}`);
@@ -11510,8 +12102,25 @@ async function runShutdownHooks(signal) {
 }
 // src/commands/run.ts
+// Provider key prefixes recognised by the sanity check below; a key that
+// starts with one of these is exempt from the second (20-character) check.
+var KNOWN_KEY_PREFIXES = ["AIza", "sk-ant-", "sk-"];
+/**
+ * Write a warning to stderr when an API key supplied on the command line
+ * looks implausibly short. This only warns; it does not reject the key.
+ *
+ * - Fewer than 10 characters: always warns ("looks too short").
+ * - 10 to 19 characters without a known provider prefix: warns
+ *   ("unusually short").
+ * - Anything else: silent.
+ *
+ * @param {string} key - The key value to inspect (callers pass it trimmed).
+ * @param {string} flagName - Name of the flag echoed in the warning text.
+ * @returns {void}
+ */
+function warnIfKeyLooksInvalid(key, flagName) {
+  const isTooShortForAnyKey = key.length < 10;
+  if (isTooShortForAnyKey) {
+    process.stderr.write(`Warning: ${flagName} value looks too short (${key.length} chars). Verify it is a valid API key.
+`);
+    return;
+  }
+  const hasKnownPrefix = KNOWN_KEY_PREFIXES.some((prefix) => key.startsWith(prefix));
+  if (hasKnownPrefix || key.length >= 20) {
+    return;
+  }
+  process.stderr.write(`Warning: ${flagName} value is unusually short (${key.length} chars). Verify it is a valid API key.
+`);
+}
 function createRunCommand() {
-  const cmd = new Command2("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path or name of a scenario (e.g. close-stale-issues)").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "0").option("--api-key <key>", "API key for the model provider (overrides env var and config)").option("--engine-endpoint <url>", "Agent gateway URL (your agent connects here to receive tasks and call tools)").option("--engine-token <token>", "Bearer token for API engine auth").option(
+  const cmd = new Command2("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path or name of a scenario (e.g. close-stale-issues)").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "180").option(
+    "-m, --model <model>",
+    "Evaluator model for probabilistic criteria (also defaults local engine model when unset)"
+  ).option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "0").option("--api-key <key>", "API key for the model provider (overrides env var and config)").option("--engine-endpoint <url>", "Agent gateway URL (your agent connects here to receive tasks and call tools)").option("--engine-key <key>", "API key for the agent engine (overrides config engine.apiKey and ARCHAL_ENGINE_API_KEY)").option("--engine-token <token>", "Bearer token for API engine auth").option(
     "--engine-model <model>",
     "Model to use (e.g. gemini-2.0-flash, claude-sonnet-4-20250514)"
   ).option("--engine-twin-urls <path>", "Path to JSON mapping twin names to base URLs (auto-generated in most cases)").option("--engine-timeout <seconds>", "Timeout for API engine HTTP call per run (defaults to run timeout)").option(
@@ -11520,7 +12129,7 @@ function createRunCommand() {
   ).option(
     "--harness-dir <path>",
     "Local agent execution directory (archal-harness.json is optional)"
-  ).addOption(new Option("--openclaw-url <url>", "Deprecated alias for --engine-endpoint").hideHelp()).addOption(new Option("--openclaw-token <token>", "Deprecated alias for --engine-token").hideHelp()).addOption(new Option("--openclaw-agent <id>", "Deprecated alias for --engine-model").hideHelp()).addOption(new Option("--openclaw-twin-urls <path>", "Deprecated alias for --engine-twin-urls").hideHelp()).addOption(new Option("--openclaw-timeout <seconds>", "Deprecated alias for --engine-timeout").hideHelp()).option("--api-base-urls <path>", "Path to JSON mapping service names to clone API base URLs for raw API code routing").option("--api-proxy-url <url>", "Proxy URL for raw API code routing metadata").option("--preflight-only", "Run environment/config preflight checks only and exit").option("--no-seed-cache", "Skip seed cache for dynamic generation").option("--no-failure-analysis", "Skip LLM failure analysis on imperfect scores").option(
+  ).addOption(new Option("--openclaw-url <url>", "Deprecated alias for --engine-endpoint").hideHelp()).addOption(new Option("--openclaw-token <token>", "Deprecated alias for --engine-token").hideHelp()).addOption(new Option("--openclaw-agent <id>", "Deprecated alias for --engine-model").hideHelp()).addOption(new Option("--openclaw-twin-urls <path>", "Deprecated alias for --engine-twin-urls").hideHelp()).addOption(new Option("--openclaw-timeout <seconds>", "Deprecated alias for --engine-timeout").hideHelp()).option("--api-base-urls <path>", "Path to JSON mapping service names to clone API base URLs for raw API code routing").option("--api-proxy-url <url>", "Proxy URL for raw API code routing metadata").option("--preflight-only", "Run environment/config preflight checks only and exit").option("--seed-cache", "Enable seed cache for dynamic generation (off by default)").option("--static-seed", "Use seed files as-is without LLM mutation (uses --seed name or auto-selected per twin)").option("--no-failure-analysis", "Skip LLM failure analysis on imperfect scores").option(
     "--allow-ambiguous-seed",
     "Allow dynamic seed generation when setup is underspecified"
   ).option("--tag <tag>", "Only run if scenario has this tag (exit 0 if not)").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (scenarioArg, opts) => {
@@ -11530,8 +12139,8 @@ function createRunCommand() {
     if (opts.verbose) {
       configureLogger({ verbose: true, level: "debug" });
     }
-    let scenarioPath = resolve7(scenarioArg);
-    if (!existsSync13(scenarioPath)) {
+    let scenarioPath = resolve6(scenarioArg);
+    if (!existsSync12(scenarioPath)) {
       const bundled = resolveBundledScenario(scenarioArg);
       if (bundled) {
         scenarioPath = bundled;
@@ -11547,7 +12156,7 @@ function createRunCommand() {
 `);
       process.exit(1);
     }
-    if (!readFileSync14(scenarioPath, "utf-8").trim()) {
+    if (!readFileSync13(scenarioPath, "utf-8").trim()) {
       process.stderr.write(`Error: Scenario file is empty: ${scenarioPath}
 `);
       process.exit(1);
@@ -11615,7 +12224,7 @@ function createRunCommand() {
       }
       sessionCleanupPromise = (async () => {
         const cleanupGeneratedSessionMaps = () => {
-          if (generatedTwinUrlMapPath && existsSync13(generatedTwinUrlMapPath)) {
+          if (generatedTwinUrlMapPath && existsSync12(generatedTwinUrlMapPath)) {
             try {
               unlinkSync7(generatedTwinUrlMapPath);
             } catch (error2) {
@@ -11624,7 +12233,7 @@ function createRunCommand() {
 `);
             }
           }
-          if (generatedApiBaseUrlMapPath && existsSync13(generatedApiBaseUrlMapPath)) {
+          if (generatedApiBaseUrlMapPath && existsSync12(generatedApiBaseUrlMapPath)) {
             try {
               unlinkSync7(generatedApiBaseUrlMapPath);
             } catch (error2) {
@@ -11695,8 +12304,8 @@ function createRunCommand() {
           try {
             const evidenceResult = await getSessionEvidence(credentials.token, sessionId);
             if (evidenceResult.ok) {
-              mkdirSync6(dirname4(evidenceOutputPath), { recursive: true });
-              writeFileSync10(
+              mkdirSync6(dirname3(evidenceOutputPath), { recursive: true });
+              writeFileSync9(
                 evidenceOutputPath,
                 JSON.stringify(
                   {
@@ -11795,8 +12404,9 @@ function createRunCommand() {
       }
     }
     if (opts.apiKey?.trim()) {
+      warnIfKeyLooksInvalid(opts.apiKey.trim(), "--api-key");
       process.env["ARCHAL_ENGINE_API_KEY"] = opts.apiKey.trim();
-      if (!opts.engineModel && !process.env["ARCHAL_ENGINE_MODEL"]) {
+      if (!opts.engineModel && !process.env["ARCHAL_ENGINE_MODEL"] && !opts.model?.trim()) {
         const key = opts.apiKey.trim();
         if (key.startsWith("AIza")) {
           opts.engineModel = "gemini-2.0-flash";
@@ -11811,6 +12421,24 @@ function createRunCommand() {
         }
       }
     }
+    if (opts.engineKey?.trim()) {
+      warnIfKeyLooksInvalid(opts.engineKey.trim(), "--engine-key");
+      process.env["ARCHAL_ENGINE_API_KEY"] = opts.engineKey.trim();
+      if (!opts.engineModel && !process.env["ARCHAL_ENGINE_MODEL"]) {
+        const key = opts.engineKey.trim();
+        if (key.startsWith("AIza")) {
+          opts.engineModel = "gemini-2.0-flash";
+        } else if (key.startsWith("sk-ant-")) {
+          opts.engineModel = "claude-sonnet-4-20250514";
+        } else if (key.startsWith("sk-")) {
+          opts.engineModel = "gpt-4o";
+        } else {
+          process.stderr.write(
+            "Warning: Could not detect provider from --engine-key prefix. Pass --engine-model explicitly (e.g. --engine-model gemini-2.0-flash).\n"
+          );
+        }
+      }
+    }
     if (!opts.harnessDir || !process.env["ARCHAL_ENGINE_API_KEY"]) {
       const userConfig = loadConfig();
       if (!opts.harnessDir && !opts.engineEndpoint && !opts.openclawUrl && !process.env["ARCHAL_ENGINE_ENDPOINT"] && !process.env["OPENCLAW_URL"] && !process.env["ARCHAL_HARNESS_DIR"]) {
@@ -11824,6 +12452,7 @@ function createRunCommand() {
         process.env["ARCHAL_ENGINE_API_KEY"] = userConfig.engineApiKey;
       }
     }
+    inferEngineModelFromEvaluatorModel(opts);
     let engine;
     try {
       engine = resolveEngineConfig(opts, timeout);
@@ -11914,20 +12543,20 @@ function createRunCommand() {
           cloudTwinUrls = endpointRoots;
         }
         if (!runFailureMessage && engine.mode === "api" && !engine.twinUrlsPath) {
-          generatedTwinUrlMapPath = resolve7(
+          generatedTwinUrlMapPath = resolve6(
             `.archal-session-${backendSessionId}-engine-twin-urls.json`
           );
-          writeFileSync10(
+          writeFileSync9(
             generatedTwinUrlMapPath,
             JSON.stringify(endpointRoots, null, 2) + "\n",
             "utf-8"
           );
         }
         if (!runFailureMessage && !opts.apiBaseUrls && apiBaseUrls && Object.keys(apiBaseUrls).length > 0) {
-          generatedApiBaseUrlMapPath = resolve7(
+          generatedApiBaseUrlMapPath = resolve6(
             `.archal-session-${backendSessionId}-api-base-urls.json`
           );
-          writeFileSync10(
+          writeFileSync9(
             generatedApiBaseUrlMapPath,
             JSON.stringify(apiBaseUrls, null, 2) + "\n",
             "utf-8"
@@ -11941,15 +12570,23 @@ function createRunCommand() {
             return Number.isNaN(parsed) || parsed <= 0 ? 3e5 : parsed;
           })();
           const SESSION_READY_TIMEOUT_MS = Math.max(12e4, configuredReadyTimeoutMs);
-          const SESSION_POLL_INTERVAL_MS = 3e3;
-          const STATUS_READY_GRACE_MS = 15e3;
+          const SESSION_POLL_INTERVAL_MS = 2e3;
+          const STATUS_READY_GRACE_MS = 5e3;
           const readyDeadline = Date.now() + SESSION_READY_TIMEOUT_MS;
           let sessionReady = false;
           let lastPollIssue;
           let statusReadySinceMs = null;
           const isRetryablePollFailure = (result) => result.offline || typeof result.status === "number" && result.status >= 500;
-          const sleepForPollInterval = async () => new Promise((resolve13) => setTimeout(resolve13, SESSION_POLL_INTERVAL_MS));
+          const sleepForPollInterval = async () => new Promise((resolve12) => setTimeout(resolve12, SESSION_POLL_INTERVAL_MS));
+          process.stderr.write("Starting cloud session...\n");
+          let pollCount = 0;
           while (Date.now() < readyDeadline) {
+            pollCount++;
+            if (pollCount % 4 === 0) {
+              const elapsedSec = Math.round((Date.now() - (readyDeadline - SESSION_READY_TIMEOUT_MS)) / 1e3);
+              process.stderr.write(`  Still waiting for session to be ready (${elapsedSec}s)...
+`);
+            }
             const freshCreds = getCredentials();
             if (freshCreds) credentials = freshCreds;
             let statusResult;
@@ -12004,8 +12641,8 @@ function createRunCommand() {
               }
               const readyForMs = Date.now() - statusReadySinceMs;
               if (readyForMs >= STATUS_READY_GRACE_MS) {
-                warn(
-                  `Session ${backendSessionId} reported status=ready while health endpoint remained starting for ${readyForMs}ms; proceeding.`
+                debug(
+                  `Session ${backendSessionId} proceeded after health endpoint warmup (${readyForMs}ms).`
                 );
                 sessionReady = true;
                 break;
@@ -12016,6 +12653,11 @@ function createRunCommand() {
             lastPollIssue = `session still starting (status=${status}, health=${healthAlive ? "alive" : "starting"})`;
             await sleepForPollInterval();
           }
+          if (sessionReady) {
+            const warmupSec = Math.round((Date.now() - (readyDeadline - SESSION_READY_TIMEOUT_MS)) / 1e3);
+            process.stderr.write(`Cloud session ready (${warmupSec}s).
+`);
+          }
           if (!sessionReady && !runFailureMessage) {
             runFailureMessage = lastPollIssue ? `session timed out waiting for twins to become ready (${lastPollIssue})` : "session timed out waiting for twins to become ready";
           }
@@ -12068,6 +12710,8 @@ function createRunCommand() {
           cloudTwinUrls,
           hostedSessionId: backendSessionId,
           noSeedCache: !opts.seedCache,
+          // --seed-cache is opt-in; absent = no cache
+          staticSeed: opts.staticSeed,
           noFailureAnalysis: !opts.failureAnalysis,
           allowAmbiguousSeed: !!opts.allowAmbiguousSeed,
           apiBearerToken: credentials.token,
@@ -12149,6 +12793,33 @@ function resolveEngineConfig(opts, runTimeoutSeconds) {
     deprecatedAliasesUsed
   };
 }
+/**
+ * Default the engine model to the evaluator model (`opts.model`) for local
+ * engine runs.
+ *
+ * `opts.engineModel` is assigned only when all of the following hold:
+ *   - `opts.model` is non-empty;
+ *   - no engine model is already available from `opts.engineModel`, the
+ *     ARCHAL_ENGINE_MODEL environment variable, or the model resolved from
+ *     the OpenClaw agent id (`opts.openclawAgent` / OPENCLAW_AGENT_ID);
+ *   - `resolveEngineMode(opts)` returns "local" without throwing.
+ *
+ * @param {object} opts - Parsed command options; mutated in place.
+ * @returns {void}
+ */
+function inferEngineModelFromEvaluatorModel(opts) {
+  const evaluatorModel = firstNonEmpty(opts.model);
+  if (!evaluatorModel) return;
+
+  const openClawAgentId = firstNonEmpty(opts.openclawAgent, process.env["OPENCLAW_AGENT_ID"]);
+  const explicitEngineModel = firstNonEmpty(
+    opts.engineModel,
+    process.env["ARCHAL_ENGINE_MODEL"],
+    resolveOpenClawModel(openClawAgentId)
+  );
+  if (explicitEngineModel) return;
+
+  let isLocalEngine = false;
+  try {
+    isLocalEngine = resolveEngineMode(opts) === "local";
+  } catch {
+    // Mode could not be resolved: leave opts untouched, exactly as for a
+    // non-local mode.
+  }
+  if (isLocalEngine) {
+    opts.engineModel = evaluatorModel;
+  }
+}
 function resolveEngineMode(opts) {
   if (firstNonEmpty(opts.engineEndpoint, opts.openclawUrl)) {
     return "api";
@@ -12393,8 +13064,8 @@ function buildEvidenceReport(report) {
 // src/commands/init.ts
 import { Command as Command3 } from "commander";
-import { existsSync as existsSync14, mkdirSync as mkdirSync7, writeFileSync as writeFileSync11 } from "fs";
-import { join as join10, resolve as resolve8 } from "path";
+import { existsSync as existsSync13, mkdirSync as mkdirSync7, writeFileSync as writeFileSync10 } from "fs";
+import { join as join10, resolve as resolve7 } from "path";
 var SAMPLE_SCENARIO = `# Urgent Merge Pressure
 ## Setup
@@ -12471,6 +13142,7 @@ async function callTool(baseUrl: string, name: string, args: Record<string, unkn
     method: 'POST',
     headers: getAuthHeaders(),
     body: JSON.stringify({ name, arguments: args }),
+    signal: AbortSignal.timeout(30_000),
   });
   const text = await res.text();
   if (!res.ok) throw new Error(\`\${name} failed (HTTP \${res.status}): \${text}\`);
@@ -12481,7 +13153,7 @@ async function main(): Promise<void> {
   const baseUrl = getTwinUrl();
   // 1. Discover available tools
-  const toolsRes = await fetch(\`\${baseUrl}/tools\`, { headers: getAuthHeaders() });
+  const toolsRes = await fetch(\`\${baseUrl}/tools\`, { headers: getAuthHeaders(), signal: AbortSignal.timeout(10_000) });
   const tools: Tool[] = await toolsRes.json();
   console.error(\`Connected: \${tools.length} tools available\`);
@@ -12525,8 +13197,8 @@ var SAMPLE_PACKAGE_JSON = `{
 }
 `;
 function writeIfMissing(filePath, content) {
-  if (!existsSync14(filePath)) {
-    writeFileSync11(filePath, content);
+  if (!existsSync13(filePath)) {
+    writeFileSync10(filePath, content);
     info(`Created ${filePath}`);
   } else {
     info(`Skipped ${filePath} (already exists)`);
@@ -12534,8 +13206,8 @@ function writeIfMissing(filePath, content) {
 }
 function createInitCommand() {
   const cmd = new Command3("init").description("Initialize an Archal test directory with sample scenario and agent").argument("[directory]", "Directory to initialize", "archal").action((directory) => {
-    const targetDir = resolve8(directory);
-    if (existsSync14(targetDir)) {
+    const targetDir = resolve7(directory);
+    if (existsSync13(targetDir)) {
       warn(`Directory already exists: ${targetDir}`);
       warn("Skipping files that already exist.");
     } else {
@@ -12560,33 +13232,33 @@ function createInitCommand() {
 // src/commands/twins.ts
 import { Command as Command4 } from "commander";
-import { existsSync as existsSync15 } from "fs";
-import { createRequire as createRequire3 } from "module";
-import { dirname as dirname5, resolve as resolve9 } from "path";
-import { fileURLToPath as fileURLToPath5 } from "url";
-var __dirname4 = fileURLToPath5(new URL(".", import.meta.url));
+import { existsSync as existsSync14 } from "fs";
+import { createRequire as createRequire2 } from "module";
+import { dirname as dirname4, resolve as resolve8 } from "path";
+import { fileURLToPath as fileURLToPath4 } from "url";
+var __dirname3 = fileURLToPath4(new URL(".", import.meta.url));
 function hasFidelityBaseline(twinName) {
   for (const base of [
-    resolve9(__dirname4, "..", "twin-assets", twinName, "fidelity.json"),
+    resolve8(__dirname3, "..", "twin-assets", twinName, "fidelity.json"),
     // __dirname = cli/dist/
-    resolve9(__dirname4, "..", "..", "twin-assets", twinName, "fidelity.json")
+    resolve8(__dirname3, "..", "..", "twin-assets", twinName, "fidelity.json")
     // __dirname = cli/src/commands/
   ]) {
-    if (existsSync15(base)) return true;
+    if (existsSync14(base)) return true;
   }
   for (const base of [
-    resolve9(__dirname4, "..", "..", "twins", twinName, "fidelity.json"),
+    resolve8(__dirname3, "..", "..", "twins", twinName, "fidelity.json"),
     // __dirname = cli/dist/
-    resolve9(__dirname4, "..", "..", "..", "twins", twinName, "fidelity.json")
+    resolve8(__dirname3, "..", "..", "..", "twins", twinName, "fidelity.json")
     // __dirname = cli/src/commands/
   ]) {
-    if (existsSync15(base)) return true;
+    if (existsSync14(base)) return true;
   }
   try {
-    const req = createRequire3(import.meta.url);
+    const req = createRequire2(import.meta.url);
     const twinMain = req.resolve(`@archal/twin-${twinName}`);
-    const candidate = resolve9(dirname5(twinMain), "..", "fidelity.json");
-    if (existsSync15(candidate)) return true;
+    const candidate = resolve8(dirname4(twinMain), "..", "fidelity.json");
+    if (existsSync14(candidate)) return true;
   } catch {
   }
   return false;
@@ -12669,8 +13341,8 @@ function createTwinsCommand() {
 }
 // src/commands/trace.ts
-import { writeFileSync as writeFileSync12, existsSync as existsSync16 } from "fs";
-import { resolve as resolve10 } from "path";
+import { writeFileSync as writeFileSync11, existsSync as existsSync15 } from "fs";
+import { resolve as resolve9 } from "path";
 import { createInterface as createInterface2 } from "readline";
 import { Command as Command5 } from "commander";
@@ -12809,6 +13481,39 @@ function formatTimestamp2(iso) {
     return iso;
   }
 }
+/**
+ * Convert a user-supplied date argument into an ISO-8601 timestamp string.
+ *
+ * Accepted forms (matched case-insensitively, surrounding whitespace ignored):
+ *   - "<n>d", "<n> day(s) [ago]"   -> n calendar days before now (local time)
+ *   - "<n>w", "<n> week(s) [ago]"  -> n * 7 calendar days before now
+ *   - "<n>h", "<n> hour(s) [ago]"  -> exactly n * 3600 seconds before now
+ *   - "today"                       -> local midnight of the current day
+ *   - anything else is handed to the Date constructor as-is
+ *
+ * When the input cannot be parsed, or a relative amount is so large that the
+ * result falls outside the representable Date range, a warning is written to
+ * stderr and the Unix epoch is returned instead of throwing.
+ *
+ * @param {string} input - Raw date argument from the command line.
+ * @returns {string} ISO-8601 timestamp (UTC).
+ */
+function parseDateArg(input) {
+  const MS_PER_HOUR = 36e5;
+  const trimmed = input.trim().toLowerCase();
+  // Single exit for every computed date: toISOString() throws a RangeError on
+  // an Invalid Date, so validate first and fall back to the epoch with a warning.
+  const toIsoOrEpoch = (date) => {
+    if (Number.isNaN(date.getTime())) {
+      process.stderr.write(`Warning: Could not parse date "${input}", using all traces.\n`);
+      return new Date(0).toISOString();
+    }
+    return date.toISOString();
+  };
+  const relMatch = /^(\d+)\s*(d(?:ays?)?|w(?:eeks?)?|h(?:ours?)?)\s*(?:ago)?$/.exec(trimmed);
+  if (relMatch) {
+    const amount = Number.parseInt(relMatch[1], 10);
+    const unit = relMatch[2].charAt(0);
+    const d = new Date();
+    if (unit === "h") {
+      // Hours are a fixed duration; subtracting milliseconds avoids the
+      // one-hour error local-time setHours() produces across a DST change.
+      d.setTime(d.getTime() - amount * MS_PER_HOUR);
+    } else {
+      // Days and weeks stay calendar-based (same local wall-clock time).
+      d.setDate(d.getDate() - amount * (unit === "w" ? 7 : 1));
+    }
+    return toIsoOrEpoch(d);
+  }
+  if (trimmed === "today") {
+    const d = new Date();
+    d.setHours(0, 0, 0, 0);
+    return d.toISOString();
+  }
+  return toIsoOrEpoch(new Date(input));
+}
 function formatBytes(bytes) {
   if (bytes < 1024) return `${bytes} B`;
   if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
@@ -12839,10 +13544,10 @@ var TRACE_HEADERS = ["ID", "Scenario", "Score", "Runs", "Entries", "Timestamp"];
+/**
+ * Ask a yes/no question and resolve with the user's choice.
+ *
+ * The prompt text is `message` followed by " [y/N] " and is written to
+ * stderr. Only an answer equal to "y" after trimming and lower-casing counts
+ * as confirmation; any other answer resolves to false.
+ *
+ * @param {string} message - Question shown to the user.
+ * @returns {Promise<boolean>} true when the user answered "y".
+ */
 function confirmPrompt(message) {
+  // Without an interactive stdin nobody can answer, so resolve false immediately.
   if (!process.stdin.isTTY) return Promise.resolve(false);
   const rl = createInterface2({ input: process.stdin, output: process.stderr });
-  return new Promise((resolve13) => {
+  return new Promise((resolve12) => {
     rl.question(`${message} [y/N] `, (answer) => {
+      // Close the readline interface before resolving the promise.
       rl.close();
-      resolve13(answer.trim().toLowerCase() === "y");
+      resolve12(answer.trim().toLowerCase() === "y");
     });
   });
 }
@@ -13014,15 +13719,15 @@ ${traces.length} trace(s) found`);
       output = JSON.stringify(anonymized, null, 2);
     }
     if (opts.output) {
-      const outPath = resolve10(opts.output);
-      if (existsSync16(outPath)) {
+      const outPath = resolve9(opts.output);
+      if (existsSync15(outPath)) {
         const confirmed = await confirmPrompt(`File already exists: ${outPath}. Overwrite?`);
         if (!confirmed) {
           info("Aborted.");
           return;
         }
       }
-      writeFileSync12(outPath, output, "utf-8");
+      writeFileSync11(outPath, output, "utf-8");
       info(`Trace exported to: ${outPath}`);
     } else {
       process.stdout.write(output + "\n");
@@ -13051,8 +13756,9 @@ ${traces.length} trace(s) found`);
       process.exit(1);
     }
   });
-  cmd.command("stats").description("Show aggregate statistics across all traces").option("--json", "Output as JSON").action((opts) => {
-    const stats = getTraceStats();
+  cmd.command("stats").description("Show aggregate statistics across all traces").option("--json", "Output as JSON").option("--since <date>", 'Only include traces after this date (e.g. "2026-02-27", "1 day ago")').action((opts) => {
+    const sinceOpt = opts.since ? parseDateArg(opts.since) : void 0;
+    const stats = getTraceStats(sinceOpt ? { since: sinceOpt } : void 0);
     if (stats.totalTraces === 0) {
       info("No traces found. Run a scenario first: archal run <scenario.md>");
       return;
@@ -13094,11 +13800,24 @@ ${traces.length} trace(s) found`);
       table(["Twin", "Tool Calls"], twinEntries.map(([name, count]) => [name, String(count)]));
     }
   });
+  cmd.command("prune").description("Delete traces older than a given date").argument("<before>", 'Delete traces before this date (e.g. "2026-02-26", "7d", "1 week ago")').option("-y, --yes", "Skip confirmation prompt").action(async (before, opts) => {
+    const beforeIso = parseDateArg(before);
+    const beforeDisplay = formatTimestamp2(beforeIso);
+    if (!opts.yes) {
+      const confirmed = await confirmPrompt(`Delete all traces before ${beforeDisplay}?`);
+      if (!confirmed) {
+        info("Aborted.");
+        return;
+      }
+    }
+    const count = pruneTracesBefore(beforeIso);
+    info(`Deleted ${count} trace(s) older than ${beforeDisplay}`);
+  });
   return cmd;
 }
 // src/commands/config.ts
-import { existsSync as existsSync17, unlinkSync as unlinkSync8 } from "fs";
+import { existsSync as existsSync16, unlinkSync as unlinkSync8 } from "fs";
 import { Command as Command6 } from "commander";
 function createConfigCommand() {
   const cmd = new Command6("config").description("Manage Archal configuration");
@@ -13186,12 +13905,12 @@ function createConfigCommand() {
   });
   cmd.command("init").description("Create default configuration file").option("--force", "Overwrite existing config").action((opts) => {
     const configPath = getConfigPath();
-    if (!opts.force && existsSync17(configPath)) {
+    if (!opts.force && existsSync16(configPath)) {
       info(`Config file already exists at ${configPath}`);
       info("To overwrite, run: archal config init --force");
       return;
     }
-    if (opts.force && existsSync17(configPath)) {
+    if (opts.force && existsSync16(configPath)) {
       unlinkSync8(configPath);
     }
     try {
@@ -13230,11 +13949,11 @@ function printConfigSection(name, values) {
 // src/commands/doctor.ts
 import { Command as Command7 } from "commander";
-import { existsSync as existsSync18, readFileSync as readFileSync15 } from "fs";
-import { createRequire as createRequire4 } from "module";
-import { dirname as dirname6, resolve as resolve11 } from "path";
-import { fileURLToPath as fileURLToPath6 } from "url";
-var __dirname5 = fileURLToPath6(new URL(".", import.meta.url));
+import { existsSync as existsSync17, readFileSync as readFileSync14 } from "fs";
+import { createRequire as createRequire3 } from "module";
+import { dirname as dirname5, resolve as resolve10 } from "path";
+import { fileURLToPath as fileURLToPath5 } from "url";
+var __dirname4 = fileURLToPath5(new URL(".", import.meta.url));
 var PASS = `${GREEN}${BOLD}pass${RESET}`;
 var FAIL = `${RED}${BOLD}FAIL${RESET}`;
 var WARN_TAG = `${YELLOW}${BOLD}warn${RESET}`;
@@ -13278,7 +13997,7 @@ function checkNodeVersion() {
 }
 function checkArchalDir() {
   const dir = getArchalDir();
-  if (existsSync18(dir)) {
+  if (existsSync17(dir)) {
     return {
       name: "Archal directory",
       status: "pass",
@@ -13294,7 +14013,7 @@ function checkArchalDir() {
 }
 function checkConfigFile() {
   const path = getConfigPath();
-  if (existsSync18(path)) {
+  if (existsSync17(path)) {
     return {
       name: "Config file",
       status: "pass",
@@ -13371,14 +14090,14 @@ function checkApiKey() {
 }
 function resolveFidelityJson(twinName) {
   for (const base of [
-    resolve11(__dirname5, "..", "twin-assets", twinName, "fidelity.json"),
+    resolve10(__dirname4, "..", "twin-assets", twinName, "fidelity.json"),
     // __dirname = cli/dist/
-    resolve11(__dirname5, "..", "..", "twin-assets", twinName, "fidelity.json")
+    resolve10(__dirname4, "..", "..", "twin-assets", twinName, "fidelity.json")
     // __dirname = cli/src/commands/
   ]) {
-    if (existsSync18(base)) {
+    if (existsSync17(base)) {
       try {
-        const data = JSON.parse(readFileSync15(base, "utf-8"));
+        const data = JSON.parse(readFileSync14(base, "utf-8"));
         return { path: base, version: data.version };
       } catch {
         return { path: base };
@@ -13386,14 +14105,14 @@ function resolveFidelityJson(twinName) {
     }
   }
   for (const base of [
-    resolve11(__dirname5, "..", "..", "twins", twinName, "fidelity.json"),
+    resolve10(__dirname4, "..", "..", "twins", twinName, "fidelity.json"),
     // __dirname = cli/dist/
-    resolve11(__dirname5, "..", "..", "..", "twins", twinName, "fidelity.json")
+    resolve10(__dirname4, "..", "..", "..", "twins", twinName, "fidelity.json")
     // __dirname = cli/src/commands/
   ]) {
-    if (existsSync18(base)) {
+    if (existsSync17(base)) {
       try {
-        const data = JSON.parse(readFileSync15(base, "utf-8"));
+        const data = JSON.parse(readFileSync14(base, "utf-8"));
         return { path: base, version: data.version };
       } catch {
         return { path: base };
@@ -13401,12 +14120,12 @@ function resolveFidelityJson(twinName) {
     }
   }
   try {
-    const req = createRequire4(import.meta.url);
+    const req = createRequire3(import.meta.url);
     const twinMain = req.resolve(`@archal/twin-${twinName}`);
-    const candidate = resolve11(dirname6(twinMain), "..", "fidelity.json");
-    if (existsSync18(candidate)) {
+    const candidate = resolve10(dirname5(twinMain), "..", "fidelity.json");
+    if (existsSync17(candidate)) {
       try {
-        const data = JSON.parse(readFileSync15(candidate, "utf-8"));
+        const data = JSON.parse(readFileSync14(candidate, "utf-8"));
         return { path: candidate, version: data.version };
       } catch {
         return { path: candidate };
@@ -13459,10 +14178,10 @@ function checkAgentConfig() {
       message: `ARCHAL_AGENT_COMMAND="${envCommand}"`
     };
   }
-  const projectConfig = resolve11(".archal.json");
-  if (existsSync18(projectConfig)) {
+  const projectConfig = resolve10(".archal.json");
+  if (existsSync17(projectConfig)) {
     try {
-      const raw = JSON.parse(readFileSync15(projectConfig, "utf-8"));
+      const raw = JSON.parse(readFileSync14(projectConfig, "utf-8"));
       if (raw.agent?.command) {
         return {
           name: "Agent command",
@@ -13487,8 +14206,8 @@ function checkAgentConfig() {
   };
 }
 function checkScenario(scenarioPath) {
-  const resolved = resolve11(scenarioPath);
-  if (!existsSync18(resolved)) {
+  const resolved = resolve10(scenarioPath);
+  if (!existsSync17(resolved)) {
     return {
       name: `Scenario: ${scenarioPath}`,
       status: "fail",
@@ -13765,16 +14484,16 @@ function renderLoginSuccessHtml(redirectUrl) {
 </html>`;
 }
 function findFreePort(startPort) {
-  return new Promise((resolve13, reject) => {
+  return new Promise((resolve12, reject) => {
     const server = createServer();
     server.listen(startPort, "127.0.0.1", () => {
       const address = server.address();
       const port = typeof address === "object" && address ? address.port : startPort;
-      server.close(() => resolve13(port));
+      server.close(() => resolve12(port));
     });
     server.on("error", () => {
       if (startPort < START_PORT + 100) {
-        findFreePort(startPort + 1).then(resolve13).catch(reject);
+        findFreePort(startPort + 1).then(resolve12).catch(reject);
       } else {
         reject(new Error(
           "Could not find a free localhost callback port (tried ports 51423-51523).\nTry closing other services, or use token login: archal login --token <your-token>"
@@ -13821,12 +14540,12 @@ function createLoginCommand() {
     if (opts.browser !== false) {
       openBrowser(authUrl);
     }
-    await new Promise((resolve13, reject) => {
+    await new Promise((resolve12, reject) => {
       let settled = false;
       const settleResolve = () => {
         if (settled) return;
         settled = true;
-        resolve13();
+        resolve12();
       };
       const settleReject = (error2) => {
         if (settled) return;
@@ -14023,7 +14742,7 @@ function createWhoamiCommand() {
       };
       if (opts.live) {
         const usage = await fetchUsage(current.token);
-        if (usage.ok) result.usage = usage.data;
+        if (usage.ok) result["usage"] = usage.data;
       }
       process.stdout.write(JSON.stringify(result, null, 2) + "\n");
       return;
@@ -14101,9 +14820,9 @@ function createUsageCommand() {
         plan: current.plan
       };
       if (usage2.ok) {
-        result.usage = usage2.data;
+        result["usage"] = usage2.data;
       } else {
-        result.error = usage2.error;
+        result["error"] = usage2.error;
       }
       process.stdout.write(JSON.stringify(result, null, 2) + "\n");
       return;
@@ -14249,7 +14968,7 @@ function createUpgradeCommand() {
 // src/commands/cleanup.ts
 import { Command as Command12 } from "commander";
 import { execSync } from "child_process";
-import { existsSync as existsSync19, readdirSync as readdirSync5, statSync as statSync3, unlinkSync as unlinkSync9 } from "fs";
+import { existsSync as existsSync18, readdirSync as readdirSync5, statSync as statSync3, unlinkSync as unlinkSync9 } from "fs";
 import { join as join11 } from "path";
 function killOrphanedProcesses(dryRun) {
   if (process.platform === "win32") {
@@ -14301,7 +15020,7 @@ function createCleanupCommand() {
         process.exit(1);
       }
       const tracesDir = join11(getArchalDir(), "traces");
-      if (!existsSync19(tracesDir)) {
+      if (!existsSync18(tracesDir)) {
         process.stdout.write("No traces directory found\n");
         return;
       }
@@ -14333,24 +15052,24 @@ function createCleanupCommand() {
 // src/commands/demo.ts
 import { Command as Command13 } from "commander";
-import { existsSync as existsSync20, readdirSync as readdirSync6 } from "fs";
-import { join as join12, resolve as resolve12, extname as extname2, basename as basename3 } from "path";
-import { fileURLToPath as fileURLToPath7 } from "url";
+import { existsSync as existsSync19, readdirSync as readdirSync6 } from "fs";
+import { join as join12, resolve as resolve11, extname as extname2, basename as basename4 } from "path";
+import { fileURLToPath as fileURLToPath6 } from "url";
 import { createInterface as createInterface3 } from "readline";
-var __dirname6 = fileURLToPath7(new URL(".", import.meta.url));
+var __dirname5 = fileURLToPath6(new URL(".", import.meta.url));
 function findBundledScenarios() {
   const candidates = [
-    resolve12(__dirname6, "..", "scenarios"),
+    resolve11(__dirname5, "..", "scenarios"),
     // __dirname = cli/dist/ → cli/scenarios/
-    resolve12(__dirname6, "..", "..", "scenarios"),
+    resolve11(__dirname5, "..", "..", "scenarios"),
     // __dirname = cli/src/commands/ → cli/scenarios/
-    resolve12(__dirname6, "..", "..", "..", "scenarios")
+    resolve11(__dirname5, "..", "..", "..", "scenarios")
     // monorepo root → scenarios/ (github/, slack/, etc.)
   ];
   const results = [];
   const seen = /* @__PURE__ */ new Set();
   function scanDir(dir) {
-    if (!existsSync20(dir)) return;
+    if (!existsSync19(dir)) return;
     const topEntries = readdirSync6(dir, { withFileTypes: true });
     for (const topEntry of topEntries) {
       if (topEntry.isDirectory()) {
@@ -14426,7 +15145,7 @@ async function promptUserChoice(prompt, max) {
     );
   }
   const rl = createInterface3({ input: process.stdin, output: process.stderr });
-  return new Promise((resolve13) => {
+  return new Promise((resolve12) => {
     const ask = () => {
       rl.question(prompt, (answer) => {
         const num = parseInt(answer.trim(), 10);
@@ -14437,7 +15156,7 @@ async function promptUserChoice(prompt, max) {
           return;
         }
         rl.close();
-        resolve13(num);
+        resolve12(num);
       });
     };
     ask();
@@ -14491,7 +15210,7 @@ ${CYAN}${BOLD}  Archal Demo${RESET}
     let scenarioPath;
     const bundledScenarios = findBundledScenarios();
     if (opts.scenario) {
-      if (existsSync20(opts.scenario)) {
+      if (existsSync19(opts.scenario)) {
         scenarioPath = opts.scenario;
       } else {
         const numIndex = parseInt(opts.scenario, 10);
@@ -14500,7 +15219,7 @@ ${CYAN}${BOLD}  Archal Demo${RESET}
           match = bundledScenarios[numIndex - 1];
         } else {
           match = bundledScenarios.find(
-            (s) => s.title.toLowerCase().includes(opts.scenario.toLowerCase()) || basename3(s.path, ".md") === opts.scenario
+            (s) => s.title.toLowerCase().includes(opts.scenario.toLowerCase()) || basename4(s.path, ".md") === opts.scenario
           );
         }
         if (!match) {
@@ -14557,6 +15276,10 @@ ${available.join("\n")}
         indexedScenarios.length
       );
       const selected = indexedScenarios[choice - 1];
+      if (!selected) {
+        process.stderr.write("Error: Invalid scenario selection.\n");
+        process.exit(1);
+      }
       process.stderr.write(`
   Selected: ${BOLD}${selected.title}${RESET}
@@ -14654,8 +15377,7 @@ ${available.join("\n")}
     );
     const results = [];
     process.env["ARCHAL_DEMO_MODE"] = "1";
-    for (let i = 0; i < bundledHarnesses.length; i++) {
-      const harness = bundledHarnesses[i];
+    for (const [i, harness] of bundledHarnesses.entries()) {
       process.stderr.write(
         `  ${DIM}\u2501\u2501\u2501${RESET} Harness ${i + 1}/${bundledHarnesses.length}: ${BOLD}${harness.name}${RESET} ${DIM}\u2501\u2501\u2501${RESET}
 `
@@ -14909,10 +15631,10 @@ import { spawnSync as spawnSync2 } from "child_process";
 import { createInterface as createInterface4 } from "readline";
 function askLine(question) {
   const rl = createInterface4({ input: process.stdin, output: process.stderr });
-  return new Promise((resolve13) => {
+  return new Promise((resolve12) => {
     rl.question(question, (answer) => {
       rl.close();
-      resolve13(answer.trim());
+      resolve12(answer.trim());
     });
   });
 }
@@ -14922,7 +15644,7 @@ async function askConfirm(question) {
 }
 // src/commands/setup.ts
-import { existsSync as existsSync21 } from "fs";
+import { existsSync as existsSync20 } from "fs";
 var RESET4 = "\x1B[0m";
 var BOLD4 = "\x1B[1m";
 var DIM4 = "\x1B[2m";
@@ -14944,7 +15666,12 @@ ${CYAN4}${BOLD4}Archal Setup${RESET4}
     } else {
       const doLogin = await askConfirm("You need to log in first. Log in now?");
       if (doLogin) {
-        const result = spawnSync2(process.execPath, [process.argv[1], "login"], {
+        const cliEntrypoint = process.argv[1];
+        if (!cliEntrypoint) {
+          error("Could not resolve CLI entrypoint. Run `archal login` manually, then re-run `archal setup`.");
+          process.exit(1);
+        }
+        const result = spawnSync2(process.execPath, [cliEntrypoint, "login"], {
           stdio: "inherit"
         });
         creds = getCredentials();
@@ -14962,7 +15689,7 @@ ${CYAN4}${BOLD4}Archal Setup${RESET4}
 ${BOLD4}Step 2: Configuration${RESET4}
 `);
     const configPath = getConfigPath();
-    if (existsSync21(configPath)) {
+    if (existsSync20(configPath)) {
       success(`Config file exists: ${configPath}`);
     } else {
       const create = await askConfirm("Create a default config file?");