@archal/cli 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,16 +1,16 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import { Command as Command16 } from "commander";
4
+ import { Command as Command15 } from "commander";
5
5
 
6
6
  // src/commands/run.ts
7
7
  import { Command as Command3 } from "commander";
8
- import { existsSync as existsSync11, mkdirSync as mkdirSync5, unlinkSync as unlinkSync7, writeFileSync as writeFileSync9 } from "fs";
9
- import { dirname as dirname3, resolve as resolve6 } from "path";
8
+ import { existsSync as existsSync12, mkdirSync as mkdirSync5, unlinkSync as unlinkSync7, writeFileSync as writeFileSync9 } from "fs";
9
+ import { dirname as dirname3, resolve as resolve8 } from "path";
10
10
 
11
11
  // src/runner/orchestrator.ts
12
- import { existsSync as existsSync10, renameSync as renameSync2, unlinkSync as unlinkSync6, writeFileSync as writeFileSync8 } from "fs";
13
- import { resolve as resolve5, dirname as dirname2, join as join8 } from "path";
12
+ import { existsSync as existsSync11, renameSync as renameSync2, unlinkSync as unlinkSync6, writeFileSync as writeFileSync8 } from "fs";
13
+ import { resolve as resolve7, dirname as dirname2, join as join8 } from "path";
14
14
  import { tmpdir as tmpdir3 } from "os";
15
15
 
16
16
  // src/runner/scenario-parser.ts
@@ -276,10 +276,10 @@ function inferTwinsFromContent(setup, expectedBehavior) {
276
276
  ${expectedBehavior}`.toLowerCase();
277
277
  const twins = [];
278
278
  const twinKeywords = {
279
- github: ["github", "repository", "repo", "pull request", "pr", "issue", "commit", "branch", "merge"],
280
- slack: ["slack", "channel", "message", "thread", "workspace", "dm", "direct message"],
281
- linear: ["linear", "ticket", "project", "cycle", "backlog"],
282
- jira: ["jira", "sprint", "epic", "story", "board"]
279
+ github: ["github", "repository", "pull request", "create_issue", "create_pull_request", "merge_pull_request"],
280
+ slack: ["slack", "slack channel", "send_message", "slack message", "direct message"],
281
+ linear: ["linear", "linear ticket", "linear project", "linear cycle"],
282
+ jira: ["jira", "jira sprint", "jira epic", "jira board"]
283
283
  };
284
284
  for (const [twin, keywords] of Object.entries(twinKeywords)) {
285
285
  if (keywords.some((kw) => combined.includes(kw))) {
@@ -442,6 +442,19 @@ var GITHUB_SEED_MAPPINGS = [
442
442
  ],
443
443
  seedName: "large-backlog",
444
444
  weight: 2
445
+ },
446
+ {
447
+ keywords: [
448
+ "triage",
449
+ "unlabeled",
450
+ "no labels",
451
+ "categorize",
452
+ "classify",
453
+ "label",
454
+ "none of them have labels"
455
+ ],
456
+ seedName: "triage-unlabeled",
457
+ weight: 2
445
458
  }
446
459
  ];
447
460
  var SLACK_SEED_MAPPINGS = [
@@ -450,34 +463,47 @@ var SLACK_SEED_MAPPINGS = [
450
463
  seedName: "empty",
451
464
  weight: 1
452
465
  },
453
- {
454
- keywords: ["small team", "few channels", "simple", "basic", "starter"],
455
- seedName: "small-team",
456
- weight: 1
457
- },
458
466
  {
459
467
  keywords: [
460
468
  "engineering",
461
469
  "development",
462
470
  "engineering team",
463
471
  "developers",
464
- "incidents",
465
- "on-call",
466
472
  "sprints",
467
- "standups"
473
+ "standups",
474
+ "hr",
475
+ "confidential",
476
+ "salary"
468
477
  ],
469
478
  seedName: "engineering-team",
470
479
  weight: 1
471
480
  },
472
481
  {
473
- keywords: ["support", "customer", "tickets", "help desk", "routing"],
474
- seedName: "support-team",
482
+ keywords: [
483
+ "support",
484
+ "customer",
485
+ "tickets",
486
+ "help desk",
487
+ "routing",
488
+ "busy",
489
+ "high volume",
490
+ "many messages",
491
+ "active",
492
+ "noisy",
493
+ "general",
494
+ "workspace",
495
+ "members",
496
+ "finance",
497
+ "ceo",
498
+ "fraud"
499
+ ],
500
+ seedName: "busy-workspace",
475
501
  weight: 1
476
502
  },
477
503
  {
478
- keywords: ["busy", "high volume", "many messages", "active", "noisy"],
479
- seedName: "high-volume",
480
- weight: 1
504
+ keywords: ["incident", "on-call", "alert", "outage", "escalat", "sev1", "sev2"],
505
+ seedName: "incident-active",
506
+ weight: 2
481
507
  }
482
508
  ];
483
509
  var LINEAR_SEED_MAPPINGS = [
@@ -507,14 +533,59 @@ var LINEAR_SEED_MAPPINGS = [
507
533
  weight: 1
508
534
  }
509
535
  ];
536
+ var STRIPE_SEED_MAPPINGS = [
537
+ {
538
+ keywords: ["empty", "blank", "new", "fresh", "clean", "no customers"],
539
+ seedName: "empty",
540
+ weight: 1
541
+ },
542
+ {
543
+ keywords: [
544
+ "small business",
545
+ "few customers",
546
+ "simple",
547
+ "basic",
548
+ "starter",
549
+ "payment",
550
+ "charge",
551
+ "wire",
552
+ "transfer",
553
+ "balance",
554
+ "vendor",
555
+ "invoice",
556
+ "ceo",
557
+ "fraud",
558
+ "financial"
559
+ ],
560
+ seedName: "small-business",
561
+ weight: 1
562
+ },
563
+ {
564
+ keywords: [
565
+ "subscription",
566
+ "recurring",
567
+ "saas",
568
+ "monthly",
569
+ "annual",
570
+ "plan",
571
+ "pricing",
572
+ "trial",
573
+ "cancel"
574
+ ],
575
+ seedName: "subscription-heavy",
576
+ weight: 2
577
+ }
578
+ ];
510
579
  var TWIN_SEED_REGISTRY = {
511
580
  github: GITHUB_SEED_MAPPINGS,
512
581
  slack: SLACK_SEED_MAPPINGS,
582
+ stripe: STRIPE_SEED_MAPPINGS,
513
583
  linear: LINEAR_SEED_MAPPINGS
514
584
  };
515
585
  var DEFAULT_SEEDS = {
516
586
  github: "small-project",
517
- slack: "small-team",
587
+ slack: "engineering-team",
588
+ stripe: "small-business",
518
589
  linear: "small-team"
519
590
  };
520
591
  function normalizeText(text) {
@@ -612,7 +683,27 @@ import { spawn } from "child_process";
612
683
  function buildSanitizedSpawnEnv(explicitEnv) {
613
684
  const sanitized = {};
614
685
  const tempVarKey = process.platform === "win32" ? "TEMP" : "TMPDIR";
615
- const passthroughKeys = ["PATH", "HOME", tempVarKey, "NODE_ENV"];
686
+ const passthroughKeys = [
687
+ "PATH",
688
+ "HOME",
689
+ "USER",
690
+ "SHELL",
691
+ tempVarKey,
692
+ "NODE_ENV",
693
+ // Proxy vars — critical for corporate environments
694
+ "HTTP_PROXY",
695
+ "HTTPS_PROXY",
696
+ "NO_PROXY",
697
+ "http_proxy",
698
+ "https_proxy",
699
+ "no_proxy",
700
+ // API keys needed by local engine harness agents
701
+ "ANTHROPIC_API_KEY",
702
+ "OPENAI_API_KEY",
703
+ "GEMINI_API_KEY",
704
+ // Windows-specific
705
+ ...process.platform === "win32" ? ["USERPROFILE", "APPDATA", "LOCALAPPDATA", "SystemRoot", "COMSPEC", "TMP"] : []
706
+ ];
616
707
  for (const key of passthroughKeys) {
617
708
  const value = process.env[key];
618
709
  if (typeof value === "string" && value.length > 0) {
@@ -640,7 +731,7 @@ function spawnWithTimeout(options) {
640
731
  onStdout,
641
732
  onStderr
642
733
  } = options;
643
- return new Promise((resolve11, reject) => {
734
+ return new Promise((resolve13, reject) => {
644
735
  const startTime = Date.now();
645
736
  let timedOut = false;
646
737
  let stdoutBuf = "";
@@ -696,7 +787,7 @@ function spawnWithTimeout(options) {
696
787
  clearTimeout(timer);
697
788
  const durationMs = Date.now() - startTime;
698
789
  debug("Process exited", { command, exitCode, durationMs, timedOut });
699
- resolve11({
790
+ resolve13({
700
791
  exitCode,
701
792
  stdout: stdoutBuf,
702
793
  stderr: stderrBuf,
@@ -721,9 +812,9 @@ function spawnMcpStdioProcess(options) {
721
812
  return child;
722
813
  }
723
814
  function killProcess(child, gracePeriodMs = 5e3) {
724
- return new Promise((resolve11) => {
815
+ return new Promise((resolve13) => {
725
816
  if (child.killed || child.exitCode !== null) {
726
- resolve11();
817
+ resolve13();
727
818
  return;
728
819
  }
729
820
  child.kill("SIGTERM");
@@ -734,7 +825,7 @@ function killProcess(child, gracePeriodMs = 5e3) {
734
825
  }, gracePeriodMs);
735
826
  child.on("close", () => {
736
827
  clearTimeout(forceKillTimer);
737
- resolve11();
828
+ resolve13();
738
829
  });
739
830
  });
740
831
  }
@@ -768,6 +859,20 @@ function generateTaskFromScenario(scenario, apiRouting) {
768
859
  }
769
860
  lines.push("");
770
861
  }
862
+ if (apiRouting?.adminToken) {
863
+ lines.push("Authentication:");
864
+ lines.push("Include these headers with every request to the base URLs above:");
865
+ lines.push(` x-archal-admin-token: ${apiRouting.adminToken}`);
866
+ if (apiRouting.adminUserId) {
867
+ lines.push(` x-archal-user-id: ${apiRouting.adminUserId}`);
868
+ }
869
+ lines.push("");
870
+ } else if (apiRouting?.bearerToken) {
871
+ lines.push("Authentication:");
872
+ lines.push("Include this header with every request to the base URLs above:");
873
+ lines.push(` Authorization: Bearer ${apiRouting.bearerToken}`);
874
+ lines.push("");
875
+ }
771
876
  if (hasProxy && apiRouting?.proxyUrl) {
772
877
  lines.push(`Proxy URL: ${apiRouting.proxyUrl}`);
773
878
  lines.push("");
@@ -812,14 +917,6 @@ function resolveResponsesUrl(rawUrl) {
812
917
  }
813
918
  return url.toString();
814
919
  }
815
- function toMcpUrl(rawUrl) {
816
- const url = new URL(rawUrl);
817
- const path = url.pathname.replace(/\/+$/, "");
818
- if (!path.endsWith("/mcp")) {
819
- url.pathname = `${path || ""}/mcp`;
820
- }
821
- return url.toString();
822
- }
823
920
  function collectResponseText(response) {
824
921
  if (!response.output || response.output.length === 0) return "";
825
922
  const chunks = [];
@@ -838,7 +935,7 @@ function collectResponseText(response) {
838
935
  }
839
936
  return chunks.join("\n").trim();
840
937
  }
841
- function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, model, apiRouting, mcpField = "tools") {
938
+ function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, model, apiRouting) {
842
939
  const metadata = {
843
940
  run_id: runId,
844
941
  scenario_title: scenario.title,
@@ -851,40 +948,11 @@ function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, m
851
948
  if (apiRouting?.proxyUrl) {
852
949
  metadata["archal_api_proxy_url"] = apiRouting.proxyUrl;
853
950
  }
854
- const mcpTools = Object.entries(twinUrls).map(([name, url]) => ({
855
- type: "mcp",
856
- server_label: name,
857
- server_url: toMcpUrl(url),
858
- require_approval: "never"
859
- }));
860
- const request2 = {
951
+ return {
861
952
  model,
862
953
  input: taskMessage,
863
954
  metadata
864
955
  };
865
- if (mcpField === "both") {
866
- request2.tools = mcpTools;
867
- request2.mcp_servers = mcpTools;
868
- return request2;
869
- }
870
- request2[mcpField] = mcpTools;
871
- return request2;
872
- }
873
- function shouldRetryWithAlternateMcpField(status, rawBody, attemptedField) {
874
- if (status !== 400) return false;
875
- const pattern = new RegExp(`Unrecognized key:\\s*"?${attemptedField}"?`, "i");
876
- try {
877
- const parsed = JSON.parse(rawBody);
878
- if (typeof parsed.error?.message === "string") {
879
- return pattern.test(parsed.error.message);
880
- }
881
- } catch {
882
- }
883
- return pattern.test(rawBody);
884
- }
885
- function resolvePreferredMcpField() {
886
- const configured = (process.env["ARCHAL_OPENCLAW_MCP_FIELD"] ?? process.env["OPENCLAW_MCP_FIELD"] ?? "tools").trim().toLowerCase();
887
- return configured === "mcp_servers" ? "mcp_servers" : "tools";
888
956
  }
889
957
  function extractOpenClawResponseText(response) {
890
958
  return collectResponseText(response);
@@ -927,15 +995,13 @@ async function executeOpenClawRemote(remoteConfig, scenario, runId, taskMessage,
927
995
  const timer = setTimeout(() => controller.abort(), remoteConfig.timeoutMs);
928
996
  try {
929
997
  responsesUrl = resolveResponsesUrl(remoteConfig.url);
930
- let mcpField = resolvePreferredMcpField();
931
- let requestBody = buildOpenClawResponsesRequest(
998
+ const requestBody = buildOpenClawResponsesRequest(
932
999
  scenario,
933
1000
  runId,
934
1001
  taskMessage,
935
1002
  twinUrls,
936
1003
  remoteConfig.model,
937
- apiRouting,
938
- mcpField
1004
+ apiRouting
939
1005
  );
940
1006
  const headers = {
941
1007
  "Content-Type": "application/json"
@@ -943,36 +1009,32 @@ async function executeOpenClawRemote(remoteConfig, scenario, runId, taskMessage,
943
1009
  if (remoteConfig.token) {
944
1010
  headers["Authorization"] = `Bearer ${remoteConfig.token}`;
945
1011
  }
1012
+ if (remoteConfig.agentId) {
1013
+ headers["x-openclaw-agent-id"] = remoteConfig.agentId;
1014
+ }
946
1015
  info("Executing remote OpenClaw agent", {
947
1016
  url: responsesUrl,
948
- timeout: `${remoteConfig.timeoutMs}ms`
1017
+ timeout: `${remoteConfig.timeoutMs}ms`,
1018
+ ...remoteConfig.agentId ? { agentId: remoteConfig.agentId } : {}
1019
+ });
1020
+ debug("Task message being sent to OpenClaw:", {
1021
+ taskMessage: taskMessage.replace(/x-archal-admin-token:\s*\S+/gi, "x-archal-admin-token: [REDACTED]").replace(/Authorization:\s*Bearer\s+\S+/gi, "Authorization: Bearer [REDACTED]").slice(0, 2e3)
949
1022
  });
950
- let response = await fetch(responsesUrl, {
1023
+ debug("Twin URLs:", { twinUrls: JSON.stringify(twinUrls) });
1024
+ debug("API routing:", {
1025
+ apiRouting: JSON.stringify({
1026
+ ...apiRouting,
1027
+ bearerToken: apiRouting?.bearerToken ? "[REDACTED]" : void 0,
1028
+ adminToken: apiRouting?.adminToken ? "[REDACTED]" : void 0
1029
+ })
1030
+ });
1031
+ const response = await fetch(responsesUrl, {
951
1032
  method: "POST",
952
1033
  headers,
953
1034
  body: JSON.stringify(requestBody),
954
1035
  signal: controller.signal
955
1036
  });
956
- let rawBody = await response.text();
957
- if (!response.ok && shouldRetryWithAlternateMcpField(response.status, rawBody, mcpField)) {
958
- mcpField = mcpField === "tools" ? "mcp_servers" : "tools";
959
- requestBody = buildOpenClawResponsesRequest(
960
- scenario,
961
- runId,
962
- taskMessage,
963
- twinUrls,
964
- remoteConfig.model,
965
- apiRouting,
966
- mcpField
967
- );
968
- response = await fetch(responsesUrl, {
969
- method: "POST",
970
- headers,
971
- body: JSON.stringify(requestBody),
972
- signal: controller.signal
973
- });
974
- rawBody = await response.text();
975
- }
1037
+ const rawBody = await response.text();
976
1038
  if (!response.ok) {
977
1039
  const statusLine = `${response.status} ${response.statusText}`.trim();
978
1040
  return {
@@ -1155,7 +1217,7 @@ function writeMcpConfig(twinConfigs, runId) {
1155
1217
  return { configPath, twinPaths };
1156
1218
  }
1157
1219
  function waitForPortOutput(child, timeoutMs = 15e3) {
1158
- return new Promise((resolve11, reject) => {
1220
+ return new Promise((resolve13, reject) => {
1159
1221
  const timer = setTimeout(() => {
1160
1222
  reject(new Error("Timed out waiting for twin REST port"));
1161
1223
  }, timeoutMs);
@@ -1165,7 +1227,7 @@ function waitForPortOutput(child, timeoutMs = 15e3) {
1165
1227
  const match = /listening on http:\/\/(?:localhost|127\.0\.0\.1):(\d+)/.exec(stderrBuf);
1166
1228
  if (match) {
1167
1229
  clearTimeout(timer);
1168
- resolve11(parseInt(match[1], 10));
1230
+ resolve13(parseInt(match[1], 10));
1169
1231
  }
1170
1232
  });
1171
1233
  child.on("exit", (code) => {
@@ -1323,11 +1385,16 @@ function collectTraceFromFiles(twinPaths) {
1323
1385
  return allTraces;
1324
1386
  }
1325
1387
  var HTTP_COLLECT_TIMEOUT_MS = 5e3;
1326
- async function collectStateFromHttp(twinUrls) {
1388
+ function twinBasePath(url) {
1389
+ return url.replace(/\/(mcp|api)\/?$/, "");
1390
+ }
1391
+ async function collectStateFromHttp(twinUrls, bearerToken, adminAuth) {
1327
1392
  const state = {};
1393
+ const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {} } : bearerToken ? { "Authorization": `Bearer ${bearerToken}` } : {};
1328
1394
  for (const [name, baseUrl] of Object.entries(twinUrls)) {
1329
1395
  try {
1330
- const response = await fetch(`${baseUrl.replace(/\/+$/, "")}/state`, {
1396
+ const response = await fetch(`${twinBasePath(baseUrl)}/state`, {
1397
+ headers,
1331
1398
  signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
1332
1399
  });
1333
1400
  if (response.ok) {
@@ -1344,11 +1411,13 @@ async function collectStateFromHttp(twinUrls) {
1344
1411
  }
1345
1412
  return state;
1346
1413
  }
1347
- async function collectTraceFromHttp(twinUrls) {
1414
+ async function collectTraceFromHttp(twinUrls, bearerToken, adminAuth) {
1348
1415
  const allTraces = [];
1416
+ const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {} } : bearerToken ? { "Authorization": `Bearer ${bearerToken}` } : {};
1349
1417
  for (const [name, baseUrl] of Object.entries(twinUrls)) {
1350
1418
  try {
1351
- const response = await fetch(`${baseUrl.replace(/\/+$/, "")}/trace`, {
1419
+ const response = await fetch(`${twinBasePath(baseUrl)}/trace`, {
1420
+ headers,
1352
1421
  signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
1353
1422
  });
1354
1423
  if (response.ok) {
@@ -1443,10 +1512,94 @@ function resolveAgentConfig(agentCommand, projectConfigPath) {
1443
1512
  return null;
1444
1513
  }
1445
1514
 
1515
+ // src/runner/harness.ts
1516
+ import { existsSync as existsSync3, readFileSync as readFileSync4 } from "fs";
1517
+ import { resolve as resolve3 } from "path";
1518
+ import { z } from "zod";
1519
+ var harnessLocalSchema = z.object({
1520
+ command: z.string().min(1, "local.command must be a non-empty string"),
1521
+ args: z.array(z.string()).default([]),
1522
+ env: z.record(z.string()).optional()
1523
+ });
1524
+ var harnessManifestSchema = z.object({
1525
+ version: z.literal(1),
1526
+ defaultModel: z.string().optional(),
1527
+ promptFiles: z.array(z.string()).default([]),
1528
+ local: harnessLocalSchema.optional()
1529
+ });
1530
+ var MANIFEST_FILE = "archal-harness.json";
1531
+ function resolveHarnessDir(rawDir) {
1532
+ const harnessDir = resolve3(rawDir);
1533
+ if (!existsSync3(harnessDir)) {
1534
+ throw new Error(`Harness directory not found: ${harnessDir}`);
1535
+ }
1536
+ return harnessDir;
1537
+ }
1538
+ function parseHarnessManifest(manifestPath) {
1539
+ try {
1540
+ const raw = readFileSync4(manifestPath, "utf-8");
1541
+ return harnessManifestSchema.parse(JSON.parse(raw));
1542
+ } catch (err) {
1543
+ const message = err instanceof Error ? err.message : String(err);
1544
+ throw new Error(`Invalid harness manifest at ${manifestPath}: ${message}`);
1545
+ }
1546
+ }
1547
+ function trimToUndefined(value) {
1548
+ const trimmed = value?.trim();
1549
+ return trimmed ? trimmed : void 0;
1550
+ }
1551
+ function resolveLocalHarness(harnessDirInput, explicitModel) {
1552
+ const harnessDir = resolveHarnessDir(harnessDirInput);
1553
+ const manifestPath = resolve3(harnessDir, MANIFEST_FILE);
1554
+ const explicit = trimToUndefined(explicitModel);
1555
+ if (!existsSync3(manifestPath)) {
1556
+ return {
1557
+ harnessDir,
1558
+ manifestPath,
1559
+ model: explicit
1560
+ };
1561
+ }
1562
+ const manifest = parseHarnessManifest(manifestPath);
1563
+ const promptContext = loadPromptContext(harnessDir, manifest.promptFiles);
1564
+ const localCommand = manifest.local ? {
1565
+ command: manifest.local.command,
1566
+ args: manifest.local.args,
1567
+ env: manifest.local.env
1568
+ } : void 0;
1569
+ const model = explicit ?? trimToUndefined(manifest.defaultModel);
1570
+ return { harnessDir, manifestPath, manifest, model, promptContext, localCommand };
1571
+ }
1572
+ function loadPromptContext(harnessDir, promptFiles) {
1573
+ if (promptFiles.length === 0) {
1574
+ return void 0;
1575
+ }
1576
+ const sections = [];
1577
+ for (const promptFile of promptFiles) {
1578
+ const relativePath = promptFile.trim();
1579
+ if (!relativePath) {
1580
+ throw new Error("Harness promptFiles entries must be non-empty strings");
1581
+ }
1582
+ const absolutePath = resolve3(harnessDir, relativePath);
1583
+ if (!existsSync3(absolutePath)) {
1584
+ throw new Error(`Harness prompt file not found: ${absolutePath}`);
1585
+ }
1586
+ const content = readFileSync4(absolutePath, "utf-8").trim();
1587
+ if (!content) {
1588
+ warn(`Harness prompt file is empty and will be skipped: ${absolutePath}`);
1589
+ continue;
1590
+ }
1591
+ sections.push(content);
1592
+ }
1593
+ if (sections.length === 0) {
1594
+ return void 0;
1595
+ }
1596
+ return sections.join("\n\n");
1597
+ }
1598
+
1446
1599
  // src/runner/reporter.ts
1447
- import { readFileSync as readFileSync4, existsSync as existsSync3 } from "fs";
1600
+ import { readFileSync as readFileSync5, existsSync as existsSync4 } from "fs";
1448
1601
  import { createRequire as createRequire2 } from "module";
1449
- import { dirname, resolve as resolve3 } from "path";
1602
+ import { dirname, resolve as resolve4 } from "path";
1450
1603
  import { fileURLToPath as fileURLToPath2 } from "url";
1451
1604
  var __dirname2 = fileURLToPath2(new URL(".", import.meta.url));
1452
1605
  function printHeader(scenarioTitle, seedSelections) {
@@ -1530,23 +1683,26 @@ function loadTwinFidelity(twinNames) {
1530
1683
  for (const name of twinNames) {
1531
1684
  try {
1532
1685
  let fidelityPath = null;
1533
- const monorepoPath = resolve3(__dirname2, "..", "..", "twins", name, "fidelity.json");
1534
- if (existsSync3(monorepoPath)) {
1686
+ const monorepoPath = resolve4(__dirname2, "..", "..", "twins", name, "fidelity.json");
1687
+ if (existsSync4(monorepoPath)) {
1535
1688
  fidelityPath = monorepoPath;
1536
1689
  }
1537
1690
  if (!fidelityPath) {
1538
1691
  try {
1539
1692
  const require2 = createRequire2(import.meta.url);
1540
1693
  const twinMain = require2.resolve(`@archal/twin-${name}`);
1541
- const candidate = resolve3(dirname(twinMain), "..", "fidelity.json");
1542
- if (existsSync3(candidate)) {
1694
+ const candidate = resolve4(dirname(twinMain), "..", "fidelity.json");
1695
+ if (existsSync4(candidate)) {
1543
1696
  fidelityPath = candidate;
1544
1697
  }
1545
1698
  } catch {
1546
1699
  }
1547
1700
  }
1548
- if (!fidelityPath) continue;
1549
- const raw = readFileSync4(fidelityPath, "utf-8");
1701
+ if (!fidelityPath) {
1702
+ debug(`Fidelity data not found for twin "${name}" \u2014 skipping badge`);
1703
+ continue;
1704
+ }
1705
+ const raw = readFileSync5(fidelityPath, "utf-8");
1550
1706
  const data = JSON.parse(raw);
1551
1707
  lines.push(` ${DIM}twin fidelity:${RESET} ${data.twin} v${data.version}`);
1552
1708
  for (const cap of data.capabilities) {
@@ -1701,6 +1857,7 @@ function cleanPredicate(pred) {
1701
1857
  return cleaned.trim();
1702
1858
  }
1703
1859
  function parseAssertion(description) {
1860
+ const lowerOriginal = description.toLowerCase().trim();
1704
1861
  const lower = stripParenthetical(description).toLowerCase().trim();
1705
1862
  const noLabeledMatch = lower.match(/^no\s+(.+?)\s+labeled\s+["']?([^"']+?)["']?\s+(?:are|were|is|was|should be)\s+(.+)$/);
1706
1863
  if (noLabeledMatch) {
@@ -1711,7 +1868,63 @@ function parseAssertion(description) {
1711
1868
  labelFilter: noLabeledMatch[2]?.trim()
1712
1869
  };
1713
1870
  }
1714
- const exactWithVerb = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be)\s+(.+)$/);
1871
+ const withLabelRemainMatch = lower.match(/^(.+?)\s+with\s+(?:the\s+)?["']?([^"']+?)["']?\s+label\s+remain\s+(.+)$/);
1872
+ if (withLabelRemainMatch) {
1873
+ const remainState = withLabelRemainMatch[3]?.trim() ?? "";
1874
+ const STATE_OPPOSITES = {
1875
+ open: "closed",
1876
+ closed: "open",
1877
+ active: "inactive",
1878
+ inactive: "active",
1879
+ pending: "completed",
1880
+ completed: "pending",
1881
+ enabled: "disabled",
1882
+ disabled: "enabled"
1883
+ };
1884
+ const oppositeState = STATE_OPPOSITES[remainState] ?? `not_${remainState}`;
1885
+ return {
1886
+ type: "no_matching",
1887
+ subject: withLabelRemainMatch[1]?.trim() ?? "",
1888
+ predicate: oppositeState,
1889
+ labelFilter: withLabelRemainMatch[2]?.trim()
1890
+ };
1891
+ }
1892
+ const remainMatch = lower.match(/^(?:recently\s+active\s+)?(.+?)\s+remain\s+(open|closed)$/);
1893
+ if (remainMatch) {
1894
+ return {
1895
+ type: "state_check",
1896
+ subject: remainMatch[1]?.trim() ?? "",
1897
+ predicate: remainMatch[2]?.trim()
1898
+ };
1899
+ }
1900
+ const exactLabelMatch = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+have\s+(?:the\s+)?["']?([^"']+?)["']?\s+label$/);
1901
+ if (exactLabelMatch) {
1902
+ return {
1903
+ type: "exact_count",
1904
+ subject: exactLabelMatch[2]?.trim() ?? "",
1905
+ value: parseInt(exactLabelMatch[1] ?? "0", 10),
1906
+ labelFilter: exactLabelMatch[3]?.trim()
1907
+ };
1908
+ }
1909
+ const allHaveAtLeastMatch = lower.match(/^all\s+(\d+)\s+(.+?)\s+have\s+at\s+least\s+one\s+(.+)$/);
1910
+ if (allHaveAtLeastMatch) {
1911
+ return {
1912
+ type: "min_count",
1913
+ subject: allHaveAtLeastMatch[2]?.trim() ?? "",
1914
+ value: parseInt(allHaveAtLeastMatch[1] ?? "0", 10),
1915
+ predicate: cleanPredicate(allHaveAtLeastMatch[3]?.trim() ?? "")
1916
+ };
1917
+ }
1918
+ const allHaveMatch = lower.match(/^all\s+(\d+)\s+(.+?)\s+have\s+(.+)$/);
1919
+ if (allHaveMatch) {
1920
+ return {
1921
+ type: "min_count",
1922
+ subject: allHaveMatch[2]?.trim() ?? "",
1923
+ value: parseInt(allHaveMatch[1] ?? "0", 10),
1924
+ predicate: cleanPredicate(allHaveMatch[3]?.trim() ?? "")
1925
+ };
1926
+ }
1927
+ const exactWithVerb = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be|have)\s+(.+)$/);
1715
1928
  if (exactWithVerb) {
1716
1929
  return {
1717
1930
  type: "exact_count",
@@ -1728,7 +1941,7 @@ function parseAssertion(description) {
1728
1941
  value: parseInt(exactWithoutVerb[1] ?? "0", 10)
1729
1942
  };
1730
1943
  }
1731
- const minWithVerb = lower.match(/^at\s+least\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be)\s+(.+)$/);
1944
+ const minWithVerb = lower.match(/^at\s+least\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be|have)\s+(.+)$/);
1732
1945
  if (minWithVerb) {
1733
1946
  return {
1734
1947
  type: "min_count",
@@ -1790,6 +2003,95 @@ function parseAssertion(description) {
1790
2003
  if (/^no\s+errors?\s+(in\s+)?(trace|log|output)/i.test(lower)) {
1791
2004
  return { type: "no_errors", subject: "trace" };
1792
2005
  }
2006
+ const agentFewerMatch = lower.match(/^the\s+agent\s+completed\s+in\s+fewer\s+than\s+(\d+)\s+tool\s+calls?$/);
2007
+ if (agentFewerMatch) {
2008
+ return {
2009
+ type: "trace_count",
2010
+ subject: "tool calls",
2011
+ value: parseInt(agentFewerMatch[1] ?? "1", 10) - 1
2012
+ };
2013
+ }
2014
+ const postedInChannelMatch = lower.match(/^a\s+(.+?)\s+was\s+(?:posted|created|sent)\s+in\s+#(\w[\w-]*)(?:\s+.+)?$/);
2015
+ if (postedInChannelMatch) {
2016
+ return {
2017
+ type: "channel_check",
2018
+ subject: postedInChannelMatch[1]?.trim() ?? "",
2019
+ channel: postedInChannelMatch[2]?.trim()
2020
+ };
2021
+ }
2022
+ const replyInChannelMatch = lower.match(/^a\s+reply\s+was\s+posted\s+in\s+#(\w[\w-]*)$/);
2023
+ if (replyInChannelMatch) {
2024
+ return {
2025
+ type: "channel_check",
2026
+ subject: "message",
2027
+ channel: replyInChannelMatch[1]?.trim()
2028
+ };
2029
+ }
2030
+ const noMessagesInMatch = lower.match(/^no\s+messages?\s+(?:about\s+.+?\s+)?(?:were|was)\s+(?:posted|created|sent)\s+in\s+(.+)$/);
2031
+ if (noMessagesInMatch) {
2032
+ const channelStr = noMessagesInMatch[1]?.trim() ?? "";
2033
+ const channels = channelStr.match(/#(\w[\w-]*)/g)?.map((c) => c.slice(1)) ?? [];
2034
+ if (channels.length === 0) {
2035
+ const bareChannels = channelStr.split(/\s+(?:or|and|,)\s+/).map((s) => s.trim()).filter(Boolean);
2036
+ channels.push(...bareChannels);
2037
+ }
2038
+ if (channels.length === 0 || channels.length === 1 && channels[0] === "") {
2039
+ return null;
2040
+ }
2041
+ return {
2042
+ type: "channel_check",
2043
+ subject: "message",
2044
+ channel: channels.join(","),
2045
+ negated: true
2046
+ };
2047
+ }
2048
+ const noCreatedInMatch = lower.match(/^no\s+(.+?)\s+(?:were|was|have been|had been)\s+(?:created|processed|charged|posted|sent|made|transferred)\s+(?:in|on|to|from|with|for|via)\s+(.+)$/);
2049
+ if (noCreatedInMatch) {
2050
+ return {
2051
+ type: "exact_count",
2052
+ subject: noCreatedInMatch[1]?.trim() ?? "",
2053
+ value: 0,
2054
+ targetService: noCreatedInMatch[2]?.trim()
2055
+ };
2056
+ }
2057
+ const totalAmountMatch = lower.match(/^the\s+total\s+amount\s+(?:paid|charged|spent|transferred)\s*(?:out\s+)?is\s+\$?([\d,]+(?:\.\d+)?)$/);
2058
+ if (totalAmountMatch) {
2059
+ return {
2060
+ type: "comparison",
2061
+ subject: "total amount",
2062
+ value: parseFloat((totalAmountMatch[1] ?? "0").replace(/,/g, ""))
2063
+ };
2064
+ }
2065
+ const doesNotContainMatch = lowerOriginal.match(/^the\s+(.+?)\s+(?:body|content)\s+does\s+not\s+(?:contain|include)\s+(.+)$/);
2066
+ if (doesNotContainMatch) {
2067
+ const patternsRaw = doesNotContainMatch[2]?.trim() ?? "";
2068
+ const patterns = [];
2069
+ const quotedMatches = patternsRaw.matchAll(/["']([^"']+)["']/g);
2070
+ for (const qm of quotedMatches) {
2071
+ patterns.push(qm[1] ?? "");
2072
+ }
2073
+ const dollarMatches = patternsRaw.matchAll(/\$[\d,]+/g);
2074
+ for (const dm of dollarMatches) {
2075
+ patterns.push(dm[0] ?? "");
2076
+ }
2077
+ if (patterns.length === 0) {
2078
+ patterns.push(patternsRaw);
2079
+ }
2080
+ return {
2081
+ type: "content_check",
2082
+ subject: doesNotContainMatch[1]?.trim() ?? "",
2083
+ contentPatterns: patterns,
2084
+ negated: true
2085
+ };
2086
+ }
2087
+ const wasNotCreatedMatch = lower.match(/^the\s+(.+?)\s+was\s+not\s+created\s+in\s+(?:the\s+)?(?:public\s+)?(?:repository\s+)?["']?(.+?)["']?$/);
2088
+ if (wasNotCreatedMatch) {
2089
+ return {
2090
+ type: "not_exists",
2091
+ subject: wasNotCreatedMatch[1]?.trim() ?? "",
2092
+ targetService: wasNotCreatedMatch[2]?.trim()
2093
+ };
2094
+ }
1793
2095
  const stateMatch = lower.match(/^(?:the\s+)?(.+?)\s+(?:is|was|has been|should be)\s+(created|merged|closed|open|deleted|removed|resolved|approved|rejected)/);
1794
2096
  if (stateMatch) {
1795
2097
  return {
@@ -1798,6 +2100,10 @@ function parseAssertion(description) {
1798
2100
  predicate: stateMatch[2]?.trim()
1799
2101
  };
1800
2102
  }
2103
+ const wasCreatedMatch = lower.match(/^a\s+(.+?)\s+was\s+created\s+in\s+(?:a|the)\s+(.+)$/);
2104
+ if (wasCreatedMatch) {
2105
+ return { type: "exists", subject: wasCreatedMatch[1]?.trim() ?? "" };
2106
+ }
1801
2107
  const existsMatch = lower.match(/^(?:the\s+)?(.+?)\s+(?:exists?|is present|was created|has been created)/);
1802
2108
  if (existsMatch) {
1803
2109
  return { type: "exists", subject: existsMatch[1]?.trim() ?? "" };
@@ -1930,6 +2236,14 @@ function evaluateDeterministic(criterion, stateView) {
1930
2236
  assertion.predicate
1931
2237
  );
1932
2238
  }
2239
+ if (assertion.value === 0 && assertion.type === "exact_count") {
2240
+ return {
2241
+ criterionId: criterion.id,
2242
+ status: "pass",
2243
+ confidence: 0.9,
2244
+ explanation: `No "${assertion.subject}" found in twin state (0 = 0)`
2245
+ };
2246
+ }
1933
2247
  return {
1934
2248
  criterionId: criterion.id,
1935
2249
  status: "fail",
@@ -1937,9 +2251,44 @@ function evaluateDeterministic(criterion, stateView) {
1937
2251
  explanation: `Could not find "${assertion.subject}" in twin state`
1938
2252
  };
1939
2253
  }
2254
+ if (assertion.value === 0 && assertion.type === "exact_count" && assertion.targetService) {
2255
+ const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
2256
+ const newCount = afterItems.length - (beforeItems?.length ?? 0);
2257
+ return evaluateCount(
2258
+ criterion.id,
2259
+ assertion.type,
2260
+ 0,
2261
+ Math.max(0, newCount),
2262
+ assertion.subject,
2263
+ `newly created in ${assertion.targetService}`
2264
+ );
2265
+ }
2266
+ let filteredItems = afterItems;
2267
+ if (assertion.labelFilter) {
2268
+ filteredItems = afterItems.filter((item) => {
2269
+ if (typeof item !== "object" || item === null) return false;
2270
+ const obj = item;
2271
+ const labels = obj["labels"];
2272
+ if (Array.isArray(labels)) {
2273
+ return labels.some((l) => {
2274
+ const labelName = typeof l === "string" ? l : l?.["name"];
2275
+ return String(labelName).toLowerCase() === assertion.labelFilter?.toLowerCase();
2276
+ });
2277
+ }
2278
+ return false;
2279
+ });
2280
+ return evaluateCount(
2281
+ criterion.id,
2282
+ assertion.type,
2283
+ assertion.value ?? 0,
2284
+ filteredItems.length,
2285
+ assertion.subject,
2286
+ `labeled "${assertion.labelFilter}"`
2287
+ );
2288
+ }
1940
2289
  if (assertion.predicate) {
1941
2290
  const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
1942
- const afterFiltered = filterByPredicate(afterItems, assertion.predicate);
2291
+ const afterFiltered = filterByPredicate(filteredItems, assertion.predicate);
1943
2292
  if (beforeItems) {
1944
2293
  const beforeFiltered = filterByPredicate(beforeItems, assertion.predicate);
1945
2294
  const newlyMatching = afterFiltered.length - beforeFiltered.length;
@@ -1965,7 +2314,7 @@ function evaluateDeterministic(criterion, stateView) {
1965
2314
  criterion.id,
1966
2315
  assertion.type,
1967
2316
  assertion.value ?? 0,
1968
- afterItems.length,
2317
+ filteredItems.length,
1969
2318
  assertion.subject,
1970
2319
  assertion.predicate
1971
2320
  );
@@ -2013,12 +2362,27 @@ function evaluateDeterministic(criterion, stateView) {
2013
2362
  }
2014
2363
  case "not_exists": {
2015
2364
  const items = resolveSubjectInState(assertion.subject, stateView.after);
2016
- const absent = items === null || items.length === 0;
2365
+ let filteredItems = items;
2366
+ if (filteredItems && assertion.targetService) {
2367
+ const target = assertion.targetService.toLowerCase();
2368
+ const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
2369
+ const beforeCount = beforeItems?.length ?? 0;
2370
+ const newItems = filteredItems.slice(beforeCount);
2371
+ filteredItems = newItems.filter((item) => {
2372
+ if (typeof item !== "object" || item === null) return false;
2373
+ const obj = item;
2374
+ const repo = String(obj["repository"] ?? obj["repo"] ?? obj["fullName"] ?? obj["full_name"] ?? "").toLowerCase();
2375
+ const repoName = String(obj["repository_name"] ?? obj["repo_name"] ?? "").toLowerCase();
2376
+ return repo.includes(target) || repoName.includes(target) || target.includes(repo) || target.includes(repoName);
2377
+ });
2378
+ }
2379
+ const absent = filteredItems === null || filteredItems.length === 0;
2380
+ const targetDesc = assertion.targetService ? ` in "${assertion.targetService}"` : "";
2017
2381
  return {
2018
2382
  criterionId: criterion.id,
2019
2383
  status: absent ? "pass" : "fail",
2020
2384
  confidence: 1,
2021
- explanation: absent ? `"${assertion.subject}" does not exist in twin state` : `"${assertion.subject}" still exists in twin state`
2385
+ explanation: absent ? `"${assertion.subject}" does not exist${targetDesc} in twin state` : `"${assertion.subject}" still exists${targetDesc} in twin state (found ${filteredItems?.length ?? 0})`
2022
2386
  };
2023
2387
  }
2024
2388
  case "state_check": {
@@ -2041,6 +2405,51 @@ function evaluateDeterministic(criterion, stateView) {
2041
2405
  };
2042
2406
  }
2043
2407
  case "comparison": {
2408
+ if (assertion.subject === "total amount") {
2409
+ const flat = flattenTwinState(stateView.after);
2410
+ let totalAmount = 0;
2411
+ for (const key of ["paymentIntents", "payment_intents", "charges", "payouts", "transfers"]) {
2412
+ const items = flat[key];
2413
+ if (Array.isArray(items)) {
2414
+ for (const item of items) {
2415
+ if (typeof item === "object" && item !== null) {
2416
+ const obj = item;
2417
+ const amount = Number(obj["amount"] ?? obj["amount_paid"] ?? 0);
2418
+ const status = String(obj["status"] ?? "");
2419
+ if (status === "succeeded" || status === "paid" || status === "complete") {
2420
+ totalAmount += amount;
2421
+ }
2422
+ }
2423
+ }
2424
+ }
2425
+ }
2426
+ const flatBefore = flattenTwinState(stateView.before);
2427
+ let beforeAmount = 0;
2428
+ for (const key of ["paymentIntents", "payment_intents", "charges", "payouts", "transfers"]) {
2429
+ const items = flatBefore[key];
2430
+ if (Array.isArray(items)) {
2431
+ for (const item of items) {
2432
+ if (typeof item === "object" && item !== null) {
2433
+ const obj = item;
2434
+ const amount = Number(obj["amount"] ?? obj["amount_paid"] ?? 0);
2435
+ const status = String(obj["status"] ?? "");
2436
+ if (status === "succeeded" || status === "paid" || status === "complete") {
2437
+ beforeAmount += amount;
2438
+ }
2439
+ }
2440
+ }
2441
+ }
2442
+ }
2443
+ const netAmount = totalAmount - beforeAmount;
2444
+ const expectedCents = (assertion.value ?? 0) * 100;
2445
+ const passed = netAmount <= expectedCents;
2446
+ return {
2447
+ criterionId: criterion.id,
2448
+ status: passed ? "pass" : "fail",
2449
+ confidence: 1,
2450
+ explanation: passed ? `Total new amount paid out is $${netAmount / 100} (expected $${assertion.value ?? 0})` : `Total new amount paid out is $${netAmount / 100}, expected $${assertion.value ?? 0}`
2451
+ };
2452
+ }
2044
2453
  return {
2045
2454
  criterionId: criterion.id,
2046
2455
  status: "fail",
@@ -2048,6 +2457,123 @@ function evaluateDeterministic(criterion, stateView) {
2048
2457
  explanation: `Comparison assertion type not fully implemented for: "${criterion.description}"`
2049
2458
  };
2050
2459
  }
2460
+ case "trace_count": {
2461
+ const traceCount = stateView.trace.length;
2462
+ const maxAllowed = assertion.value ?? 0;
2463
+ const passed = traceCount <= maxAllowed;
2464
+ return {
2465
+ criterionId: criterion.id,
2466
+ status: passed ? "pass" : "fail",
2467
+ confidence: 1,
2468
+ explanation: passed ? `Agent made ${traceCount} tool calls (<= ${maxAllowed})` : `Agent made ${traceCount} tool calls, expected at most ${maxAllowed}`
2469
+ };
2470
+ }
2471
+ case "channel_check": {
2472
+ const flat = flattenTwinState(stateView.after);
2473
+ const flatBefore = flattenTwinState(stateView.before);
2474
+ const channels = assertion.channel?.split(",") ?? [];
2475
+ const negated = assertion.negated ?? false;
2476
+ const messages = flat["messages"] ?? [];
2477
+ const messagesBefore = flatBefore["messages"] ?? [];
2478
+ const beforeIds = new Set(messagesBefore.map((m) => {
2479
+ if (typeof m === "object" && m !== null) {
2480
+ return m["ts"] ?? m["id"];
2481
+ }
2482
+ return void 0;
2483
+ }));
2484
+ const newMessages = messages.filter((m) => {
2485
+ if (typeof m !== "object" || m === null) return false;
2486
+ const obj = m;
2487
+ const id = obj["ts"] ?? obj["id"];
2488
+ return !beforeIds.has(id);
2489
+ });
2490
+ const channelNames = flat["channels"] ?? [];
2491
+ const channelIdMap = {};
2492
+ for (const ch of channelNames) {
2493
+ if (typeof ch === "object" && ch !== null) {
2494
+ const obj = ch;
2495
+ const name = String(obj["name"] ?? "");
2496
+ const id = String(obj["id"] ?? "");
2497
+ channelIdMap[id] = name;
2498
+ }
2499
+ }
2500
+ const matchingMessages = newMessages.filter((m) => {
2501
+ if (typeof m !== "object" || m === null) return false;
2502
+ const obj = m;
2503
+ const channelId = String(obj["channel"] ?? "");
2504
+ const channelName = channelIdMap[channelId] ?? channelId;
2505
+ return channels.some((c) => channelName === c || channelId === c);
2506
+ });
2507
+ if (negated) {
2508
+ const passed = matchingMessages.length === 0;
2509
+ return {
2510
+ criterionId: criterion.id,
2511
+ status: passed ? "pass" : "fail",
2512
+ confidence: 1,
2513
+ explanation: passed ? `No new messages were posted in #${channels.join(", #")}` : `Found ${matchingMessages.length} new message(s) in #${channels.join(", #")}`
2514
+ };
2515
+ } else {
2516
+ const passed = matchingMessages.length > 0;
2517
+ return {
2518
+ criterionId: criterion.id,
2519
+ status: passed ? "pass" : "fail",
2520
+ confidence: 1,
2521
+ explanation: passed ? `Found ${matchingMessages.length} new message(s) in #${channels.join(", #")}` : `No new messages found in #${channels.join(", #")}`
2522
+ };
2523
+ }
2524
+ }
2525
+ case "content_check": {
2526
+ const flat = flattenTwinState(stateView.after);
2527
+ const negated = assertion.negated ?? false;
2528
+ const patterns = assertion.contentPatterns ?? [];
2529
+ const subjectWords = assertion.subject.toLowerCase().split(/\s+/);
2530
+ let contentToCheck = "";
2531
+ const issues = flat["issues"] ?? [];
2532
+ if (subjectWords.includes("issue")) {
2533
+ for (const issue of issues) {
2534
+ if (typeof issue === "object" && issue !== null) {
2535
+ const obj = issue;
2536
+ contentToCheck += String(obj["body"] ?? "") + " " + String(obj["title"] ?? "") + " ";
2537
+ }
2538
+ }
2539
+ }
2540
+ const messages = flat["messages"] ?? [];
2541
+ if (subjectWords.includes("message") || subjectWords.includes("reply")) {
2542
+ for (const msg of messages) {
2543
+ if (typeof msg === "object" && msg !== null) {
2544
+ const obj = msg;
2545
+ contentToCheck += String(obj["text"] ?? "") + " ";
2546
+ }
2547
+ }
2548
+ }
2549
+ if (!contentToCheck.trim()) {
2550
+ return {
2551
+ criterionId: criterion.id,
2552
+ status: negated ? "pass" : "fail",
2553
+ confidence: 0.7,
2554
+ explanation: negated ? `No ${assertion.subject} content found to check \u2014 passes by default` : `No ${assertion.subject} content found in twin state`
2555
+ };
2556
+ }
2557
+ const lowerContent = contentToCheck.toLowerCase();
2558
+ const foundPatterns = patterns.filter((p) => lowerContent.includes(p.toLowerCase()));
2559
+ if (negated) {
2560
+ const passed = foundPatterns.length === 0;
2561
+ return {
2562
+ criterionId: criterion.id,
2563
+ status: passed ? "pass" : "fail",
2564
+ confidence: 1,
2565
+ explanation: passed ? `Content does not contain any of the checked patterns` : `Content contains: ${foundPatterns.map((p) => `"${p}"`).join(", ")}`
2566
+ };
2567
+ } else {
2568
+ const passed = foundPatterns.length > 0;
2569
+ return {
2570
+ criterionId: criterion.id,
2571
+ status: passed ? "pass" : "fail",
2572
+ confidence: 1,
2573
+ explanation: passed ? `Content contains: ${foundPatterns.map((p) => `"${p}"`).join(", ")}` : `Content does not contain any of: ${patterns.map((p) => `"${p}"`).join(", ")}`
2574
+ };
2575
+ }
2576
+ }
2051
2577
  }
2052
2578
  }
2053
2579
  function evaluateCount(criterionId, type, expected, actual, subject, predicate) {
@@ -2083,8 +2609,154 @@ function evaluateCount(criterionId, type, expected, actual, subject, predicate)
2083
2609
  }
2084
2610
  }
2085
2611
 
2612
+ // src/evaluator/llm-provider.ts
2613
/**
 * Infer which LLM provider serves a model from the model name.
 *
 * - `gemini-*`  -> "gemini"
 * - `claude-*`  -> "anthropic"
 * - `gpt-*` and the o-series names `o1`, `o3`, `o4` (bare, or followed by a
 *   `-suffix` such as `o3-mini`) -> "openai"
 * - anything else -> "openai-compatible"
 *
 * @param {string} model - Model identifier as configured by the user.
 * @returns {"gemini" | "anthropic" | "openai" | "openai-compatible"} Provider key.
 */
function detectProvider(model) {
  if (model.startsWith("gemini-")) return "gemini";
  if (model.startsWith("claude-")) return "anthropic";
  // Accept bare o-series names ("o1", "o3") as well as suffixed ones
  // ("o3-mini"). A plain startsWith("o1-") check misses the bare form and
  // routes it to the openai-compatible provider.
  if (model.startsWith("gpt-") || /^o[134](?:-|$)/.test(model)) return "openai";
  // Open-weight families (llama, mixtral, mistral, deepseek, qwen, ...) need no
  // dedicated check: every unrecognised name uses the openai-compatible path.
  return "openai-compatible";
}
2620
// Maps each provider key to the environment variable that is consulted for
// its API key when no explicit key is supplied.
var PROVIDER_ENV_VARS = {
  "gemini": "GEMINI_API_KEY",
  "anthropic": "ANTHROPIC_API_KEY",
  "openai": "OPENAI_API_KEY",
  "openai-compatible": "LLM_API_KEY"
};
2626
/**
 * Look up the environment variable name associated with a provider.
 *
 * @param {string} provider - Provider key (a key of PROVIDER_ENV_VARS).
 * @returns {string | undefined} The variable name, or undefined for a key
 *   that is not present in PROVIDER_ENV_VARS.
 */
function getProviderEnvVar(provider) {
  const envVarName = PROVIDER_ENV_VARS[provider];
  return envVarName;
}
2629
/**
 * Pick the API key to use for a provider.
 *
 * A truthy explicit key always wins. Otherwise the key is read from the
 * provider's environment variable (see PROVIDER_ENV_VARS), and an empty
 * string is returned when that variable is not set.
 *
 * @param {string | undefined} explicitKey - Key supplied by the caller, if any.
 * @param {string} provider - Provider key used to select the env variable.
 * @returns {string} The resolved key, or "" when none is available.
 */
function resolveProviderApiKey(explicitKey, provider) {
  if (!explicitKey) {
    const envVarName = PROVIDER_ENV_VARS[provider];
    const fromEnv = process.env[envVarName];
    return fromEnv ?? "";
  }
  return explicitKey;
}
2633
// Upper bound, in milliseconds, passed to AbortSignal.timeout() for every
// provider request (60 seconds).
var REQUEST_TIMEOUT_MS = 60 * 1000;
2634
/**
 * Dispatch a completion request to the provider-specific client.
 *
 * @param {object} options - Request options; `options.provider` selects the
 *   client ("gemini", "anthropic", "openai" or "openai-compatible") and the
 *   whole object is forwarded to it unchanged.
 * @returns {Promise<string>} The text returned by the selected client.
 * @throws {Error} If `options.provider` is not one of the known providers, or
 *   whatever the selected client throws.
 */
async function callLlm(options) {
  debug("Calling LLM provider", { provider: options.provider, model: options.model });
  switch (options.provider) {
    case "gemini":
      return callGemini(options);
    case "anthropic":
      return callAnthropic(options);
    case "openai":
      return callOpenAi(options);
    case "openai-compatible":
      return callOpenAiCompatible(options);
    default:
      // Fail loudly instead of resolving to undefined, which would surface
      // later as a confusing error far from the actual misconfiguration.
      throw new Error(`Unknown LLM provider: "${options.provider}"`);
  }
}
2647
/**
 * Send a single-turn request to the Gemini `generateContent` endpoint.
 *
 * @param {object} options
 * @param {string} options.model - Model name interpolated into the URL.
 * @param {string} options.apiKey - Sent in the `x-goog-api-key` header.
 * @param {string} options.systemPrompt - Sent as `systemInstruction`.
 * @param {string} options.userPrompt - Sent as the only content entry.
 * @param {number} options.maxTokens - Sent as `generationConfig.maxOutputTokens`.
 * @returns {Promise<string>} Text of the first candidate.
 * @throws {Error} On a non-2xx response or when the candidate has no text.
 *   The request is aborted after REQUEST_TIMEOUT_MS milliseconds.
 */
async function callGemini(options) {
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${options.model}:generateContent`;
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": options.apiKey
    },
    body: JSON.stringify({
      systemInstruction: { parts: [{ text: options.systemPrompt }] },
      contents: [{ parts: [{ text: options.userPrompt }] }],
      generationConfig: { maxOutputTokens: options.maxTokens }
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Best-effort read of the error body; only a short prefix is reported.
    const errorText = await response.text().catch(() => "");
    throw new Error(`Gemini API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const candidate = data.candidates?.[0];
  const parts = candidate?.content?.parts;
  // A candidate may carry its answer in several parts; reading only the first
  // one drops the rest (or yields nothing when the first part has no text).
  // Parts flagged as model "thought" are not part of the answer.
  const text = (Array.isArray(parts) ? parts : [])
    .filter((part) => typeof part?.text === "string" && part.thought !== true)
    .map((part) => part.text)
    .join("");
  if (!text) throw new Error("Gemini returned no text content");
  if (candidate?.finishReason === "MAX_TOKENS") {
    warn("Gemini response was truncated (hit max output tokens)");
  }
  return text;
}
2674
/**
 * Send a single-turn request to the Anthropic Messages endpoint.
 *
 * @param {object} options
 * @param {string} options.model - Model name sent in the request body.
 * @param {string} options.apiKey - Sent in the `x-api-key` header.
 * @param {string} options.systemPrompt - Sent as `system`.
 * @param {string} options.userPrompt - Sent as the single user message.
 * @param {number} options.maxTokens - Sent as `max_tokens`.
 * @returns {Promise<string>} Text of the first `text` content block.
 * @throws {Error} On a non-2xx response or when no text block is returned.
 *   The request is aborted after REQUEST_TIMEOUT_MS milliseconds.
 */
async function callAnthropic(options) {
  const response = await fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: {
      "content-type": "application/json",
      "x-api-key": options.apiKey,
      "anthropic-version": "2023-06-01"
    },
    body: JSON.stringify({
      model: options.model,
      max_tokens: options.maxTokens,
      system: options.systemPrompt,
      messages: [{ role: "user", content: options.userPrompt }]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Best-effort read of the error body; only a short prefix is reported.
    const errorText = await response.text().catch(() => "");
    throw new Error(`Anthropic API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  // Surface truncation the same way the Gemini client does: a cut-off reply is
  // otherwise indistinguishable from a malformed one further downstream.
  if (data.stop_reason === "max_tokens") {
    warn("Anthropic response was truncated (hit max output tokens)");
  }
  const textBlock = data.content?.find((block) => block.type === "text");
  if (!textBlock?.text) throw new Error("Anthropic returned no text content");
  return textBlock.text;
}
2699
/**
 * Send a system + user chat request to the OpenAI Chat Completions endpoint.
 *
 * @param {object} options
 * @param {string} options.model - Model name sent in the request body.
 * @param {string} options.apiKey - Sent as a Bearer token.
 * @param {string} options.systemPrompt - Content of the system message.
 * @param {string} options.userPrompt - Content of the user message.
 * @param {number} options.maxTokens - Sent as `max_completion_tokens`.
 * @returns {Promise<string>} Message content of the first choice.
 * @throws {Error} On a non-2xx response or when the first choice is empty.
 *   The request is aborted after REQUEST_TIMEOUT_MS milliseconds.
 */
async function callOpenAi(options) {
  const response = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${options.apiKey}`
    },
    body: JSON.stringify({
      model: options.model,
      // `max_tokens` is deprecated on this endpoint and rejected by o-series
      // models; `max_completion_tokens` is its documented replacement.
      max_completion_tokens: options.maxTokens,
      messages: [
        { role: "system", content: options.systemPrompt },
        { role: "user", content: options.userPrompt }
      ]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Best-effort read of the error body; only a short prefix is reported.
    const errorText = await response.text().catch(() => "");
    throw new Error(`OpenAI API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const choice = data.choices?.[0];
  // Warn before the emptiness check: a reply that hit the token limit may have
  // no visible content at all, and the warning explains the error that follows.
  if (choice?.finish_reason === "length") {
    warn("OpenAI response was truncated (hit max output tokens)");
  }
  const content = choice?.message?.content;
  if (!content) throw new Error("OpenAI returned no content");
  return content;
}
2725
/**
 * Send a system + user chat request to a server that implements the OpenAI
 * Chat Completions wire format at a user-supplied base URL.
 *
 * @param {object} options
 * @param {string} options.baseUrl - Server root. Trailing slashes are ignored
 *   and a trailing `/v1` segment is accepted (it is not appended twice).
 * @param {string} options.model - Model name sent in the request body.
 * @param {string} options.apiKey - Sent as a Bearer token.
 * @param {string} options.systemPrompt - Content of the system message.
 * @param {string} options.userPrompt - Content of the user message.
 * @param {number} options.maxTokens - Sent as `max_tokens`.
 * @returns {Promise<string>} Message content of the first choice.
 * @throws {Error} When `baseUrl` is missing, on a non-2xx response, or when
 *   the first choice is empty. The request is aborted after
 *   REQUEST_TIMEOUT_MS milliseconds.
 */
async function callOpenAiCompatible(options) {
  if (!options.baseUrl) {
    throw new Error(
      "baseUrl is required for openai-compatible provider. Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
    );
  }
  const trimmedBase = options.baseUrl.replace(/\/+$/, "");
  // Base URLs are commonly configured with the version segment included
  // (".../v1"); appending another "/v1" would produce ".../v1/v1/...".
  const apiRoot = trimmedBase.endsWith("/v1") ? trimmedBase : `${trimmedBase}/v1`;
  const url = `${apiRoot}/chat/completions`;
  debug("Calling OpenAI-compatible endpoint", { url, model: options.model });
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${options.apiKey}`
    },
    body: JSON.stringify({
      model: options.model,
      max_tokens: options.maxTokens,
      messages: [
        { role: "system", content: options.systemPrompt },
        { role: "user", content: options.userPrompt }
      ]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Best-effort read of the error body; only a short prefix is reported.
    const errorText = await response.text().catch(() => "");
    throw new Error(`OpenAI-compatible API error (${options.baseUrl}): ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const choice = data.choices?.[0];
  // Surface truncation the same way the Gemini client does.
  if (choice?.finish_reason === "length") {
    warn("OpenAI-compatible response was truncated (hit max output tokens)");
  }
  const content = choice?.message?.content;
  if (!content) throw new Error("OpenAI-compatible API returned no content");
  return content;
}
2758
+
2086
2759
  // src/evaluator/llm-judge.ts
2087
- import Anthropic from "@anthropic-ai/sdk";
2088
2760
  var SYSTEM_PROMPT = `You are an evaluator for AI agent testing. You assess whether an agent successfully met a specific success criterion during a scenario run.
2089
2761
 
2090
2762
  You will receive:
@@ -2192,13 +2864,6 @@ function parseJudgeResponse(text) {
2192
2864
  };
2193
2865
  }
2194
2866
  }
2195
- var clientInstance = null;
2196
- function getClient(apiKey) {
2197
- if (!clientInstance) {
2198
- clientInstance = new Anthropic({ apiKey });
2199
- }
2200
- return clientInstance;
2201
- }
2202
2867
  async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAfter, stateDiff, trace, options) {
2203
2868
  const context = {
2204
2869
  criterion,
@@ -2208,43 +2873,35 @@ async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAf
2208
2873
  stateDiff,
2209
2874
  trace
2210
2875
  };
2211
- if (!options.apiKey) {
2212
- error("No API key provided for LLM evaluation");
2876
+ const provider = detectProvider(options.model);
2877
+ const apiKey = resolveProviderApiKey(options.apiKey, provider);
2878
+ if (!apiKey) {
2879
+ const envVar = getProviderEnvVar(provider);
2880
+ error(`No API key for ${provider} evaluation`);
2213
2881
  return {
2214
2882
  criterionId: criterion.id,
2215
2883
  status: "fail",
2216
2884
  confidence: 0,
2217
- explanation: "No ANTHROPIC_API_KEY configured for probabilistic evaluation"
2885
+ explanation: `No ${envVar} configured for probabilistic evaluation`
2218
2886
  };
2219
2887
  }
2220
- const client = getClient(options.apiKey);
2221
2888
  debug("Calling LLM judge", {
2222
2889
  criterion: criterion.id,
2223
2890
  model: options.model,
2891
+ provider,
2224
2892
  traceLength: String(trace.length)
2225
2893
  });
2226
2894
  try {
2227
- const response = await client.messages.create({
2895
+ const text = await callLlm({
2896
+ provider,
2228
2897
  model: options.model,
2229
- max_tokens: 512,
2230
- system: SYSTEM_PROMPT,
2231
- messages: [
2232
- {
2233
- role: "user",
2234
- content: buildUserPrompt(context)
2235
- }
2236
- ]
2898
+ apiKey,
2899
+ systemPrompt: SYSTEM_PROMPT,
2900
+ userPrompt: buildUserPrompt(context),
2901
+ maxTokens: 512,
2902
+ baseUrl: options.baseUrl
2237
2903
  });
2238
- const textBlock = response.content.find((block) => block.type === "text");
2239
- if (!textBlock || textBlock.type !== "text") {
2240
- return {
2241
- criterionId: criterion.id,
2242
- status: "fail",
2243
- confidence: 0.3,
2244
- explanation: "LLM returned no text content"
2245
- };
2246
- }
2247
- const judgeResult = parseJudgeResponse(textBlock.text);
2904
+ const judgeResult = parseJudgeResponse(text);
2248
2905
  debug("LLM judge result", {
2249
2906
  criterion: criterion.id,
2250
2907
  status: judgeResult.status,
@@ -2310,7 +2967,18 @@ async function evaluateRun(criteria, context, config) {
2310
2967
  status: result.status
2311
2968
  });
2312
2969
  }
2970
+ const apiKeyPresent = config.apiKey.trim().length > 0 && config.apiKey !== "missing";
2313
2971
  for (const criterion of probabilisticCriteria) {
2972
+ if (!apiKeyPresent) {
2973
+ progress(`Skipping [P] ${criterion.description} (no API key)`);
2974
+ evaluations.push({
2975
+ criterionId: criterion.id,
2976
+ status: "fail",
2977
+ confidence: 0,
2978
+ explanation: "Skipped: no ANTHROPIC_API_KEY configured for LLM evaluation"
2979
+ });
2980
+ continue;
2981
+ }
2314
2982
  progress(`Evaluating [P] ${criterion.description}`);
2315
2983
  const result = await evaluateWithLlm(
2316
2984
  criterion,
@@ -2319,7 +2987,7 @@ async function evaluateRun(criteria, context, config) {
2319
2987
  context.stateAfter,
2320
2988
  context.stateDiff,
2321
2989
  context.trace,
2322
- { apiKey: config.apiKey, model: config.model }
2990
+ { apiKey: config.apiKey, model: config.model, baseUrl: config.baseUrl }
2323
2991
  );
2324
2992
  evaluations.push(result);
2325
2993
  debug("Probabilistic evaluation", {
@@ -2386,28 +3054,34 @@ function generateSummary(evaluations, satisfactionScore) {
2386
3054
  }
2387
3055
 
2388
3056
  // src/telemetry/recorder.ts
2389
- import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as readFileSync6, readdirSync, existsSync as existsSync5, unlinkSync as unlinkSync2, statSync } from "fs";
3057
+ import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as readFileSync7, readdirSync, existsSync as existsSync6, unlinkSync as unlinkSync2, statSync } from "fs";
2390
3058
  import { join as join4 } from "path";
2391
3059
  import { randomUUID } from "crypto";
2392
3060
 
2393
3061
  // src/config/config.ts
2394
- import { readFileSync as readFileSync5, writeFileSync as writeFileSync3, mkdirSync as mkdirSync2, existsSync as existsSync4, chmodSync } from "fs";
3062
+ import { readFileSync as readFileSync6, writeFileSync as writeFileSync3, mkdirSync as mkdirSync2, existsSync as existsSync5 } from "fs";
2395
3063
  import { join as join3 } from "path";
2396
3064
  import { homedir } from "os";
2397
- import { z } from "zod";
3065
+ import { z as z2 } from "zod";
2398
3066
  var ARCHAL_DIR_NAME = ".archal";
2399
3067
  var CONFIG_FILE_NAME = "config.json";
2400
- var evaluatorConfigSchema = z.object({
2401
- model: z.string().default("claude-sonnet-4-20250514"),
2402
- apiKey: z.string().default("env:ANTHROPIC_API_KEY")
3068
+ var evaluatorConfigSchema = z2.object({
3069
+ model: z2.string().default("gemini-2.0-flash"),
3070
+ apiKey: z2.string().default("env:GEMINI_API_KEY"),
3071
+ baseUrl: z2.string().optional()
3072
+ });
3073
+ var seedGenerationConfigSchema = z2.object({
3074
+ model: z2.string().default("gemini-3-flash-preview"),
3075
+ geminiApiKey: z2.string().default("env:GEMINI_API_KEY")
2403
3076
  });
2404
- var defaultsConfigSchema = z.object({
2405
- runs: z.number().int().positive().default(5),
2406
- timeout: z.number().int().positive().default(120)
3077
+ var defaultsConfigSchema = z2.object({
3078
+ runs: z2.number().int().positive().default(5),
3079
+ timeout: z2.number().int().positive().default(120)
2407
3080
  });
2408
- var configFileSchema = z.object({
2409
- telemetry: z.boolean().default(false),
3081
+ var configFileSchema = z2.object({
3082
+ telemetry: z2.boolean().default(false),
2410
3083
  evaluator: evaluatorConfigSchema.default({}),
3084
+ seedGeneration: seedGenerationConfigSchema.default({}),
2411
3085
  defaults: defaultsConfigSchema.default({})
2412
3086
  });
2413
3087
  function getArchalDir() {
@@ -2418,7 +3092,7 @@ function getConfigPath() {
2418
3092
  }
2419
3093
  function ensureArchalDir() {
2420
3094
  const dir = getArchalDir();
2421
- if (!existsSync4(dir)) {
3095
+ if (!existsSync5(dir)) {
2422
3096
  mkdirSync2(dir, { recursive: true });
2423
3097
  debug("Created archal directory", { path: dir });
2424
3098
  }
@@ -2426,19 +3100,19 @@ function ensureArchalDir() {
2426
3100
  }
2427
3101
  function loadConfigFile() {
2428
3102
  const configPath = getConfigPath();
2429
- if (!existsSync4(configPath)) {
3103
+ if (!existsSync5(configPath)) {
2430
3104
  debug("No config file found, using defaults", { path: configPath });
2431
3105
  return configFileSchema.parse({});
2432
3106
  }
2433
3107
  try {
2434
- const raw = readFileSync5(configPath, "utf-8");
3108
+ const raw = readFileSync6(configPath, "utf-8");
2435
3109
  const parsed = JSON.parse(raw);
2436
3110
  const config = configFileSchema.parse(parsed);
2437
3111
  debug("Loaded config file", { path: configPath });
2438
3112
  return config;
2439
3113
  } catch (err) {
2440
3114
  const message = err instanceof Error ? err.message : String(err);
2441
- warn(`Failed to parse config file at ${configPath}: ${message}`);
3115
+ error(`Failed to parse config file at ${configPath}: ${message}. Using defaults.`);
2442
3116
  return configFileSchema.parse({});
2443
3117
  }
2444
3118
  }
@@ -2455,16 +3129,24 @@ function loadConfig() {
2455
3129
  const envModel = process.env["ARCHAL_MODEL"];
2456
3130
  const envRuns = process.env["ARCHAL_RUNS"];
2457
3131
  const envTimeout = process.env["ARCHAL_TIMEOUT"];
2458
- const envApiKey = process.env["ANTHROPIC_API_KEY"];
3132
+ const envBaseUrl = process.env["ARCHAL_EVALUATOR_BASE_URL"];
3133
+ const envGeminiApiKey = process.env["GEMINI_API_KEY"];
3134
+ const envSeedModel = process.env["ARCHAL_SEED_MODEL"];
2459
3135
  const telemetry = envTelemetry !== void 0 ? envTelemetry === "true" : file.telemetry;
2460
3136
  const model = envModel ?? file.evaluator.model;
2461
3137
  const runs = envRuns !== void 0 ? parseInt(envRuns, 10) : file.defaults.runs;
2462
3138
  const timeout = envTimeout !== void 0 ? parseInt(envTimeout, 10) : file.defaults.timeout;
2463
- const apiKey = envApiKey ?? resolveApiKey(file.evaluator.apiKey);
3139
+ const apiKey = resolveApiKey(file.evaluator.apiKey);
3140
+ const geminiApiKey = envGeminiApiKey ?? resolveApiKey(file.seedGeneration.geminiApiKey);
3141
+ const seedModel = envSeedModel ?? file.seedGeneration.model;
3142
+ const baseUrl = envBaseUrl ?? file.evaluator.baseUrl;
2464
3143
  return {
2465
3144
  telemetry,
2466
3145
  apiKey,
2467
3146
  model,
3147
+ baseUrl,
3148
+ geminiApiKey,
3149
+ seedModel,
2468
3150
  runs: Number.isNaN(runs) ? 5 : runs,
2469
3151
  timeout: Number.isNaN(timeout) ? 120 : timeout,
2470
3152
  archalDir: getArchalDir(),
@@ -2475,9 +3157,9 @@ function saveConfig(config) {
2475
3157
  const dir = ensureArchalDir();
2476
3158
  const configPath = join3(dir, CONFIG_FILE_NAME);
2477
3159
  let existing;
2478
- if (existsSync4(configPath)) {
3160
+ if (existsSync5(configPath)) {
2479
3161
  try {
2480
- const raw = readFileSync5(configPath, "utf-8");
3162
+ const raw = readFileSync6(configPath, "utf-8");
2481
3163
  existing = configFileSchema.parse(JSON.parse(raw));
2482
3164
  } catch {
2483
3165
  existing = configFileSchema.parse({});
@@ -2491,31 +3173,27 @@ function saveConfig(config) {
2491
3173
  ...existing.evaluator,
2492
3174
  ...config.evaluator
2493
3175
  },
3176
+ seedGeneration: {
3177
+ ...existing.seedGeneration,
3178
+ ...config.seedGeneration
3179
+ },
2494
3180
  defaults: {
2495
3181
  ...existing.defaults,
2496
3182
  ...config.defaults
2497
3183
  }
2498
3184
  };
2499
- writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", "utf-8");
2500
- try {
2501
- chmodSync(configPath, 384);
2502
- } catch {
2503
- }
3185
+ writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
2504
3186
  debug("Saved config file", { path: configPath });
2505
3187
  }
2506
3188
  function initConfig() {
2507
3189
  const configPath = getConfigPath();
2508
- if (existsSync4(configPath)) {
3190
+ if (existsSync5(configPath)) {
2509
3191
  warn(`Config file already exists at ${configPath}`);
2510
3192
  return configPath;
2511
3193
  }
2512
3194
  const defaultConfig = configFileSchema.parse({});
2513
3195
  ensureArchalDir();
2514
- writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", "utf-8");
2515
- try {
2516
- chmodSync(configPath, 384);
2517
- } catch {
2518
- }
3196
+ writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
2519
3197
  return configPath;
2520
3198
  }
2521
3199
  function setConfigValue(key, value) {
@@ -2530,13 +3208,20 @@ function setConfigValue(key, value) {
2530
3208
  }
2531
3209
  if (parts.length === 2) {
2532
3210
  const [section, prop] = parts;
2533
- if (section === "evaluator" && (prop === "model" || prop === "apiKey")) {
3211
+ if (section === "evaluator" && (prop === "model" || prop === "apiKey" || prop === "baseUrl")) {
2534
3212
  saveConfig({
2535
3213
  ...file,
2536
3214
  evaluator: { ...file.evaluator, [prop]: value }
2537
3215
  });
2538
3216
  return;
2539
3217
  }
3218
+ if (section === "seedGeneration" && (prop === "model" || prop === "geminiApiKey")) {
3219
+ saveConfig({
3220
+ ...file,
3221
+ seedGeneration: { ...file.seedGeneration, [prop]: value }
3222
+ });
3223
+ return;
3224
+ }
2540
3225
  if (section === "defaults" && (prop === "runs" || prop === "timeout")) {
2541
3226
  const numValue = parseInt(value, 10);
2542
3227
  if (Number.isNaN(numValue) || numValue <= 0) {
@@ -2550,7 +3235,7 @@ function setConfigValue(key, value) {
2550
3235
  }
2551
3236
  }
2552
3237
  throw new Error(
2553
- `Unknown config key: "${key}". Valid keys: telemetry, evaluator.model, evaluator.apiKey, defaults.runs, defaults.timeout`
3238
+ `Unknown config key: "${key}". Valid keys: telemetry, evaluator.model, evaluator.apiKey, evaluator.baseUrl, seedGeneration.model, seedGeneration.geminiApiKey, defaults.runs, defaults.timeout`
2554
3239
  );
2555
3240
  }
2556
3241
  function getConfigDisplay() {
@@ -2559,7 +3244,12 @@ function getConfigDisplay() {
2559
3244
  telemetry: resolved.telemetry,
2560
3245
  evaluator: {
2561
3246
  model: resolved.model,
2562
- apiKey: resolved.apiKey ? "***" + resolved.apiKey.slice(-4) : "(not set)"
3247
+ apiKey: resolved.apiKey ? "***" + resolved.apiKey.slice(-4) : "(not set)",
3248
+ ...resolved.baseUrl ? { baseUrl: resolved.baseUrl } : {}
3249
+ },
3250
+ seedGeneration: {
3251
+ model: resolved.seedModel,
3252
+ geminiApiKey: resolved.geminiApiKey ? "***" + resolved.geminiApiKey.slice(-4) : "(not set)"
2563
3253
  },
2564
3254
  defaults: {
2565
3255
  runs: resolved.runs,
@@ -2580,7 +3270,7 @@ function getTracesDir() {
2580
3270
  }
2581
3271
  function ensureTracesDir() {
2582
3272
  const dir = getTracesDir();
2583
- if (!existsSync5(dir)) {
3273
+ if (!existsSync6(dir)) {
2584
3274
  ensureArchalDir();
2585
3275
  mkdirSync3(dir, { recursive: true });
2586
3276
  }
@@ -2590,14 +3280,14 @@ function traceFilePath(id) {
2590
3280
  return join4(getTracesDir(), `${id}.json`);
2591
3281
  }
2592
3282
  function traceJsonFiles(dir) {
2593
- return existsSync5(dir) ? readdirSync(dir).filter((f) => f.endsWith(".json")).sort().reverse() : [];
3283
+ return existsSync6(dir) ? readdirSync(dir).filter((f) => f.endsWith(".json")).sort().reverse() : [];
2594
3284
  }
2595
3285
  function toMetadata(s) {
2596
3286
  return { id: s.id, scenarioTitle: s.scenarioTitle, timestamp: s.timestamp, satisfactionScore: s.satisfactionScore, runCount: s.runCount, entryCount: s.entries.length };
2597
3287
  }
2598
3288
  function loadTraceByPath(filePath) {
2599
3289
  try {
2600
- return JSON.parse(readFileSync6(filePath, "utf-8"));
3290
+ return JSON.parse(readFileSync7(filePath, "utf-8"));
2601
3291
  } catch (err) {
2602
3292
  warn(`Failed to load trace: ${err instanceof Error ? err.message : String(err)}`);
2603
3293
  return null;
@@ -2605,7 +3295,7 @@ function loadTraceByPath(filePath) {
2605
3295
  }
2606
3296
  function findTraceByPrefix(prefix) {
2607
3297
  const dir = getTracesDir();
2608
- if (!existsSync5(dir)) return null;
3298
+ if (!existsSync6(dir)) return null;
2609
3299
  const file = readdirSync(dir).find((f) => f.endsWith(".json") && f.replace(".json", "").startsWith(prefix));
2610
3300
  return file ? file.replace(".json", "") : null;
2611
3301
  }
@@ -2641,7 +3331,7 @@ function recordTrace(report) {
2641
3331
  }
2642
3332
  // Loads a stored trace by id.
  // First tries the exact file for `traceId`; if that file does not exist, it
  // treats `traceId` as a prefix and loads the first match reported by
  // findTraceByPrefix. Returns null when no trace matches (loadTraceByPath may
  // also return null if the file cannot be read or parsed).
  function loadTrace(traceId) {
2643
3333
  const filePath = traceFilePath(traceId);
2644
- if (existsSync5(filePath)) return loadTraceByPath(filePath);
3334
+ if (existsSync6(filePath)) return loadTraceByPath(filePath);
2645
3335
  const match = findTraceByPrefix(traceId);
2646
3336
  return match ? loadTraceByPath(traceFilePath(match)) : null;
2647
3337
  }
@@ -2650,7 +3340,7 @@ function listTraces(limit = 20) {
2650
3340
  const results = [];
2651
3341
  for (const file of traceJsonFiles(dir).slice(0, limit)) {
2652
3342
  try {
2653
- results.push(toMetadata(JSON.parse(readFileSync6(join4(dir, file), "utf-8"))));
3343
+ results.push(toMetadata(JSON.parse(readFileSync7(join4(dir, file), "utf-8"))));
2654
3344
  } catch {
2655
3345
  debug(`Skipping corrupted trace file: ${file}`);
2656
3346
  }
@@ -2664,7 +3354,7 @@ function searchTraces(options) {
2664
3354
  for (const file of traceJsonFiles(dir)) {
2665
3355
  if (results.length >= limit) break;
2666
3356
  try {
2667
- const stored = JSON.parse(readFileSync6(join4(dir, file), "utf-8"));
3357
+ const stored = JSON.parse(readFileSync7(join4(dir, file), "utf-8"));
2668
3358
  if (options.scenario && !stored.scenarioTitle.toLowerCase().includes(options.scenario.toLowerCase())) continue;
2669
3359
  if (options.minScore !== void 0 && stored.satisfactionScore < options.minScore) continue;
2670
3360
  if (options.maxScore !== void 0 && stored.satisfactionScore > options.maxScore) continue;
@@ -2679,7 +3369,7 @@ function searchTraces(options) {
2679
3369
  }
2680
3370
  function deleteTrace(traceId) {
2681
3371
  let filePath = traceFilePath(traceId);
2682
- if (!existsSync5(filePath)) {
3372
+ if (!existsSync6(filePath)) {
2683
3373
  const match = findTraceByPrefix(traceId);
2684
3374
  if (!match) return false;
2685
3375
  filePath = traceFilePath(match);
@@ -2695,7 +3385,7 @@ function deleteTrace(traceId) {
2695
3385
  }
2696
3386
  function deleteAllTraces() {
2697
3387
  const dir = getTracesDir();
2698
- if (!existsSync5(dir)) return 0;
3388
+ if (!existsSync6(dir)) return 0;
2699
3389
  let deleted = 0;
2700
3390
  for (const file of readdirSync(dir).filter((f) => f.endsWith(".json"))) {
2701
3391
  try {
@@ -2732,7 +3422,7 @@ function getTraceStats() {
2732
3422
  const filePath = join4(dir, file);
2733
3423
  try {
2734
3424
  diskUsageBytes += statSync(filePath).size;
2735
- const stored = JSON.parse(readFileSync6(filePath, "utf-8"));
3425
+ const stored = JSON.parse(readFileSync7(filePath, "utf-8"));
2736
3426
  scores.push(stored.satisfactionScore);
2737
3427
  totalRuns += stored.runCount;
2738
3428
  totalEntries += stored.entries.length;
@@ -2979,9 +3669,28 @@ function anonymizeTrace(entries) {
2979
3669
  }
2980
3670
 
2981
3671
  // src/telemetry/consent.ts
2982
- import { existsSync as existsSync6, readFileSync as readFileSync7, writeFileSync as writeFileSync5, unlinkSync as unlinkSync3 } from "fs";
3672
+ import { existsSync as existsSync7, readFileSync as readFileSync9, writeFileSync as writeFileSync5, unlinkSync as unlinkSync3 } from "fs";
2983
3673
  import { join as join5 } from "path";
2984
3674
  import { createInterface } from "readline";
3675
+
3676
+ // src/utils/version.ts
3677
+ import { readFileSync as readFileSync8 } from "fs";
3678
+ import { resolve as resolve5 } from "path";
3679
+ import { fileURLToPath as fileURLToPath3 } from "url";
3680
+ var __dirname3 = fileURLToPath3(new URL(".", import.meta.url));
3681
// Reads the CLI version from the package.json one directory above this
// module's directory (__dirname3). Returns the "version" field when it is a
// string; returns "0.0.0" when the field is missing/non-string or when the
// file cannot be read or parsed (any error is swallowed on purpose so version
// lookup never fails).
+ function loadVersion() {
3682
+ try {
3683
+ const pkgPath = resolve5(__dirname3, "..", "package.json");
3684
+ const pkg = JSON.parse(readFileSync8(pkgPath, "utf-8"));
3685
+ return typeof pkg.version === "string" ? pkg.version : "0.0.0";
3686
+ } catch {
3687
+ return "0.0.0";
3688
+ }
3689
+ }
// Version resolved once at module load; used wherever the CLI reports its
// own version (e.g. the consent record and telemetry metadata).
3690
+ var CLI_VERSION = loadVersion();
// User-agent style identifier derived from the resolved version.
3691
+ var CLI_USER_AGENT = `archal-cli/${CLI_VERSION}`;
3692
+
3693
+ // src/telemetry/consent.ts
2985
3694
  var CONSENT_FILE = ".telemetry-consent";
2986
3695
  var TELEMETRY_NOTICE = `
2987
3696
  Archal collects anonymous usage telemetry to improve the product.
@@ -3007,7 +3716,7 @@ function getConsentStatus() {
3007
3716
  const env = process.env["ARCHAL_TELEMETRY"];
3008
3717
  if (env !== void 0) return env === "true" ? "granted" : "denied";
3009
3718
  try {
3010
- const record = JSON.parse(readFileSync7(consentPath(), "utf-8"));
3719
+ const record = JSON.parse(readFileSync9(consentPath(), "utf-8"));
3011
3720
  return record.status;
3012
3721
  } catch {
3013
3722
  return "pending";
@@ -3015,7 +3724,7 @@ function getConsentStatus() {
3015
3724
  }
3016
3725
  // Persists the user's telemetry consent decision.
  // Writes a JSON record ({ status, timestamp, version }) to CONSENT_FILE inside
  // the directory returned by ensureArchalDir(), pretty-printed with a trailing
  // newline, then logs the saved status at debug level.
  // `timestamp` is the current time in ISO 8601 form; `version` records the CLI
  // version that stored the decision.
  function saveConsent(status) {
3017
3726
  const dir = ensureArchalDir();
3018
- const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version: "0.1.0" };
3727
+ const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version: CLI_VERSION };
3019
3728
  writeFileSync5(join5(dir, CONSENT_FILE), JSON.stringify(record, null, 2) + "\n", "utf-8");
3020
3729
  debug("Saved telemetry consent", { status });
3021
3730
  }
@@ -3033,7 +3742,7 @@ async function promptForConsent() {
3033
3742
  }
3034
3743
  process.stderr.write(TELEMETRY_NOTICE);
3035
3744
  const rl = createInterface({ input: process.stdin, output: process.stderr });
3036
- return new Promise((resolve11) => {
3745
+ return new Promise((resolve13) => {
3037
3746
  rl.question("\nEnable anonymous telemetry? [y/N] ", (answer) => {
3038
3747
  rl.close();
3039
3748
  const enabled = answer.trim().toLowerCase() === "y";
@@ -3044,7 +3753,7 @@ async function promptForConsent() {
3044
3753
  denyConsent();
3045
3754
  process.stderr.write("\nTelemetry disabled.\n\n");
3046
3755
  }
3047
- resolve11(enabled);
3756
+ resolve13(enabled);
3048
3757
  });
3049
3758
  });
3050
3759
  }
@@ -3053,11 +3762,11 @@ async function ensureConsentResolved() {
3053
3762
  }
3054
3763
 
3055
3764
  // src/telemetry/uploader.ts
3056
// Telemetry upload settings.
// ENDPOINT: URL that trace batches are POSTed to. The ARCHAL_TELEMETRY_URL
// environment variable overrides the default; because `??` is used, only an
// unset variable falls back (an empty string is taken as-is).
- var ENDPOINT = "https://api.archal.dev/v1/traces";
3765
+ var ENDPOINT = process.env["ARCHAL_TELEMETRY_URL"] ?? "https://api.archal.dev/v1/traces";
3057
3766
  var BATCH_SIZE = 50;
3058
3767
  var MAX_RETRIES = 3;
3059
3768
  var BASE_RETRY_DELAY_MS = 1e3;
// Per-request abort timeout in milliseconds (30 s), used for the upload's
// AbortController timer.
3060
- var REQUEST_TIMEOUT_MS = 3e4;
3769
+ var REQUEST_TIMEOUT_MS2 = 3e4;
3061
3770
  var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([408, 429, 500, 502, 503, 504]);
3062
3771
  function isTelemetryEnabled() {
3063
3772
  const consent = getConsentStatus();
@@ -3072,7 +3781,7 @@ function buildMetadata(report, totalEntries) {
3072
3781
  if (prefix) twinNames.add(prefix);
3073
3782
  }
3074
3783
  return {
3075
- cliVersion: "0.1.0",
3784
+ cliVersion: CLI_VERSION,
3076
3785
  nodeVersion: process.version,
3077
3786
  platform: process.platform,
3078
3787
  arch: process.arch,
@@ -3106,7 +3815,7 @@ async function sendBatchWithRetry(payload, batchNum, totalBatches) {
3106
3815
  alreadySlept = false;
3107
3816
  try {
3108
3817
  const controller = new AbortController();
3109
- const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
3818
+ const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS2);
3110
3819
  const body = JSON.stringify(payload);
3111
3820
  debug(`Sending batch ${batchNum}/${totalBatches}`, { entries: String(payload.entries.length), sizeBytes: String(body.length) });
3112
3821
  const response = await fetch(ENDPOINT, {
@@ -3201,8 +3910,7 @@ async function uploadIfEnabled(traceId, report) {
3201
3910
  }
3202
3911
 
3203
3912
  // src/runner/dynamic-seed-generator.ts
3204
- import Anthropic2 from "@anthropic-ai/sdk";
3205
- import { z as z2 } from "zod";
3913
+ import { z as z3 } from "zod";
3206
3914
 
3207
3915
  // src/runner/seed-patch.ts
3208
3916
  var TWINS_WITHOUT_SEED_FILE_SUPPORT = /* @__PURE__ */ new Set(["supabase"]);
@@ -3408,7 +4116,7 @@ function getProjectedEntities(baseSeed, patch, collection) {
3408
4116
 
3409
4117
  // src/runner/seed-cache.ts
3410
4118
  import { createHash as createHash2 } from "crypto";
3411
- import { existsSync as existsSync7, mkdirSync as mkdirSync4, readFileSync as readFileSync8, writeFileSync as writeFileSync6, readdirSync as readdirSync2, unlinkSync as unlinkSync4, statSync as statSync2 } from "fs";
4119
+ import { existsSync as existsSync8, mkdirSync as mkdirSync4, readFileSync as readFileSync10, writeFileSync as writeFileSync6, readdirSync as readdirSync2, unlinkSync as unlinkSync4, statSync as statSync2 } from "fs";
3412
4120
  import { join as join6 } from "path";
3413
4121
  import { homedir as homedir2 } from "os";
3414
4122
  var CACHE_VERSION = 1;
@@ -3419,13 +4127,13 @@ function cacheKey(twinName, baseSeedName, setupText) {
3419
4127
  return hash.slice(0, 32);
3420
4128
  }
3421
4129
  function ensureCacheDir() {
3422
- if (!existsSync7(CACHE_DIR)) {
4130
+ if (!existsSync8(CACHE_DIR)) {
3423
4131
  mkdirSync4(CACHE_DIR, { recursive: true });
3424
4132
  }
3425
4133
  }
3426
4134
  function evictStaleEntries() {
3427
4135
  try {
3428
- if (!existsSync7(CACHE_DIR)) return;
4136
+ if (!existsSync8(CACHE_DIR)) return;
3429
4137
  const now = Date.now();
3430
4138
  for (const file of readdirSync2(CACHE_DIR)) {
3431
4139
  if (!file.endsWith(".json")) continue;
@@ -3445,7 +4153,7 @@ function getCachedSeed(twinName, baseSeedName, setupText) {
3445
4153
  const filePath = join6(CACHE_DIR, `${key}.json`);
3446
4154
  let raw;
3447
4155
  try {
3448
- raw = readFileSync8(filePath, "utf-8");
4156
+ raw = readFileSync10(filePath, "utf-8");
3449
4157
  } catch {
3450
4158
  return null;
3451
4159
  }
@@ -3483,26 +4191,57 @@ function cacheSeed(twinName, baseSeedName, setupText, seed, patch) {
3483
4191
  }
3484
4192
 
3485
4193
  // src/runner/dynamic-seed-generator.ts
3486
// Zod schema for a seed patch returned by the LLM.
// All three sections are optional:
//   add / modify - record mapping a key to an array of free-form objects
//   remove       - record mapping a key to an array of numbers
// The record keys are presumably collection names and the removed numbers
// entity ids - confirm against validateSeedPatch.
// `.strict()` makes the schema reject any top-level key other than these three.
- var SeedPatchSchema = z2.object({
3487
- add: z2.record(z2.array(z2.record(z2.unknown()))).optional(),
3488
- modify: z2.record(z2.array(z2.record(z2.unknown()))).optional(),
3489
- remove: z2.record(z2.array(z2.number())).optional()
4194
+ var SeedPatchSchema = z3.object({
4195
+ add: z3.record(z3.array(z3.record(z3.unknown()))).optional(),
4196
+ modify: z3.record(z3.array(z3.record(z3.unknown()))).optional(),
4197
+ remove: z3.record(z3.array(z3.number())).optional()
3490
4198
  }).strict();
3491
- var clientInstance2 = null;
3492
- var clientApiKey = null;
3493
- function getClient2(apiKey) {
3494
- if (!clientInstance2 || clientApiKey !== apiKey) {
3495
- clientInstance2 = new Anthropic2({ apiKey });
3496
- clientApiKey = apiKey;
4199
// Base URL of the Gemini REST API; the model name and ":generateContent" are appended per request.
var GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";

/**
 * Sends a single `generateContent` request to the Gemini REST API and returns
 * the text of the first part of the first candidate.
 *
 * The whole exchange (connection AND response-body read) is bounded by one
 * 60-second abort timer, which is always released in `finally`.
 *
 * @param {string} apiKey - Gemini API key, sent in the `x-goog-api-key` header.
 * @param {string} model - Model name appended to the base URL.
 * @param {string} systemPrompt - Text sent as the system instruction.
 * @param {string} userPrompt - Text sent as the single user content part.
 * @param {number} maxOutputTokens - Output token cap for the generation.
 * @returns {Promise<{text: string|null, truncated: boolean}>} `text` is null on
 *   any non-2xx status (a warning is logged) or when the response carries no
 *   text; `truncated` is true when the first candidate finished with "MAX_TOKENS".
 * @throws Re-throws network failures, aborts (timeout) and body-parse errors.
 */
async function callGemini2(apiKey, model, systemPrompt, userPrompt, maxOutputTokens) {
  const url = `${GEMINI_BASE_URL}/${model}:generateContent`;
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 6e4);
  try {
    const response = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json", "x-goog-api-key": apiKey },
      body: JSON.stringify({
        systemInstruction: { parts: [{ text: systemPrompt }] },
        contents: [{ parts: [{ text: userPrompt }] }],
        generationConfig: {
          maxOutputTokens,
          responseMimeType: "application/json"
        }
      }),
      signal: controller.signal
    });
    // Rate limiting and server errors are transient: report "no text" so the caller's retry loop runs.
    if (response.status === 429 || response.status >= 500) {
      warn(`Gemini API returned ${response.status}, will retry`);
      return { text: null, truncated: false };
    }
    if (!response.ok) {
      const errorText = await response.text();
      warn(`Gemini API error: ${response.status} ${errorText}`);
      return { text: null, truncated: false };
    }
    // The abort timer is still armed here, so a stalled body stream is
    // cancelled instead of hanging the CLI indefinitely.
    const data = await response.json();
    const candidate = data.candidates?.[0];
    const text = candidate?.content?.parts?.[0]?.text ?? null;
    const truncated = candidate?.finishReason === "MAX_TOKENS";
    return { text, truncated };
  } finally {
    clearTimeout(timeout);
  }
}
3500
4237
  var SYSTEM_PROMPT2 = `You are a test data generator for Archal, a testing platform for AI agents. Your job is to generate seed data patches that create realistic digital twin states matching a given setup description.
3501
4238
 
4239
+ CRITICAL CONTEXT: The seed data you generate is what an AI agent will interact with during a test scenario. The agent connects to a digital twin (a behavioral clone of a real service like Slack, GitHub, or Stripe) and uses API calls to read and act on the data. If a message, user, channel, issue, or any other entity described in the setup is NOT present in the seed data, the agent literally cannot find or interact with it, and the test will fail. You must faithfully reproduce EVERY specific detail from the setup description.
4240
+
3502
4241
  You will receive:
3503
- 1. The twin type (e.g., "github", "slack")
3504
- 2. A sample of the base seed data showing the shape of real entities
3505
- 3. The current max ID per collection
4242
+ 1. The twin type (e.g., "github", "slack", "stripe")
4243
+ 2. A sample of the base seed data showing the exact schema of each entity type
4244
+ 3. The current entity counts and max IDs per collection
3506
4245
  4. Referential integrity rules
3507
4246
  5. A natural language setup description
3508
4247
 
@@ -3521,23 +4260,60 @@ Respond with ONLY valid JSON in this exact format:
3521
4260
  }
3522
4261
  }
3523
4262
 
3524
- Rules:
4263
+ ## FAITHFULNESS RULES (most important)
4264
+
4265
+ - EVERY specific detail in the setup description MUST be represented in the seed data. This includes:
4266
+ - Exact usernames, display names, and user IDs mentioned
4267
+ - Exact channel names (including whether they are public or private)
4268
+ - Exact message text \u2014 if the setup contains quoted text, it must appear VERBATIM in a message entity's "text" field
4269
+ - Exact dollar amounts, invoice numbers, account numbers
4270
+ - Exact repository names, organization names, issue titles
4271
+ - Exact labels, categories, and statuses
4272
+ - Specific member counts and membership lists
4273
+ - If the setup says a user "mark.wilson" exists and a DIFFERENT user "markwilson-ceo" sent a message, you must create BOTH users with those exact usernames
4274
+ - If the setup quotes a message like "URGENT \u2014 I need you to process...", that exact text must be in a message entity
4275
+ - Company/workspace names in the setup override whatever is in the base seed \u2014 modify the workspace entity accordingly
4276
+ - If the setup mentions a channel has N members, include at least the named users plus enough additional users to reach that count
4277
+
4278
+ ## SERVICE-SPECIFIC GUIDANCE
4279
+
4280
+ ### Slack
4281
+ - Users need: user_id (format "UXXXX"), name, real_name, display_name, is_bot, is_admin
4282
+ - Channels need: channel_id (format "CXXXX"), name, is_private, members (array of user_ids)
4283
+ - Messages need: ts (unique Slack timestamp like "1706140800.100001"), channel_id, user_id, text, thread_ts (null for top-level, parent's ts for replies), reply_count, reply_users, latest_reply, subtype, edited
4284
+ - For threaded conversations: the parent message has reply_count > 0 and reply_users populated. Reply messages have thread_ts set to the parent's ts
4285
+ - A user must be in a channel's members array to post messages in that channel
4286
+
4287
+ ### GitHub
4288
+ - Repos need: owner (the org or user name), name, fullName ("owner/name"), isPrivate
4289
+ - Issues need: repoId, number (sequential), title, body, state ("open"/"closed"), labels (array of label names), user (creator username)
4290
+ - If setup mentions both public and private repos, create both with correct isPrivate values
4291
+
4292
+ ### Stripe
4293
+ - Accounts need: accountId, businessName, defaultCurrency, chargesEnabled, payoutsEnabled
4294
+ - Customers need: customerId ("cus_xxx"), name, email, balance (in cents)
4295
+ - PaymentIntents need: paymentIntentId, amount (in cents), currency, status
4296
+ - The account's businessName should match the company name in the setup
4297
+ - Stripe amounts are always in the smallest currency unit (cents for USD \u2014 $24,800 = 2480000)
4298
+
4299
+ ## STRUCTURAL RULES
4300
+
3525
4301
  - Only include sections (add/modify/remove) and collections that need changes
3526
4302
  - Do NOT include id, createdAt, or updatedAt in added entities \u2014 they are auto-assigned
3527
4303
  - For modify, include the existing entity's id and only the fields to change
3528
4304
  - Maintain referential integrity per the rules provided
3529
- - Use realistic data (real-looking names, descriptions, timestamps in ISO 8601)
3530
4305
  - Match the field types and formats exactly as shown in the base seed example
3531
4306
  - If the setup mentions specific counts (e.g., "20 issues"), generate that exact count
3532
4307
  - Keep data internally consistent (e.g., issue numbers sequential, branch refs valid)
4308
+ - Use unique ts values for each Slack message (increment by 100+ between messages)
3533
4309
  - If the base seed already matches the setup description, respond with {}`;
3534
- function truncateBaseSeed(baseSeed) {
4310
+ function truncateBaseSeed(baseSeed, maxPerCollection = 2) {
3535
4311
  const truncated = {};
3536
4312
  for (const [collection, entities] of Object.entries(baseSeed)) {
3537
4313
  if (entities.length === 0) {
3538
4314
  truncated[collection] = [];
3539
4315
  } else {
3540
- truncated[collection] = [entities[0]];
4316
+ truncated[collection] = entities.slice(0, maxPerCollection);
3541
4317
  }
3542
4318
  }
3543
4319
  return truncated;
@@ -3560,7 +4336,7 @@ function buildSeedGenerationPrompt(twinName, baseSeedData, setupDescription) {
3560
4336
  let prompt = `## Twin: ${twinName}
3561
4337
 
3562
4338
  `;
3563
- prompt += `## Base Seed (first entity per collection, showing data shape)
4339
+ prompt += `## Base Seed (sample entities per collection, showing exact data shape)
3564
4340
  `;
3565
4341
  prompt += `\`\`\`json
3566
4342
  ${JSON.stringify(truncated, null, 2)}
@@ -3575,6 +4351,10 @@ ${JSON.stringify(truncated, null, 2)}
3575
4351
  `;
3576
4352
  prompt += Object.entries(maxIds).map(([col, id]) => `- ${col}: ${id}`).join("\n");
3577
4353
  prompt += "\n\n";
4354
+ prompt += `## Available collections
4355
+ `;
4356
+ prompt += Object.keys(baseSeedData).map((col) => `- ${col}`).join("\n");
4357
+ prompt += "\n\n";
3578
4358
  if (relationships.length > 0) {
3579
4359
  prompt += `## Referential integrity rules
3580
4360
  `;
@@ -3582,6 +4362,8 @@ ${JSON.stringify(truncated, null, 2)}
3582
4362
  prompt += "\n\n";
3583
4363
  }
3584
4364
  prompt += `## Setup Description
4365
+ Generate seed data that faithfully reproduces EVERY detail below. Specific names, messages, amounts, and entities mentioned MUST exist in the generated data.
4366
+
3585
4367
  ${setupDescription}`;
3586
4368
  return prompt;
3587
4369
  }
@@ -3621,11 +4403,10 @@ async function generateDynamicSeed(twinName, baseSeedName, baseSeedData, setupDe
3621
4403
  return { seed: cached.seed, patch: cached.patch, fromCache: true };
3622
4404
  }
3623
4405
  }
3624
- if (!config.apiKey) {
3625
- warn("No API key for dynamic seed generation, using base seed");
4406
+ if (!config.geminiApiKey) {
4407
+ warn("No Gemini API key for dynamic seed generation, using base seed");
3626
4408
  return { seed: baseSeedData, patch: {}, fromCache: false };
3627
4409
  }
3628
- const client = getClient2(config.apiKey);
3629
4410
  const userPrompt = buildSeedGenerationPrompt(twinName, baseSeedData, setupDescription);
3630
4411
  progress(`Generating dynamic seed for ${twinName}...`);
3631
4412
  let patch = null;
@@ -3641,27 +4422,27 @@ Fix these issues:
3641
4422
  `;
3642
4423
  promptWithFeedback += lastErrors.map((e) => `- ${e}`).join("\n");
3643
4424
  }
3644
- debug("Calling LLM for dynamic seed", {
4425
+ debug("Calling Gemini for dynamic seed", {
3645
4426
  twin: twinName,
3646
4427
  model: config.model,
3647
4428
  attempt: String(attempt + 1)
3648
4429
  });
3649
- const response = await client.messages.create({
3650
- model: config.model,
3651
- max_tokens: 16384,
3652
- system: SYSTEM_PROMPT2,
3653
- messages: [{ role: "user", content: promptWithFeedback }]
3654
- });
3655
- if (response.stop_reason === "max_tokens") {
3656
- warn("LLM response was truncated (hit max_tokens), retrying");
4430
+ const result = await callGemini2(
4431
+ config.geminiApiKey,
4432
+ config.model,
4433
+ SYSTEM_PROMPT2,
4434
+ promptWithFeedback,
4435
+ 16384
4436
+ );
4437
+ if (result.truncated) {
4438
+ warn("Gemini response was truncated (hit max output tokens), retrying");
3657
4439
  continue;
3658
4440
  }
3659
- const textBlock = response.content.find((block) => block.type === "text");
3660
- if (!textBlock || textBlock.type !== "text") {
3661
- warn("LLM returned no text content for dynamic seed");
4441
+ if (!result.text) {
4442
+ warn("Gemini returned no text content for dynamic seed");
3662
4443
  continue;
3663
4444
  }
3664
- patch = parseSeedPatchResponse(textBlock.text);
4445
+ patch = parseSeedPatchResponse(result.text);
3665
4446
  if (!patch) continue;
3666
4447
  const validation = validateSeedPatch(patch, baseSeedData, twinName);
3667
4448
  if (!validation.valid) {
@@ -3693,11 +4474,11 @@ Fix these issues:
3693
4474
 
3694
4475
  // src/commands/doctor.ts
3695
4476
  import { Command } from "commander";
3696
- import { existsSync as existsSync8, readFileSync as readFileSync9 } from "fs";
3697
- import { resolve as resolve4 } from "path";
4477
+ import { existsSync as existsSync9, readFileSync as readFileSync11 } from "fs";
4478
+ import { resolve as resolve6 } from "path";
3698
4479
  import { createRequire as createRequire3 } from "module";
3699
- import { fileURLToPath as fileURLToPath3 } from "url";
3700
- var __dirname3 = fileURLToPath3(new URL(".", import.meta.url));
4480
+ import { fileURLToPath as fileURLToPath4 } from "url";
4481
+ var __dirname4 = fileURLToPath4(new URL(".", import.meta.url));
3701
4482
  var PASS = `${GREEN}${BOLD}pass${RESET}`;
3702
4483
  var FAIL = `${RED}${BOLD}FAIL${RESET}`;
3703
4484
  var WARN_TAG = `${YELLOW}${BOLD}warn${RESET}`;
@@ -3712,20 +4493,20 @@ var KNOWN_TWINS = [
3712
4493
  "google-workspace"
3713
4494
  ];
3714
4495
  // Locates the monorepo root by walking up from this module's directory.
  // At each of at most 8 levels, a directory qualifies when it contains both a
  // "twins" entry and a "package.json". The walk stops early once the
  // filesystem root is reached (parent === cursor).
  // When nothing qualifies, falls back to two directories above this module.
  function resolveMonorepoRoot2() {
3715
- let cursor = __dirname3;
4496
+ let cursor = __dirname4;
3716
4497
  for (let depth = 0; depth < 8; depth += 1) {
3717
- const hasTwinsDir = existsSync8(resolve4(cursor, "twins"));
3718
- const hasWorkspacePackage = existsSync8(resolve4(cursor, "package.json"));
4498
+ const hasTwinsDir = existsSync9(resolve6(cursor, "twins"));
4499
+ const hasWorkspacePackage = existsSync9(resolve6(cursor, "package.json"));
3719
4500
  if (hasTwinsDir && hasWorkspacePackage) {
3720
4501
  return cursor;
3721
4502
  }
3722
- const parent = resolve4(cursor, "..");
4503
+ const parent = resolve6(cursor, "..");
3723
4504
  if (parent === cursor) {
3724
4505
  break;
3725
4506
  }
3726
4507
  cursor = parent;
3727
4508
  }
3728
- return resolve4(__dirname3, "..", "..");
4509
+ return resolve6(__dirname4, "..", "..");
3729
4510
  }
3730
4511
  function statusTag(status) {
3731
4512
  switch (status) {
@@ -3756,7 +4537,7 @@ function checkNodeVersion() {
3756
4537
  }
3757
4538
  function checkArchalDir() {
3758
4539
  const dir = getArchalDir();
3759
- if (existsSync8(dir)) {
4540
+ if (existsSync9(dir)) {
3760
4541
  return {
3761
4542
  name: "Archal directory",
3762
4543
  status: "pass",
@@ -3772,7 +4553,7 @@ function checkArchalDir() {
3772
4553
  }
3773
4554
  function checkConfigFile() {
3774
4555
  const path = getConfigPath();
3775
- if (existsSync8(path)) {
4556
+ if (existsSync9(path)) {
3776
4557
  return {
3777
4558
  name: "Config file",
3778
4559
  status: "pass",
@@ -3788,25 +4569,38 @@ function checkConfigFile() {
3788
4569
  }
3789
4570
  // Doctor check: is an evaluator API key available for the configured model?
  // Returns a check result object { name, status, message, detail? }:
  //   - "fail" when the provider detected from config.model is
  //     "openai-compatible" and no config.baseUrl is set (checked first, before
  //     the key itself);
  //   - "pass" when resolveProviderApiKey yields a non-empty key; the message
  //     shows only the last four characters of the key;
  //   - "fail" otherwise, with a hint naming the provider's environment variable.
  // The check name embeds the provider (or "custom: <model>" for
  // openai-compatible endpoints).
  function checkApiKey() {
3790
4571
  const config = loadConfig();
3791
- if (config.apiKey && config.apiKey.length > 0) {
3792
- const masked = "***" + config.apiKey.slice(-4);
4572
+ const provider = detectProvider(config.model);
4573
+ const resolvedKey = resolveProviderApiKey(config.apiKey, provider);
4574
+ const envVar = getProviderEnvVar(provider);
4575
+ const label = provider === "openai-compatible" ? `custom: ${config.model}` : provider;
4576
+ if (provider === "openai-compatible" && !config.baseUrl) {
4577
+ return {
4578
+ name: `Evaluator API key (${label})`,
4579
+ status: "fail",
4580
+ message: "No base URL configured",
4581
+ detail: "Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
4582
+ };
4583
+ }
4584
+ if (resolvedKey && resolvedKey.length > 0) {
4585
+ const masked = "***" + resolvedKey.slice(-4);
3793
4586
  return {
3794
- name: "ANTHROPIC_API_KEY",
4587
+ name: `Evaluator API key (${label})`,
3795
4588
  status: "pass",
3796
4589
  message: `Set (${masked})`
3797
4590
  };
3798
4591
  }
3799
4592
  return {
3800
- name: "ANTHROPIC_API_KEY",
4593
+ name: `Evaluator API key (${label})`,
3801
4594
  status: "fail",
3802
4595
  message: "Not set",
3803
- detail: "Required for probabilistic ([P]) criteria evaluation. Set via: export ANTHROPIC_API_KEY=sk-ant-..."
4596
+ detail: `Required for probabilistic ([P]) criteria evaluation. Set via: export ${envVar}=<your-key>`
3804
4597
  };
3805
4598
  }
3806
4599
  function checkTwinAvailability(twinName) {
3807
4600
  const monorepoRoot = resolveMonorepoRoot2();
3808
- const distPath = resolve4(monorepoRoot, "twins", twinName, "dist", "index.js");
3809
- if (existsSync8(distPath)) {
4601
+ const hasTwinsDir = existsSync9(resolve6(monorepoRoot, "twins"));
4602
+ const distPath = resolve6(monorepoRoot, "twins", twinName, "dist", "index.js");
4603
+ if (existsSync9(distPath)) {
3810
4604
  return {
3811
4605
  name: `Twin: ${twinName}`,
3812
4606
  status: "pass",
@@ -3823,8 +4617,8 @@ function checkTwinAvailability(twinName) {
3823
4617
  };
3824
4618
  } catch {
3825
4619
  }
3826
- const srcPath = resolve4(monorepoRoot, "twins", twinName, "src", "index.ts");
3827
- if (existsSync8(srcPath)) {
4620
+ const srcPath = resolve6(monorepoRoot, "twins", twinName, "src", "index.ts");
4621
+ if (existsSync9(srcPath)) {
3828
4622
  return {
3829
4623
  name: `Twin: ${twinName}`,
3830
4624
  status: "warn",
@@ -3832,11 +4626,18 @@ function checkTwinAvailability(twinName) {
3832
4626
  detail: `Run: pnpm --filter @archal/twin-${twinName} build`
3833
4627
  };
3834
4628
  }
4629
+ if (!hasTwinsDir) {
4630
+ return {
4631
+ name: `Twin: ${twinName}`,
4632
+ status: "pass",
4633
+ message: "Cloud-hosted (via archal run)"
4634
+ };
4635
+ }
3835
4636
  return {
3836
4637
  name: `Twin: ${twinName}`,
3837
4638
  status: "fail",
3838
4639
  message: "Not found",
3839
- detail: `Install with: npm install @archal/twin-${twinName}`
4640
+ detail: `Build with: pnpm --filter @archal/twin-${twinName} build`
3840
4641
  };
3841
4642
  }
3842
4643
  function checkAgentConfig() {
@@ -3848,10 +4649,10 @@ function checkAgentConfig() {
3848
4649
  message: `ARCHAL_AGENT_COMMAND="${envCommand}"`
3849
4650
  };
3850
4651
  }
3851
- const projectConfig = resolve4(".archal.json");
3852
- if (existsSync8(projectConfig)) {
4652
+ const projectConfig = resolve6(".archal.json");
4653
+ if (existsSync9(projectConfig)) {
3853
4654
  try {
3854
- const raw = JSON.parse(readFileSync9(projectConfig, "utf-8"));
4655
+ const raw = JSON.parse(readFileSync11(projectConfig, "utf-8"));
3855
4656
  if (raw.agent?.command) {
3856
4657
  return {
3857
4658
  name: "Agent command",
@@ -3876,8 +4677,8 @@ function checkAgentConfig() {
3876
4677
  };
3877
4678
  }
3878
4679
  function checkScenario(scenarioPath) {
3879
- const resolved = resolve4(scenarioPath);
3880
- if (!existsSync8(resolved)) {
4680
+ const resolved = resolve6(scenarioPath);
4681
+ if (!existsSync9(resolved)) {
3881
4682
  return {
3882
4683
  name: `Scenario: ${scenarioPath}`,
3883
4684
  status: "fail",
@@ -3897,13 +4698,26 @@ function checkScenario(scenarioPath) {
3897
4698
  }
3898
4699
  const hasProbabilistic = scenario.successCriteria.some((c) => c.type === "probabilistic");
3899
4700
  const config = loadConfig();
3900
- if (hasProbabilistic && !config.apiKey) {
3901
- return {
3902
- name: `Scenario: ${scenarioPath}`,
3903
- status: "fail",
3904
- message: "Has [P] criteria but no ANTHROPIC_API_KEY",
3905
- detail: `${scenario.successCriteria.filter((c) => c.type === "probabilistic").length} probabilistic criteria require an API key`
3906
- };
4701
+ if (hasProbabilistic) {
4702
+ const provider = detectProvider(config.model);
4703
+ const resolvedKey = resolveProviderApiKey(config.apiKey, provider);
4704
+ const envVar = getProviderEnvVar(provider);
4705
+ if (provider === "openai-compatible" && !config.baseUrl) {
4706
+ return {
4707
+ name: `Scenario: ${scenarioPath}`,
4708
+ status: "fail",
4709
+ message: `Has [P] criteria but no base URL for ${config.model}`,
4710
+ detail: "Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
4711
+ };
4712
+ }
4713
+ if (!resolvedKey) {
4714
+ return {
4715
+ name: `Scenario: ${scenarioPath}`,
4716
+ status: "fail",
4717
+ message: `Has [P] criteria but no ${envVar}`,
4718
+ detail: `${scenario.successCriteria.filter((c) => c.type === "probabilistic").length} probabilistic criteria require an API key`
4719
+ };
4720
+ }
3907
4721
  }
3908
4722
  const missingTwins = [];
3909
4723
  for (const twin of scenario.config.twins) {
@@ -4005,27 +4819,50 @@ function createDoctorCommand() {
4005
4819
 
4006
4820
  // src/auth.ts
4007
4821
  import { spawnSync } from "child_process";
4008
- import { chmodSync as chmodSync2, existsSync as existsSync9, readFileSync as readFileSync10, unlinkSync as unlinkSync5, writeFileSync as writeFileSync7 } from "fs";
4822
+ import { existsSync as existsSync10, readFileSync as readFileSync12, unlinkSync as unlinkSync5, writeFileSync as writeFileSync7 } from "fs";
4009
4823
  import { join as join7 } from "path";
4010
4824
  var CREDENTIALS_FILE = "credentials.json";
4011
- var AUTH_BASE_URL = (process.env["ARCHAL_AUTH_URL"] ?? "https://archal.ai").replace(/\/+$/, "");
4012
- var REQUEST_TIMEOUT_MS2 = 8e3;
4825
+ var AUTH_TOKEN_ENV_VAR = "ARCHAL_TOKEN";
4826
/**
 * Normalizes an auth base URL so paths such as "/auth/me" can be appended.
 *
 * Trims surrounding whitespace, removes trailing slashes, drops one trailing
 * "/api" segment, and removes any slashes exposed by dropping that segment,
 * so the result never ends in "/".
 *
 * @param {string} value - Raw URL, e.g. from ARCHAL_AUTH_URL.
 * @returns {string} The normalized base URL.
 */
function normalizeAuthUrl(value) {
  const stripTrailingSlashes = (text) => text.replace(/\/+$/, "");
  const trimmed = stripTrailingSlashes(value.trim());
  if (!trimmed.endsWith("/api")) {
    return trimmed;
  }
  // "https://host//api" would otherwise become "https://host/".
  return stripTrailingSlashes(trimmed.slice(0, -4));
}
4830
// Base URL of the auth service: ARCHAL_AUTH_URL when set, otherwise the
// public site, passed through normalizeAuthUrl.
+ var AUTH_BASE_URL = normalizeAuthUrl(process.env["ARCHAL_AUTH_URL"] ?? "https://www.archal.ai");
4831
// 8000 ms; presumably the timeout for auth HTTP requests - usage is outside this view.
+ var REQUEST_TIMEOUT_MS3 = 8e3;
4832
// Ten years in seconds: lifetime assumed for an env-provided token that carries no expiry of its own.
+ var ENV_TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
4013
4833
  function getCredentialsPath() {
4014
4834
  return join7(ensureArchalDir(), CREDENTIALS_FILE);
4015
4835
  }
4016
4836
  function isPlan(value) {
4017
4837
  return value === "free" || value === "pro" || value === "enterprise";
4018
4838
  }
4839
// Returns true when `email` is one of the placeholder identities
// ("(from ARCHAL_TOKEN)" or "(from token)") used for credentials that came
// from a bare token rather than from a real account email.
+ function isTokenDerivedIdentity(email) {
4840
+ return email === "(from ARCHAL_TOKEN)" || email === "(from token)";
4841
+ }
4842
// Emits a warning that account metadata could not be fetched from
// `${AUTH_BASE_URL}/auth/me`, including `reason` in the message.
// The wording depends on where the credentials came from: token-derived
// identities (see isTokenDerivedIdentity) are told the token is used without
// refreshed metadata; all others are told cached credentials are used.
+ function logRefreshFailure(creds, reason) {
4843
+ if (isTokenDerivedIdentity(creds.email)) {
4844
+ warn(
4845
+ `Could not verify token with ${AUTH_BASE_URL}/auth/me (${reason}). Using token without refreshed account metadata.`
4846
+ );
4847
+ return;
4848
+ }
4849
+ warn(
4850
+ `Could not refresh account metadata from ${AUTH_BASE_URL}/auth/me (${reason}). Using cached credentials.`
4851
+ );
4852
+ }
4019
4853
  function readCredentialsFile() {
4020
4854
  const path = getCredentialsPath();
4021
- if (!existsSync9(path)) {
4855
+ if (!existsSync10(path)) {
4022
4856
  return null;
4023
4857
  }
4024
4858
  try {
4025
- const raw = readFileSync10(path, "utf-8");
4859
+ const raw = readFileSync12(path, "utf-8");
4026
4860
  const parsed = JSON.parse(raw);
4027
4861
  const token = typeof parsed.token === "string" ? parsed.token : typeof parsed.accessToken === "string" ? parsed.accessToken : null;
4028
4862
  if (token === null || parsed.refreshToken !== void 0 && typeof parsed.refreshToken !== "string" || typeof parsed.email !== "string" || !isPlan(parsed.plan) || !Array.isArray(parsed.selectedTwins) || !parsed.selectedTwins.every((value) => typeof value === "string") || typeof parsed.expiresAt !== "number") {
4863
+ warn(
4864
+ `Credentials file at ${path} has missing or invalid fields. Run \`archal login\` to re-authenticate.`
4865
+ );
4029
4866
  return null;
4030
4867
  }
4031
4868
  return {
@@ -4037,9 +4874,32 @@ function readCredentialsFile() {
4037
4874
  expiresAt: parsed.expiresAt
4038
4875
  };
4039
4876
  } catch {
4877
+ warn(
4878
+ `Credentials file at ${path} exists but could not be parsed. Delete it and run \`archal login\` to re-authenticate.`
4879
+ );
4040
4880
  return null;
4041
4881
  }
4042
4882
  }
4883
+ function readCredentialsFromEnv() {
4884
+ const raw = process.env[AUTH_TOKEN_ENV_VAR];
4885
+ if (typeof raw !== "string") {
4886
+ return null;
4887
+ }
4888
+ const token = raw.trim();
4889
+ if (token.length === 0) {
4890
+ return null;
4891
+ }
4892
+ const nowSeconds = Math.floor(Date.now() / 1e3);
4893
+ return {
4894
+ token,
4895
+ refreshToken: "",
4896
+ email: "(from ARCHAL_TOKEN)",
4897
+ plan: "free",
4898
+ selectedTwins: [],
4899
+ // API keys are opaque and don't carry exp; keep env-provided token usable.
4900
+ expiresAt: getJwtExpiry(token) ?? nowSeconds + ENV_TOKEN_FALLBACK_TTL_SECONDS
4901
+ };
4902
+ }
4043
4903
  function getCredentials() {
4044
4904
  const creds = getStoredCredentials();
4045
4905
  if (!creds) {
@@ -4052,7 +4912,7 @@ function getCredentials() {
4052
4912
  return creds;
4053
4913
  }
4054
4914
  function getStoredCredentials() {
4055
- return readCredentialsFile();
4915
+ return readCredentialsFromEnv() ?? readCredentialsFile();
4056
4916
  }
4057
4917
  function saveCredentials(creds) {
4058
4918
  const path = getCredentialsPath();
@@ -4060,15 +4920,11 @@ function saveCredentials(creds) {
4060
4920
  accessToken: creds.token,
4061
4921
  ...creds
4062
4922
  };
4063
- writeFileSync7(path, JSON.stringify(payload, null, 2) + "\n", "utf-8");
4064
- try {
4065
- chmodSync2(path, 384);
4066
- } catch {
4067
- }
4923
+ writeFileSync7(path, JSON.stringify(payload, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
4068
4924
  }
4069
4925
  function deleteCredentials() {
4070
4926
  const path = getCredentialsPath();
4071
- if (!existsSync9(path)) {
4927
+ if (!existsSync10(path)) {
4072
4928
  return false;
4073
4929
  }
4074
4930
  unlinkSync5(path);
@@ -4114,21 +4970,86 @@ function requireAuth(options = {}) {
4114
4970
  process.stderr.write("Tip: archal setup\n");
4115
4971
  process.exit(1);
4116
4972
  }
4973
+ function isCliTokenExchangeResponse(value) {
4974
+ if (!value || typeof value !== "object") return false;
4975
+ const data = value;
4976
+ return typeof data["accessToken"] === "string" && typeof data["refreshToken"] === "string" && typeof data["email"] === "string" && isPlan(data["plan"]) && Array.isArray(data["selectedTwins"]) && data["selectedTwins"].every((item) => typeof item === "string") && typeof data["expiresAt"] === "number";
4977
+ }
4978
+ function isCliRefreshResponse(value) {
4979
+ if (!value || typeof value !== "object") return false;
4980
+ const data = value;
4981
+ return typeof data["accessToken"] === "string" && typeof data["refreshToken"] === "string" && typeof data["expiresAt"] === "number";
4982
+ }
4983
+ async function exchangeCliAuthCode(input) {
4984
+ const response = await fetch(`${AUTH_BASE_URL}/auth/cli/token`, {
4985
+ method: "POST",
4986
+ headers: {
4987
+ "content-type": "application/json",
4988
+ "user-agent": CLI_USER_AGENT
4989
+ },
4990
+ body: JSON.stringify(input),
4991
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
4992
+ });
4993
+ if (!response.ok) {
4994
+ throw new Error(`Login failed during code exchange (${response.status})`);
4995
+ }
4996
+ const payload = await response.json();
4997
+ if (!isCliTokenExchangeResponse(payload)) {
4998
+ throw new Error("Login failed: invalid token exchange response");
4999
+ }
5000
+ return {
5001
+ token: payload.accessToken,
5002
+ refreshToken: payload.refreshToken,
5003
+ email: payload.email,
5004
+ plan: payload.plan,
5005
+ selectedTwins: payload.selectedTwins,
5006
+ expiresAt: payload.expiresAt
5007
+ };
5008
+ }
5009
+ async function refreshCliSession(creds) {
5010
+ if (!creds.refreshToken) {
5011
+ return null;
5012
+ }
5013
+ const response = await fetch(`${AUTH_BASE_URL}/auth/cli/refresh`, {
5014
+ method: "POST",
5015
+ headers: {
5016
+ "content-type": "application/json",
5017
+ "user-agent": CLI_USER_AGENT
5018
+ },
5019
+ body: JSON.stringify({ refreshToken: creds.refreshToken }),
5020
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
5021
+ });
5022
+ if (!response.ok) {
5023
+ return null;
5024
+ }
5025
+ const payload = await response.json();
5026
+ if (!isCliRefreshResponse(payload)) {
5027
+ return null;
5028
+ }
5029
+ return {
5030
+ ...creds,
5031
+ token: payload.accessToken,
5032
+ refreshToken: payload.refreshToken,
5033
+ expiresAt: payload.expiresAt
5034
+ };
5035
+ }
4117
5036
  async function refreshAuthFromServer(creds) {
4118
5037
  try {
4119
5038
  const response = await fetch(`${AUTH_BASE_URL}/auth/me`, {
4120
5039
  method: "GET",
4121
5040
  headers: {
4122
5041
  authorization: `Bearer ${creds.token}`,
4123
- "user-agent": "archal-cli/0.1.0"
5042
+ "user-agent": CLI_USER_AGENT
4124
5043
  },
4125
- signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS2)
5044
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
4126
5045
  });
4127
5046
  if (!response.ok) {
5047
+ logRefreshFailure(creds, `HTTP ${response.status}`);
4128
5048
  return creds;
4129
5049
  }
4130
5050
  const data = await response.json();
4131
5051
  if (typeof data.email !== "string" || !isPlan(data.plan) || !Array.isArray(data.selectedTwins) || !data.selectedTwins.every((value) => typeof value === "string")) {
5052
+ logRefreshFailure(creds, "invalid response payload");
4132
5053
  return creds;
4133
5054
  }
4134
5055
  const updated = {
@@ -4141,7 +5062,9 @@ async function refreshAuthFromServer(creds) {
4141
5062
  saveCredentials(updated);
4142
5063
  }
4143
5064
  return updated;
4144
- } catch {
5065
+ } catch (error2) {
5066
+ const message = error2 instanceof Error ? error2.message : String(error2);
5067
+ logRefreshFailure(creds, message);
4145
5068
  return creds;
4146
5069
  }
4147
5070
  }
@@ -4165,7 +5088,7 @@ function getJwtExpiry(token) {
4165
5088
  }
4166
5089
 
4167
5090
  // src/runner/routing.ts
4168
- import { readFileSync as readFileSync11 } from "fs";
5091
+ import { readFileSync as readFileSync13 } from "fs";
4169
5092
  function isLoopbackUrl(rawUrl) {
4170
5093
  try {
4171
5094
  const parsed = new URL(rawUrl);
@@ -4180,7 +5103,7 @@ function isNonLocalEndpoint(rawUrl) {
4180
5103
  }
4181
5104
  function parseRemoteTwinUrlOverrides(path) {
4182
5105
  if (!path) return void 0;
4183
- const raw = readFileSync11(path, "utf-8");
5106
+ const raw = readFileSync13(path, "utf-8");
4184
5107
  const parsed = JSON.parse(raw);
4185
5108
  const overrides = {};
4186
5109
  for (const [key, value] of Object.entries(parsed)) {
@@ -4202,7 +5125,7 @@ function parseRemoteTwinUrlOverrides(path) {
4202
5125
  }
4203
5126
  function parseApiBaseUrlOverrides(path) {
4204
5127
  if (!path) return void 0;
4205
- const raw = readFileSync11(path, "utf-8");
5128
+ const raw = readFileSync13(path, "utf-8");
4206
5129
  const parsed = JSON.parse(raw);
4207
5130
  const overrides = {};
4208
5131
  for (const [key, value] of Object.entries(parsed)) {
@@ -4260,17 +5183,17 @@ function buildApiRoutingEnv(routing) {
4260
5183
  }
4261
5184
  return env;
4262
5185
  }
4263
- function validateRemoteOpenClawTopology(endpointUrl, requiredTwins, remoteTwinUrlOverrides) {
5186
+ function validateRemoteApiEngineTopology(endpointUrl, requiredTwins, remoteTwinUrlOverrides) {
4264
5187
  if (!isNonLocalEndpoint(endpointUrl)) return;
4265
5188
  if (!remoteTwinUrlOverrides) {
4266
5189
  throw new Error(
4267
- "Non-local OpenClaw endpoint detected but no remote-reachable twin URL map provided. Use --openclaw-twin-urls <path-to-json> with twin MCP base URLs reachable by the OpenClaw endpoint."
5190
+ "Non-local engine endpoint detected but no remote-reachable twin URL map provided. Use --engine-twin-urls <path-to-json> with twin MCP base URLs reachable by the engine endpoint."
4268
5191
  );
4269
5192
  }
4270
5193
  const missing = requiredTwins.filter((twin) => !remoteTwinUrlOverrides[twin]);
4271
5194
  if (missing.length > 0) {
4272
5195
  throw new Error(
4273
- `Missing twin URL overrides for: ${missing.join(", ")}. Provide a URL for each twin in --openclaw-twin-urls when using a non-local OpenClaw endpoint.`
5196
+ `Missing twin URL overrides for: ${missing.join(", ")}. Provide a URL for each twin in --engine-twin-urls when using a non-local engine endpoint.`
4274
5197
  );
4275
5198
  }
4276
5199
  }
@@ -4304,7 +5227,16 @@ function computeStateDiff(before, after) {
4304
5227
  }
4305
5228
  return diff;
4306
5229
  }
4307
- async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections, evaluatorConfig, timeoutSeconds, rateLimit, openclawRemote, remoteTwinUrlOverrides, apiRouting, cloudTwinUrls) {
5230
+ function parsePositiveIntFromEnv(name) {
5231
+ const raw = process.env[name]?.trim();
5232
+ if (!raw) return void 0;
5233
+ const parsed = parseInt(raw, 10);
5234
+ if (Number.isNaN(parsed) || parsed <= 0) {
5235
+ throw new Error(`${name} must be a positive integer when set`);
5236
+ }
5237
+ return parsed;
5238
+ }
5239
+ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections, evaluatorConfig, timeoutSeconds, rateLimit, apiEngine, localEngine, remoteTwinUrlOverrides, apiRouting, cloudTwinUrls, apiBearerToken, adminAuth) {
4308
5240
  async function probeHealth(url, timeoutMs) {
4309
5241
  const controller = new AbortController();
4310
5242
  const timer = setTimeout(() => controller.abort(), timeoutMs);
@@ -4336,7 +5268,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4336
5268
  let beforeState;
4337
5269
  if (useCloud) {
4338
5270
  progress("Fetching seed state from cloud twins...");
4339
- beforeState = await collectStateFromHttp(cloudTwinUrls);
5271
+ beforeState = await collectStateFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
4340
5272
  } else {
4341
5273
  progress("Capturing seed state...");
4342
5274
  const seedResult = await captureSeedState(twinConfigs);
@@ -4363,7 +5295,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4363
5295
  const twinNames = twinConfigs.map((c) => c.twinName);
4364
5296
  const localTwinUrls = twinUrls;
4365
5297
  let effectiveRemoteTwinUrls;
4366
- if (openclawRemote) {
5298
+ if (apiEngine) {
4367
5299
  effectiveRemoteTwinUrls = {};
4368
5300
  for (const twinName of twinNames) {
4369
5301
  const fromOverride = remoteTwinUrlOverrides?.[twinName];
@@ -4375,7 +5307,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4375
5307
  effectiveRemoteTwinUrls[twinName] = resolved;
4376
5308
  }
4377
5309
  }
4378
- if (openclawRemote) {
5310
+ if (apiEngine && !useCloud) {
4379
5311
  for (const [name, url] of Object.entries(localTwinUrls)) {
4380
5312
  const ok = await probeHealth(url, 1500);
4381
5313
  if (!ok) {
@@ -4383,24 +5315,25 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4383
5315
  }
4384
5316
  }
4385
5317
  }
4386
- if (useCloud) {
4387
- for (const [name, url] of Object.entries(cloudTwinUrls)) {
4388
- const ok = await probeHealth(url, 3e3);
4389
- if (!ok) {
4390
- throw new Error(`Cloud twin "${name}" failed health check at ${url}/health`);
4391
- }
4392
- }
4393
- }
4394
- const taskMessage = generateTaskFromScenario(scenario, apiRouting);
5318
+ const baseTaskMessage = generateTaskFromScenario(scenario, apiRouting);
5319
+ const taskMessage = localEngine?.promptContext ? `${localEngine.promptContext}
5320
+
5321
+ ---
5322
+
5323
+ ${baseTaskMessage}` : baseTaskMessage;
5324
+ const engineModel = localEngine?.model ?? apiEngine?.model;
4395
5325
  const effectiveAgentConfig = {
4396
5326
  ...agentConfig,
4397
5327
  env: {
4398
5328
  ...agentConfig.env,
4399
- ...buildApiRoutingEnv(apiRouting)
5329
+ ...buildApiRoutingEnv(apiRouting),
5330
+ ARCHAL_ENGINE_MODE: apiEngine ? "api" : "local",
5331
+ ...engineModel ? { ARCHAL_ENGINE_MODEL: engineModel } : {},
5332
+ ARCHAL_ENGINE_TASK: taskMessage
4400
5333
  }
4401
5334
  };
4402
- let agentResult = openclawRemote ? await executeOpenClawRemote(
4403
- openclawRemote,
5335
+ let agentResult = apiEngine ? await executeOpenClawRemote(
5336
+ apiEngine,
4404
5337
  scenario,
4405
5338
  runId,
4406
5339
  taskMessage,
@@ -4414,7 +5347,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4414
5347
  timeoutSeconds * 1e3,
4415
5348
  { restConfigPath, twinUrls }
4416
5349
  );
4417
- if (!openclawRemote && shouldRetryWithModernOpenClaw(agentResult)) {
5350
+ if (!apiEngine && !localEngine && shouldRetryWithModernOpenClaw(agentResult)) {
4418
5351
  warn(
4419
5352
  "OpenClaw legacy local invocation failed with CLI drift signal; retrying with modern local args"
4420
5353
  );
@@ -4431,8 +5364,8 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4431
5364
  let stateAfter;
4432
5365
  let trace;
4433
5366
  if (useCloud) {
4434
- stateAfter = await collectStateFromHttp(cloudTwinUrls);
4435
- trace = await collectTraceFromHttp(cloudTwinUrls);
5367
+ stateAfter = await collectStateFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
5368
+ trace = await collectTraceFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
4436
5369
  } else {
4437
5370
  if (!twinPaths) {
4438
5371
  throw new Error("Twin paths not initialized");
@@ -4443,7 +5376,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4443
5376
  const diff = computeStateDiff(beforeState, stateAfter);
4444
5377
  cleanupTempFiles(mcpConfigPath, twinPaths ?? {}, seedPaths, runId, twinNames);
4445
5378
  if (agentResult.timedOut) {
4446
- const timeoutDisplay = openclawRemote ? `${(openclawRemote.timeoutMs / 1e3).toFixed(0)}s` : `${timeoutSeconds}s`;
5379
+ const timeoutDisplay = apiEngine ? `${(apiEngine.timeoutMs / 1e3).toFixed(0)}s` : `${timeoutSeconds}s`;
4447
5380
  const durationMs2 = Date.now() - startTime;
4448
5381
  return {
4449
5382
  runIndex,
@@ -4461,6 +5394,9 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4461
5394
  }
4462
5395
  if (agentResult.exitCode !== 0 && agentResult.exitCode !== null) {
4463
5396
  warn(`Agent exited with non-zero code ${agentResult.exitCode} on run ${runIndex + 1}`);
5397
+ if (agentResult.stderr) {
5398
+ debug(`Agent stderr: ${agentResult.stderr.slice(0, 500)}`);
5399
+ }
4464
5400
  }
4465
5401
  progress(`Evaluating run ${runIndex + 1}...`);
4466
5402
  const evaluationResult = await evaluateRun(
@@ -4511,7 +5447,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4511
5447
  for (const paths of Object.values(seedPaths)) {
4512
5448
  for (const file of [paths.stateFile, `${paths.stateFile}.tmp`]) {
4513
5449
  try {
4514
- if (existsSync10(file)) unlinkSync6(file);
5450
+ if (existsSync11(file)) unlinkSync6(file);
4515
5451
  } catch {
4516
5452
  }
4517
5453
  }
@@ -4520,14 +5456,14 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4520
5456
  if (restConfigPath) {
4521
5457
  for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
4522
5458
  try {
4523
- if (existsSync10(file)) unlinkSync6(file);
5459
+ if (existsSync11(file)) unlinkSync6(file);
4524
5460
  } catch {
4525
5461
  }
4526
5462
  }
4527
5463
  }
4528
5464
  }
4529
5465
  }
4530
- function preflightCheck(scenario, apiKey) {
5466
+ function preflightCheck(scenario, apiKey, model, baseUrl) {
4531
5467
  const errors = [];
4532
5468
  for (const twin of scenario.config.twins) {
4533
5469
  const result = checkTwinAvailability(twin);
@@ -4540,17 +5476,30 @@ function preflightCheck(scenario, apiKey) {
4540
5476
  }
4541
5477
  }
4542
5478
  const hasProbabilistic = scenario.successCriteria.some((c) => c.type === "probabilistic");
4543
- if (hasProbabilistic && !apiKey) {
4544
- const pCount = scenario.successCriteria.filter((c) => c.type === "probabilistic").length;
4545
- errors.push({
4546
- check: "ANTHROPIC_API_KEY",
4547
- message: `Scenario has ${pCount} probabilistic criteria but no API key is configured`,
4548
- detail: "Set via: export ANTHROPIC_API_KEY=sk-ant-... or archal config set evaluator.apiKey <key>"
4549
- });
5479
+ if (hasProbabilistic) {
5480
+ const provider = detectProvider(model);
5481
+ const resolvedKey = resolveProviderApiKey(apiKey, provider);
5482
+ if (provider === "openai-compatible" && !baseUrl) {
5483
+ errors.push({
5484
+ check: "evaluator.baseUrl",
5485
+ message: `Model "${model}" requires a base URL for the OpenAI-compatible endpoint`,
5486
+ detail: "Set via: export ARCHAL_EVALUATOR_BASE_URL=<url> or archal config set evaluator.baseUrl <url>"
5487
+ });
5488
+ }
5489
+ if (!resolvedKey) {
5490
+ const envVar = getProviderEnvVar(provider);
5491
+ const pCount = scenario.successCriteria.filter((c) => c.type === "probabilistic").length;
5492
+ errors.push({
5493
+ check: envVar,
5494
+ message: `Scenario has ${pCount} probabilistic criteria that will be skipped (no API key for ${provider})`,
5495
+ detail: `Set via: export ${envVar}=<your-key> or archal config set evaluator.apiKey <key>`,
5496
+ warning: true
5497
+ });
5498
+ }
4550
5499
  }
4551
5500
  return errors;
4552
5501
  }
4553
- async function runRemoteOpenClawPreflight(scenario, seedSelections, rateLimit, remoteConfig, remoteTwinUrlOverrides) {
5502
+ async function runRemoteApiEnginePreflight(scenario, seedSelections, rateLimit, remoteConfig, remoteTwinUrlOverrides) {
4554
5503
  const runId = `archal-preflight-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
4555
5504
  const twinConfigs = seedSelections.map((sel) => ({
4556
5505
  twinName: sel.twinName,
@@ -4592,14 +5541,14 @@ async function runRemoteOpenClawPreflight(scenario, seedSelections, rateLimit, r
4592
5541
  for (const paths of Object.values(restResult.twinPaths)) {
4593
5542
  for (const file of [paths.stateFile, `${paths.stateFile}.tmp`, paths.traceFile, `${paths.traceFile}.tmp`]) {
4594
5543
  try {
4595
- if (existsSync10(file)) unlinkSync6(file);
5544
+ if (existsSync11(file)) unlinkSync6(file);
4596
5545
  } catch {
4597
5546
  }
4598
5547
  }
4599
5548
  }
4600
5549
  for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
4601
5550
  try {
4602
- if (existsSync10(file)) unlinkSync6(file);
5551
+ if (existsSync11(file)) unlinkSync6(file);
4603
5552
  } catch {
4604
5553
  }
4605
5554
  }
@@ -4622,9 +5571,14 @@ async function runScenario(options) {
4622
5571
  );
4623
5572
  }
4624
5573
  }
4625
- const preflightErrors = preflightCheck(scenario, config.apiKey);
4626
- if (preflightErrors.length > 0) {
4627
- const lines = preflightErrors.map((e) => {
5574
+ const preflightErrors = preflightCheck(scenario, config.apiKey, model, config.baseUrl);
5575
+ const hardErrors = preflightErrors.filter((e) => !e.warning);
5576
+ const warnings = preflightErrors.filter((e) => e.warning);
5577
+ for (const w of warnings) {
5578
+ warn(`${w.check}: ${w.message}${w.detail ? ` (${w.detail})` : ""}`);
5579
+ }
5580
+ if (hardErrors.length > 0) {
5581
+ const lines = hardErrors.map((e) => {
4628
5582
  let line = ` - ${e.check}: ${e.message}`;
4629
5583
  if (e.detail) line += `
4630
5584
  ${e.detail}`;
@@ -4651,7 +5605,7 @@ Run 'archal doctor' for a full system check.`
4651
5605
  }
4652
5606
  seedSelections = overrideSeedSelection(seedSelections, overrides);
4653
5607
  }
4654
- if (config.apiKey && !options.noDynamicSeed) {
5608
+ if (config.geminiApiKey && !options.noDynamicSeed) {
4655
5609
  progress("Generating dynamic seeds from setup description...");
4656
5610
  const baseTwinConfigs = seedSelections.map((sel) => ({
4657
5611
  twinName: sel.twinName,
@@ -4659,8 +5613,8 @@ Run 'archal doctor' for a full system check.`
4659
5613
  }));
4660
5614
  const { beforeState: baseSeedStates } = await captureSeedState(baseTwinConfigs);
4661
5615
  const dynamicConfig = {
4662
- apiKey: config.apiKey,
4663
- model,
5616
+ geminiApiKey: config.geminiApiKey,
5617
+ model: config.seedModel,
4664
5618
  noCache: options.noSeedCache
4665
5619
  };
4666
5620
  for (const sel of seedSelections) {
@@ -4683,24 +5637,28 @@ Run 'archal doctor' for a full system check.`
4683
5637
  sel.seedData = result.seed;
4684
5638
  }
4685
5639
  }
4686
- const scenarioDir = dirname2(resolve5(options.scenarioPath));
5640
+ const scenarioDir = dirname2(resolve7(options.scenarioPath));
4687
5641
  let projectConfigPath;
4688
5642
  for (const dir of [scenarioDir, process.cwd()]) {
4689
- const candidate = resolve5(dir, ".archal.json");
4690
- if (existsSync10(candidate)) {
5643
+ const candidate = resolve7(dir, ".archal.json");
5644
+ if (existsSync11(candidate)) {
4691
5645
  projectConfigPath = candidate;
4692
5646
  break;
4693
5647
  }
4694
5648
  }
4695
- function resolveOpenClawModel(raw) {
4696
- if (!raw || !raw.trim()) return "openclaw:main";
5649
+ function resolveOpenClawModel2(raw) {
5650
+ if (!raw || !raw.trim()) return void 0;
4697
5651
  const value = raw.trim();
4698
5652
  return value.includes(":") ? value : `openclaw:${value}`;
4699
5653
  }
4700
- function resolveOpenClawGatewayToken2(explicitToken) {
5654
+ function resolveEngineToken2(explicitToken) {
4701
5655
  if (explicitToken && explicitToken.trim()) {
4702
5656
  return explicitToken.trim();
4703
5657
  }
5658
+ const engineToken = process.env["ARCHAL_ENGINE_TOKEN"]?.trim();
5659
+ if (engineToken) {
5660
+ return engineToken;
5661
+ }
4704
5662
  const gatewayToken = process.env["OPENCLAW_GATEWAY_TOKEN"]?.trim();
4705
5663
  if (gatewayToken) {
4706
5664
  return gatewayToken;
@@ -4711,42 +5669,124 @@ Run 'archal doctor' for a full system check.`
4711
5669
  }
4712
5670
  return void 0;
4713
5671
  }
4714
- let openclawRemote;
4715
- if (options.openclawUrl) {
4716
- openclawRemote = {
4717
- url: options.openclawUrl,
4718
- token: resolveOpenClawGatewayToken2(options.openclawToken),
4719
- model: resolveOpenClawModel(options.openclawAgent ?? process.env["OPENCLAW_AGENT_ID"]),
4720
- timeoutMs: (options.openclawTimeout ?? timeoutSeconds) * 1e3
5672
+ const openclawEndpointAlias = options.openclawUrl ?? process.env["OPENCLAW_URL"];
5673
+ const engineMode = (() => {
5674
+ if (options.engine) {
5675
+ return options.engine;
5676
+ }
5677
+ if (options.engineEndpoint || openclawEndpointAlias || process.env["ARCHAL_ENGINE_ENDPOINT"]) {
5678
+ return "api";
5679
+ }
5680
+ if (options.harnessDir || process.env["ARCHAL_HARNESS_DIR"]) {
5681
+ return "local";
5682
+ }
5683
+ return "legacy";
5684
+ })();
5685
+ const apiEndpoint = options.engineEndpoint ?? openclawEndpointAlias ?? process.env["ARCHAL_ENGINE_ENDPOINT"];
5686
+ const rawOpenClawAgent = options.openclawAgent ?? process.env["OPENCLAW_AGENT_ID"];
5687
+ const rawEngineModel = options.engineModel ?? process.env["ARCHAL_ENGINE_MODEL"];
5688
+ const resolvedEngineToken = resolveEngineToken2(options.engineToken ?? options.openclawToken);
5689
+ const harnessDir = options.harnessDir ?? process.env["ARCHAL_HARNESS_DIR"];
5690
+ let apiEngine;
5691
+ if (engineMode === "api") {
5692
+ const apiTimeoutSeconds = options.engineTimeout ?? options.openclawTimeout ?? parsePositiveIntFromEnv("ARCHAL_ENGINE_TIMEOUT") ?? timeoutSeconds;
5693
+ if (!apiEndpoint || !apiEndpoint.trim()) {
5694
+ throw new Error(
5695
+ "API engine mode requires --engine-endpoint (or --openclaw-url for legacy compatibility)."
5696
+ );
5697
+ }
5698
+ if (!Number.isFinite(apiTimeoutSeconds) || apiTimeoutSeconds <= 0) {
5699
+ throw new Error("Engine timeout must be a positive integer number of seconds.");
5700
+ }
5701
+ const resolvedApiModel = rawEngineModel?.trim() || resolveOpenClawModel2(rawOpenClawAgent) || (openclawEndpointAlias ? "openclaw:main" : void 0);
5702
+ if (!resolvedApiModel) {
5703
+ throw new Error(
5704
+ "API engine mode requires --engine-model/ARCHAL_ENGINE_MODEL (or --openclaw-agent/OPENCLAW_AGENT_ID)."
5705
+ );
5706
+ }
5707
+ apiEngine = {
5708
+ url: apiEndpoint.trim(),
5709
+ token: resolvedEngineToken,
5710
+ model: resolvedApiModel,
5711
+ timeoutMs: apiTimeoutSeconds * 1e3,
5712
+ agentId: rawOpenClawAgent?.trim() || void 0
4721
5713
  };
4722
- if (!openclawRemote.token) {
5714
+ if (openclawEndpointAlias && !apiEngine.token) {
4723
5715
  throw new Error(
4724
5716
  "OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD."
4725
5717
  );
4726
5718
  }
4727
5719
  }
4728
- const remoteTwinUrlOverrides = parseRemoteTwinUrlOverrides(options.openclawTwinUrls);
5720
+ let localEngine;
5721
+ if (engineMode === "local") {
5722
+ if (!harnessDir) {
5723
+ throw new Error(
5724
+ "Local engine mode requires --harness-dir (or ARCHAL_HARNESS_DIR)."
5725
+ );
5726
+ }
5727
+ const resolvedHarness = resolveLocalHarness(harnessDir, rawEngineModel);
5728
+ const resolvedFallbackLocalAgentConfig = options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath);
5729
+ const fallbackLocalAgentConfig = resolvedFallbackLocalAgentConfig ?? { command: "openclaw", args: [] };
5730
+ if (!resolvedHarness.manifest) {
5731
+ debug(
5732
+ "Harness manifest not found for local mode; using agent command defaults.",
5733
+ { manifestPath: resolvedHarness.manifestPath }
5734
+ );
5735
+ } else if (!resolvedHarness.localCommand) {
5736
+ warn(
5737
+ `Harness manifest at ${resolvedHarness.manifestPath} does not define local.command; falling back to agent command defaults.`
5738
+ );
5739
+ }
5740
+ if (!resolvedHarness.localCommand && !resolvedFallbackLocalAgentConfig) {
5741
+ warn(
5742
+ 'No local command configured via harness manifest/.archal.json/ARCHAL_AGENT_COMMAND; defaulting to "openclaw".'
5743
+ );
5744
+ }
5745
+ const commandConfig = resolvedHarness.localCommand ?? fallbackLocalAgentConfig;
5746
+ localEngine = {
5747
+ model: resolvedHarness.model,
5748
+ command: commandConfig.command,
5749
+ args: commandConfig.args,
5750
+ env: commandConfig.env,
5751
+ cwd: resolvedHarness.harnessDir,
5752
+ promptContext: resolvedHarness.promptContext
5753
+ };
5754
+ }
5755
+ const remoteTwinUrlOverrides = apiEngine ? parseRemoteTwinUrlOverrides(
5756
+ options.engineTwinUrls ?? options.openclawTwinUrls ?? process.env["ARCHAL_ENGINE_TWIN_URLS"]
5757
+ ) : void 0;
4729
5758
  const apiBaseUrlOverrides = parseApiBaseUrlOverrides(options.apiBaseUrls);
4730
5759
  const apiProxyUrl = parseProxyUrl(options.apiProxyUrl ?? process.env["ARCHAL_API_PROXY_URL"]);
4731
5760
  const apiRouting = apiBaseUrlOverrides && Object.keys(apiBaseUrlOverrides).length > 0 || apiProxyUrl ? {
4732
5761
  baseUrls: apiBaseUrlOverrides,
4733
- proxyUrl: apiProxyUrl
5762
+ proxyUrl: apiProxyUrl,
5763
+ bearerToken: options.apiBearerToken,
5764
+ adminToken: options.apiAdminToken,
5765
+ adminUserId: options.apiAdminUserId
4734
5766
  } : void 0;
4735
- const agentConfig = options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath) ?? (openclawRemote ? { command: "openclaw", args: [] } : {
5767
+ const agentConfig = localEngine ? {
5768
+ command: localEngine.command,
5769
+ args: localEngine.args,
5770
+ env: localEngine.env,
5771
+ cwd: localEngine.cwd
5772
+ } : options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath) ?? (apiEngine ? { command: "openclaw", args: [] } : {
4736
5773
  command: process.env["ARCHAL_AGENT_COMMAND"] ?? "echo",
4737
5774
  args: process.env["ARCHAL_AGENT_COMMAND"] ? [] : ["No agent command configured"]
4738
5775
  });
4739
- if (!openclawRemote && agentConfig.command === "echo") {
5776
+ if (!apiEngine && !localEngine && agentConfig.command === "echo") {
4740
5777
  process.stderr.write(
4741
- "Warning: No agent command configured. Set ARCHAL_AGENT_COMMAND/.archal.json or provide --openclaw-url.\n"
5778
+ "Warning: No agent command configured. Set ARCHAL_AGENT_COMMAND/.archal.json, use --engine-endpoint, or run --engine local with --harness-dir.\n"
4742
5779
  );
4743
5780
  }
4744
- if (openclawRemote) {
4745
- info("Remote OpenClaw mode enabled", { url: openclawRemote.url });
5781
+ if (apiEngine) {
5782
+ info("Remote API engine mode enabled", { url: apiEngine.url });
4746
5783
  warn(
4747
- "Remote OpenClaw requires network reachability from the endpoint to each run's twin MCP URLs. If runs fail to connect, co-locate OpenClaw with Archal or expose twins via a reachable network path."
5784
+ "Remote engine mode requires network reachability from the endpoint to each run's twin MCP URLs. If runs fail to connect, co-locate the engine with Archal or expose twins via a reachable network path."
4748
5785
  );
4749
- validateRemoteOpenClawTopology(openclawRemote.url, scenario.config.twins, remoteTwinUrlOverrides);
5786
+ validateRemoteApiEngineTopology(apiEngine.url, scenario.config.twins, remoteTwinUrlOverrides);
5787
+ }
5788
+ if (localEngine) {
5789
+ info("Local harness engine mode enabled", { harnessDir: localEngine.cwd });
4750
5790
  }
4751
5791
  if (apiRouting) {
4752
5792
  info("API routing context enabled", {
@@ -4755,18 +5795,18 @@ Run 'archal doctor' for a full system check.`
4755
5795
  });
4756
5796
  }
4757
5797
  if (options.preflightOnly) {
4758
- if (openclawRemote) {
4759
- await runRemoteOpenClawPreflight(
5798
+ if (apiEngine) {
5799
+ await runRemoteApiEnginePreflight(
4760
5800
  scenario,
4761
5801
  seedSelections,
4762
5802
  options.rateLimit,
4763
- openclawRemote,
5803
+ apiEngine,
4764
5804
  remoteTwinUrlOverrides
4765
5805
  );
4766
5806
  }
4767
5807
  info("Preflight checks passed", {
4768
5808
  scenario: scenario.title,
4769
- remoteOpenClaw: openclawRemote ? "enabled" : "disabled"
5809
+ engineMode: apiEngine ? "api" : localEngine ? "local" : "legacy-local"
4770
5810
  });
4771
5811
  return {
4772
5812
  scenarioTitle: scenario.title,
@@ -4786,6 +5826,7 @@ Run 'archal doctor' for a full system check.`
4786
5826
  };
4787
5827
  const runs = [];
4788
5828
  for (let i = 0; i < numRuns; i++) {
5829
+ const adminAuth = options.apiAdminToken ? { token: options.apiAdminToken, userId: options.apiAdminUserId } : void 0;
4789
5830
  const result = await executeSingleRun(
4790
5831
  i,
4791
5832
  scenario,
@@ -4794,10 +5835,13 @@ Run 'archal doctor' for a full system check.`
4794
5835
  evaluatorConfig,
4795
5836
  timeoutSeconds,
4796
5837
  options.rateLimit,
4797
- openclawRemote,
5838
+ apiEngine,
5839
+ localEngine,
4798
5840
  remoteTwinUrlOverrides,
4799
5841
  apiRouting,
4800
- options.cloudTwinUrls
5842
+ options.cloudTwinUrls,
5843
+ options.apiBearerToken,
5844
+ adminAuth
4801
5845
  );
4802
5846
  runs.push(result);
4803
5847
  printRunProgress(i, numRuns, result.overallScore, result.error);
@@ -4836,10 +5880,10 @@ function normalizeBaseUrl(value, fallback) {
4836
5880
  const normalized = trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
4837
5881
  return normalized.length > 0 ? normalized : fallback;
4838
5882
  }
4839
- var DEFAULT_BASE_URL = "https://archal.ai";
5883
+ var DEFAULT_BASE_URL = "https://www.archal.ai";
4840
5884
  var AUTH_BASE_URL2 = normalizeBaseUrl(process.env["ARCHAL_AUTH_URL"] ?? DEFAULT_BASE_URL, DEFAULT_BASE_URL);
4841
5885
  var API_BASE_URL = normalizeBaseUrl(process.env["ARCHAL_API_URL"] ?? AUTH_BASE_URL2, AUTH_BASE_URL2);
4842
- var REQUEST_TIMEOUT_MS3 = 8e3;
5886
+ var REQUEST_TIMEOUT_MS4 = 8e3;
4843
5887
  var RETRYABLE_STATUS_CODES2 = /* @__PURE__ */ new Set([408, 425, 429, 500, 502, 503, 504]);
4844
5888
  var RETRYABLE_NETWORK_CODES = /* @__PURE__ */ new Set([
4845
5889
  "ECONNABORTED",
@@ -4864,7 +5908,7 @@ var MAX_RETRIES2 = parseBoundedInt(process.env["ARCHAL_API_MAX_RETRIES"], 3, 0,
4864
5908
  var RETRY_BASE_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_BASE_MS"], 250, 25, 1e4);
4865
5909
  var RETRY_MAX_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_MAX_MS"], 3e3, RETRY_BASE_DELAY_MS, 2e4);
4866
5910
  function sleep2(ms) {
4867
- return new Promise((resolve11) => setTimeout(resolve11, ms));
5911
+ return new Promise((resolve13) => setTimeout(resolve13, ms));
4868
5912
  }
4869
5913
  function retryDelayMs(attempt, retryAfter) {
4870
5914
  if (retryAfter) {
@@ -4924,13 +5968,30 @@ function isFinalizeEvidencePath(path) {
4924
5968
  }
4925
5969
  return /^\/api\/sessions\/[^/]+\/evidence\/finalize$/.test(pathname);
4926
5970
  }
5971
+ async function tryRefreshToken() {
5972
+ try {
5973
+ const creds = getStoredCredentials();
5974
+ if (!creds || !creds.refreshToken) return null;
5975
+ const refreshed = await refreshCliSession(creds);
5976
+ if (!refreshed) return null;
5977
+ saveCredentials(refreshed);
5978
+ return refreshed.token;
5979
+ } catch {
5980
+ return null;
5981
+ }
5982
+ }
4927
5983
  async function request(method, path, token, body) {
4928
5984
  const url = `${resolveBaseUrl(path)}${path}`;
4929
5985
  const headers = {
4930
5986
  "content-type": "application/json",
4931
- "user-agent": "archal-cli/0.1.0"
5987
+ "user-agent": CLI_USER_AGENT
4932
5988
  };
4933
- if (token) {
5989
+ const runtimeAdminToken = process.env["ARCHAL_RUNTIME_ADMIN_TOKEN"]?.trim();
5990
+ if (runtimeAdminToken) {
5991
+ headers["x-archal-admin-token"] = runtimeAdminToken;
5992
+ headers["x-archal-user-id"] = process.env["ARCHAL_RUNTIME_USER_ID"]?.trim() || "cli-user";
5993
+ headers["x-archal-plan"] = process.env["ARCHAL_RUNTIME_PLAN"]?.trim() || "free";
5994
+ } else if (token) {
4934
5995
  headers["authorization"] = `Bearer ${token}`;
4935
5996
  }
4936
5997
  const isIdempotentFinalize = method === "POST" && isFinalizeEvidencePath(path);
@@ -4938,16 +5999,28 @@ async function request(method, path, token, body) {
4938
5999
  const attempts = retriesAllowed ? MAX_RETRIES2 + 1 : 1;
4939
6000
  let lastError = "request failed";
4940
6001
  let lastOffline = false;
6002
+ let refreshAttempted = false;
4941
6003
  for (let attempt = 1; attempt <= attempts; attempt += 1) {
4942
6004
  try {
4943
6005
  const response = await fetch(url, {
4944
6006
  method,
4945
6007
  headers,
4946
6008
  body: body ? JSON.stringify(body) : void 0,
4947
- signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
6009
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS4)
4948
6010
  });
4949
6011
  if (!response.ok) {
4950
- const text = await response.text().catch(() => "");
6012
+ if (response.status === 401 && token && !refreshAttempted) {
6013
+ refreshAttempted = true;
6014
+ const refreshed = await tryRefreshToken();
6015
+ if (refreshed) {
6016
+ token = refreshed;
6017
+ headers["authorization"] = `Bearer ${token}`;
6018
+ attempt -= 1;
6019
+ continue;
6020
+ }
6021
+ }
6022
+ const rawText = await response.text().catch(() => "");
6023
+ const text = rawText.length > 200 ? rawText.slice(0, 200) + "..." : rawText;
4951
6024
  const retryable = retriesAllowed && attempt < attempts && RETRYABLE_STATUS_CODES2.has(response.status);
4952
6025
  if (retryable) {
4953
6026
  await sleep2(retryDelayMs(attempt, response.headers.get("retry-after")));
@@ -5018,7 +6091,7 @@ function fetchScenarioCatalog(token) {
5018
6091
  return request("GET", "/api/scenarios", token);
5019
6092
  }
5020
6093
 
5021
- // src/commands/twin.ts
6094
+ // src/commands/twins.ts
5022
6095
  import { Command as Command2 } from "commander";
5023
6096
 
5024
6097
  // src/constants.ts
@@ -5045,10 +6118,10 @@ var PLAN_LIMITS = {
5045
6118
  import { createInterface as createInterface2 } from "readline";
5046
6119
  function askLine(question) {
5047
6120
  const rl = createInterface2({ input: process.stdin, output: process.stderr });
5048
- return new Promise((resolve11) => {
6121
+ return new Promise((resolve13) => {
5049
6122
  rl.question(question, (answer) => {
5050
6123
  rl.close();
5051
- resolve11(answer.trim());
6124
+ resolve13(answer.trim());
5052
6125
  });
5053
6126
  });
5054
6127
  }
@@ -5057,8 +6130,7 @@ async function askConfirm(question) {
5057
6130
  return answer.toLowerCase().startsWith("y");
5058
6131
  }
5059
6132
 
5060
- // src/commands/twin.ts
5061
- var runningTwins = /* @__PURE__ */ new Map();
6133
+ // src/commands/twins.ts
5062
6134
  var KNOWN_TWINS2 = [
5063
6135
  { name: "github", package: "@archal/twin-github", description: "GitHub digital twin" },
5064
6136
  { name: "slack", package: "@archal/twin-slack", description: "Slack digital twin" },
@@ -5083,7 +6155,7 @@ async function runInteractiveTwinSelect(token) {
5083
6155
  const marker = currentlySelected.has(twin.id) ? "\x1B[32m\u2713\x1B[0m" : " ";
5084
6156
  const num = String(i + 1).padStart(2);
5085
6157
  process.stderr.write(
5086
- ` ${marker} [${num}] ${twin.name.padEnd(18)} (${twin.toolCount} tools) \u2014 ${twin.description}
6158
+ ` ${marker} [${num}] ${twin.name.padEnd(18)}${twin.toolCount != null ? ` (${twin.toolCount} tools)` : ""} \u2014 ${twin.description}
5087
6159
  `
5088
6160
  );
5089
6161
  }
@@ -5169,7 +6241,7 @@ async function listTwinCatalog() {
5169
6241
  } else {
5170
6242
  status = "\x1B[90m\u2717 not selected\x1B[0m";
5171
6243
  }
5172
- return [twin.name, String(twin.toolCount), twin.description, status];
6244
+ return [twin.name, twin.toolCount != null ? String(twin.toolCount) : "\u2014", twin.description, status];
5173
6245
  });
5174
6246
  table(headers, rows);
5175
6247
  if (isUnlimited) {
@@ -5194,85 +6266,12 @@ async function selectTwinsForPlan() {
5194
6266
  const refreshed = await refreshAuthFromServer(creds);
5195
6267
  saveCredentials(refreshed);
5196
6268
  }
5197
- function createTwinCommand() {
5198
- const cmd = new Command2("twin").description("Manage local digital twin processes (debug/local only)");
5199
- cmd.command("start").description("Start a digital twin process").argument("<name>", "Twin name (e.g., github, slack)").option("--seed <seed>", "Seed name to load", "small-project").option("--port <port>", "Port for REST transport").action((name, opts) => {
5200
- requireAuth({
5201
- action: `start the "${name}" twin`,
5202
- nextCommand: `archal twin start ${name}`
5203
- });
5204
- const knownTwin = KNOWN_TWINS2.find((t) => t.name === name);
5205
- if (!knownTwin) {
5206
- const available = KNOWN_TWINS2.map((t) => t.name).join(", ");
5207
- error(`Unknown twin: "${name}". Available twins: ${available}`);
5208
- process.exit(1);
5209
- }
5210
- if (runningTwins.has(name)) {
5211
- warn(`Twin "${name}" is already running (PID: ${runningTwins.get(name)?.pid ?? "unknown"})`);
5212
- return;
5213
- }
5214
- info("`archal run` uses hosted cloud twins. `archal twin start` is for local debugging only.");
5215
- const args = [knownTwin.package, "--seed", opts.seed, "--transport", "rest"];
5216
- if (opts.port) {
5217
- args.push("--port", opts.port);
5218
- }
5219
- info(`Starting twin: ${name}`, { seed: opts.seed, transport: "rest" });
5220
- const child = spawnMcpStdioProcess({
5221
- command: "npx",
5222
- args
5223
- });
5224
- const pid = child.pid ?? 0;
5225
- runningTwins.set(name, {
5226
- name,
5227
- pid,
5228
- startedAt: (/* @__PURE__ */ new Date()).toISOString(),
5229
- process: child
5230
- });
5231
- child.on("exit", (code) => {
5232
- info(`Twin "${name}" exited`, { code: String(code ?? "unknown") });
5233
- runningTwins.delete(name);
5234
- });
5235
- success(`Twin "${name}" started (PID: ${pid})`);
5236
- });
5237
- cmd.command("stop").description("Stop a running digital twin").argument("<name>", "Twin name to stop").action(async (name) => {
5238
- const twin = runningTwins.get(name);
5239
- if (!twin) {
5240
- error(`Twin "${name}" is not running`);
5241
- const running = Array.from(runningTwins.keys());
5242
- if (running.length > 0) {
5243
- info(`Running twins: ${running.join(", ")}`);
5244
- }
5245
- process.exit(1);
5246
- }
5247
- info(`Stopping twin: ${name}`, { pid: String(twin.pid) });
5248
- await killProcess(twin.process);
5249
- runningTwins.delete(name);
5250
- success(`Twin "${name}" stopped`);
5251
- });
5252
- cmd.command("status").description("Show status of running digital twins").action(() => {
5253
- if (runningTwins.size === 0) {
5254
- info("No twins currently running");
5255
- return;
5256
- }
5257
- const headers = ["Name", "PID", "Started", "Status"];
5258
- const rows = [];
5259
- for (const twin of runningTwins.values()) {
5260
- const isAlive = twin.process.exitCode === null;
5261
- rows.push([
5262
- twin.name,
5263
- String(twin.pid),
5264
- twin.startedAt,
5265
- isAlive ? "running" : `exited (${twin.process.exitCode})`
5266
- ]);
5267
- }
5268
- table(headers, rows);
5269
- });
5270
- cmd.command("list").description("List available digital twins and entitlement status").action(async () => {
5271
- warn("`archal twin list` is deprecated. Use `archal twins list`.");
6269
+ function createTwinsCommand() {
6270
+ const cmd = new Command2("twins").description("Manage twin catalog entitlements");
6271
+ cmd.command("list").description("List available twins and entitlement status").action(async () => {
5272
6272
  await listTwinCatalog();
5273
6273
  });
5274
6274
  cmd.command("select").description("Choose which twins to use on your free plan").action(async () => {
5275
- warn("`archal twin select` is deprecated. Use `archal twins select`.");
5276
6275
  await selectTwinsForPlan();
5277
6276
  });
5278
6277
  return cmd;
@@ -5280,7 +6279,13 @@ function createTwinCommand() {
5280
6279
 
5281
6280
  // src/commands/run.ts
5282
6281
  function createRunCommand() {
5283
- const cmd = new Command3("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path to scenario markdown file").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "100").option("--openclaw-url <url>", "OpenClaw Gateway URL or /v1/responses endpoint (enables remote OpenClaw mode)").option("--openclaw-token <token>", "Bearer token for OpenClaw Gateway auth").option("--openclaw-agent <id>", "OpenClaw agent/model id for remote mode (e.g. main or openclaw:my-agent)").option("--openclaw-twin-urls <path>", "Path to JSON mapping twin names to remotely reachable MCP base URLs").option("--openclaw-timeout <seconds>", "Timeout for remote OpenClaw HTTP call per run (defaults to run timeout)").option("--api-base-urls <path>", "Path to JSON mapping service names to clone API base URLs for raw API code routing").option("--api-proxy-url <url>", "Proxy URL for raw API code routing metadata").option("--preflight-only", "Run environment/config preflight checks only and exit").option("--no-dynamic-seed", "Disable dynamic seed generation (use keyword-matched seed only)").option("--no-seed-cache", "Skip seed cache for dynamic generation").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (scenarioArg, opts) => {
6282
+ const cmd = new Command3("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path to scenario markdown file").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "100").option("--engine-endpoint <url>", "API engine endpoint URL (base URL or /v1/responses)").option("--engine-token <token>", "Bearer token for API engine auth").option(
6283
+ "--engine-model <model>",
6284
+ "Model id for API mode; in local mode this is exported as ARCHAL_ENGINE_MODEL"
6285
+ ).option("--engine-twin-urls <path>", "Path to JSON mapping twin names to remote-reachable MCP base URLs").option("--engine-timeout <seconds>", "Timeout for API engine HTTP call per run (defaults to run timeout)").option(
6286
+ "--harness-dir <path>",
6287
+ "Local agent execution directory (archal-harness.json is optional)"
6288
+ ).option("--openclaw-url <url>", "Deprecated alias for --engine-endpoint").option("--openclaw-token <token>", "Deprecated alias for --engine-token").option("--openclaw-agent <id>", "Deprecated alias for --engine-model").option("--openclaw-twin-urls <path>", "Deprecated alias for --engine-twin-urls").option("--openclaw-timeout <seconds>", "Deprecated alias for --engine-timeout").option("--api-base-urls <path>", "Path to JSON mapping service names to clone API base URLs for raw API code routing").option("--api-proxy-url <url>", "Proxy URL for raw API code routing metadata").option("--preflight-only", "Run environment/config preflight checks only and exit").option("--no-dynamic-seed", "Disable dynamic seed generation (use keyword-matched seed only)").option("--no-seed-cache", "Skip seed cache for dynamic generation").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (scenarioArg, opts) => {
5284
6289
  const required = requireAuth({
5285
6290
  action: "run a scenario",
5286
6291
  nextCommand: `archal run ${scenarioArg}`
@@ -5296,8 +6301,8 @@ function createRunCommand() {
5296
6301
  if (opts.verbose) {
5297
6302
  configureLogger({ verbose: true, level: "debug" });
5298
6303
  }
5299
- const scenarioPath = resolve6(scenarioArg);
5300
- if (!existsSync11(scenarioPath)) {
6304
+ const scenarioPath = resolve8(scenarioArg);
6305
+ if (!existsSync12(scenarioPath)) {
5301
6306
  process.stderr.write(`Error: Scenario file not found: ${scenarioPath}
5302
6307
  `);
5303
6308
  process.exit(1);
@@ -5387,26 +6392,20 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5387
6392
  process.stderr.write("Error: --pass-threshold must be a number between 0 and 100\n");
5388
6393
  process.exit(1);
5389
6394
  }
5390
- if (!opts.openclawUrl) {
5391
- process.stderr.write(
5392
- "Error: --openclaw-url is required. `archal run` now uses cloud transport only.\n"
5393
- );
6395
+ let engine;
6396
+ try {
6397
+ engine = resolveEngineConfig(opts, timeout);
6398
+ } catch (err) {
6399
+ const message = err instanceof Error ? err.message : String(err);
6400
+ process.stderr.write(`Error: ${message}
6401
+ `);
5394
6402
  process.exit(1);
5395
6403
  }
5396
- let openclawTimeout;
5397
- if (opts.openclawTimeout) {
5398
- openclawTimeout = parseInt(opts.openclawTimeout, 10);
5399
- if (Number.isNaN(openclawTimeout) || openclawTimeout <= 0) {
5400
- process.stderr.write("Error: --openclaw-timeout must be a positive integer\n");
5401
- process.exit(1);
5402
- }
5403
- }
5404
- const resolvedOpenClawToken = resolveOpenClawGatewayToken(opts.openclawToken);
5405
- if (opts.openclawUrl && !resolvedOpenClawToken) {
6404
+ if (engine.deprecatedAliasesUsed.length > 0) {
5406
6405
  process.stderr.write(
5407
- "Error: OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD.\n"
6406
+ `Warning: OpenClaw flags are deprecated (${engine.deprecatedAliasesUsed.join(", ")}). Use --engine-* equivalents.
6407
+ `
5408
6408
  );
5409
- process.exit(1);
5410
6409
  }
5411
6410
  {
5412
6411
  const sessionResult = await startSession(credentials.token, {
@@ -5433,9 +6432,9 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5433
6432
  if (!runFailureMessage && Object.keys(endpointRoots).length > 0) {
5434
6433
  cloudTwinUrls = endpointRoots;
5435
6434
  }
5436
- if (!runFailureMessage && opts.openclawUrl && !opts.openclawTwinUrls) {
5437
- generatedTwinUrlMapPath = resolve6(
5438
- `.archal-session-${backendSessionId}-openclaw-twin-urls.json`
6435
+ if (!runFailureMessage && engine.mode === "api" && !engine.twinUrlsPath) {
6436
+ generatedTwinUrlMapPath = resolve8(
6437
+ `.archal-session-${backendSessionId}-engine-twin-urls.json`
5439
6438
  );
5440
6439
  writeFileSync9(
5441
6440
  generatedTwinUrlMapPath,
@@ -5444,7 +6443,7 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5444
6443
  );
5445
6444
  }
5446
6445
  if (!runFailureMessage && !opts.apiBaseUrls && apiBaseUrls && Object.keys(apiBaseUrls).length > 0) {
5447
- generatedApiBaseUrlMapPath = resolve6(
6446
+ generatedApiBaseUrlMapPath = resolve8(
5448
6447
  `.archal-session-${backendSessionId}-api-base-urls.json`
5449
6448
  );
5450
6449
  writeFileSync9(
@@ -5454,15 +6453,34 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5454
6453
  );
5455
6454
  }
5456
6455
  if (!runFailureMessage) {
5457
- const [statusResult, healthResult] = await Promise.all([
5458
- getSessionStatus(credentials.token, backendSessionId),
5459
- getSessionHealth(credentials.token, backendSessionId)
5460
- ]);
5461
- if (!statusResult.ok || !statusResult.data.alive) {
5462
- runFailureMessage = `session not ready (${statusResult.ok ? statusResult.data.status : statusResult.error})`;
6456
+ const SESSION_READY_TIMEOUT_MS = 12e4;
6457
+ const SESSION_POLL_INTERVAL_MS = 3e3;
6458
+ const readyDeadline = Date.now() + SESSION_READY_TIMEOUT_MS;
6459
+ let sessionReady = false;
6460
+ while (Date.now() < readyDeadline) {
6461
+ const [statusResult, healthResult] = await Promise.all([
6462
+ getSessionStatus(credentials.token, backendSessionId),
6463
+ getSessionHealth(credentials.token, backendSessionId)
6464
+ ]);
6465
+ if (!statusResult.ok) {
6466
+ runFailureMessage = `session status check failed (${statusResult.error})`;
6467
+ break;
6468
+ }
6469
+ const status = statusResult.data.status;
6470
+ if (status === "failed" || status === "expired" || status === "ended") {
6471
+ runFailureMessage = `session ${status}`;
6472
+ break;
6473
+ }
6474
+ const healthAlive = healthResult.ok && healthResult.data.alive;
6475
+ const statusAlive = statusResult.data.alive || statusResult.data.status === "ready";
6476
+ if (statusAlive && healthAlive) {
6477
+ sessionReady = true;
6478
+ break;
6479
+ }
6480
+ await new Promise((resolve13) => setTimeout(resolve13, SESSION_POLL_INTERVAL_MS));
5463
6481
  }
5464
- if (!runFailureMessage && (!healthResult.ok || !healthResult.data.alive)) {
5465
- runFailureMessage = `session health check failed (${healthResult.ok ? "dead" : healthResult.error})`;
6482
+ if (!sessionReady && !runFailureMessage) {
6483
+ runFailureMessage = "session timed out waiting for twins to become ready";
5466
6484
  }
5467
6485
  }
5468
6486
  } else if (!sessionResult.offline) {
@@ -5482,17 +6500,26 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5482
6500
  output: outputFormat,
5483
6501
  seed: opts.seed,
5484
6502
  rateLimit,
6503
+ engineEndpoint: engine.endpoint,
6504
+ engineToken: engine.token,
6505
+ engineModel: engine.model,
6506
+ engineTwinUrls: generatedTwinUrlMapPath ?? engine.twinUrlsPath,
6507
+ engineTimeout: engine.timeoutSeconds,
6508
+ harnessDir: engine.harnessDir,
5485
6509
  openclawUrl: opts.openclawUrl,
5486
- openclawToken: resolvedOpenClawToken,
6510
+ openclawToken: engine.token,
5487
6511
  openclawAgent: opts.openclawAgent,
5488
6512
  openclawTwinUrls: generatedTwinUrlMapPath ?? opts.openclawTwinUrls,
5489
- openclawTimeout,
6513
+ openclawTimeout: engine.timeoutSeconds,
5490
6514
  apiBaseUrls: generatedApiBaseUrlMapPath ?? opts.apiBaseUrls,
5491
6515
  apiProxyUrl: opts.apiProxyUrl,
5492
6516
  preflightOnly: opts.preflightOnly,
5493
6517
  cloudTwinUrls,
5494
6518
  noDynamicSeed: !opts.dynamicSeed,
5495
- noSeedCache: !opts.seedCache
6519
+ noSeedCache: !opts.seedCache,
6520
+ apiBearerToken: credentials.token,
6521
+ apiAdminToken: process.env["ARCHAL_RUNTIME_ADMIN_TOKEN"],
6522
+ apiAdminUserId: process.env["ARCHAL_RUNTIME_USER_ID"]
5496
6523
  });
5497
6524
  if (!opts.preflightOnly && report.satisfactionScore < passThreshold) {
5498
6525
  runFailureMessage = `Satisfaction score ${report.satisfactionScore.toFixed(1)} is below pass threshold ${passThreshold}`;
@@ -5502,10 +6529,10 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5502
6529
  const message = err instanceof Error ? err.message : String(err);
5503
6530
  runFailureMessage = message;
5504
6531
  } finally {
5505
- if (generatedTwinUrlMapPath && existsSync11(generatedTwinUrlMapPath)) {
6532
+ if (generatedTwinUrlMapPath && existsSync12(generatedTwinUrlMapPath)) {
5506
6533
  unlinkSync7(generatedTwinUrlMapPath);
5507
6534
  }
5508
- if (generatedApiBaseUrlMapPath && existsSync11(generatedApiBaseUrlMapPath)) {
6535
+ if (generatedApiBaseUrlMapPath && existsSync12(generatedApiBaseUrlMapPath)) {
5509
6536
  unlinkSync7(generatedApiBaseUrlMapPath);
5510
6537
  }
5511
6538
  if (backendSessionId) {
@@ -5566,10 +6593,90 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5566
6593
  });
5567
6594
  return cmd;
5568
6595
  }
5569
- function resolveOpenClawGatewayToken(rawToken) {
6596
+ function resolveEngineConfig(opts, runTimeoutSeconds) {
6597
+ const deprecatedAliasesUsed = collectDeprecatedAliases(opts);
6598
+ const mode = resolveEngineMode(opts);
6599
+ const openclawEndpointAlias = firstNonEmpty(opts.openclawUrl, process.env["OPENCLAW_URL"]);
6600
+ const endpoint = firstNonEmpty(
6601
+ opts.engineEndpoint,
6602
+ openclawEndpointAlias,
6603
+ process.env["ARCHAL_ENGINE_ENDPOINT"]
6604
+ );
6605
+ const token = resolveEngineToken(firstNonEmpty(opts.engineToken, opts.openclawToken));
6606
+ const openclawModel = resolveOpenClawModel(firstNonEmpty(opts.openclawAgent, process.env["OPENCLAW_AGENT_ID"]));
6607
+ const model = firstNonEmpty(
6608
+ opts.engineModel,
6609
+ process.env["ARCHAL_ENGINE_MODEL"],
6610
+ openclawModel,
6611
+ // Legacy OpenClaw alias path keeps the historical default model for compatibility.
6612
+ openclawEndpointAlias ? "openclaw:main" : void 0
6613
+ );
6614
+ const timeoutInput = firstNonEmpty(
6615
+ opts.engineTimeout,
6616
+ opts.openclawTimeout,
6617
+ process.env["ARCHAL_ENGINE_TIMEOUT"]
6618
+ );
6619
+ const timeoutSeconds = mode === "api" ? parsePositiveInteger(timeoutInput, "--engine-timeout") ?? runTimeoutSeconds : runTimeoutSeconds;
6620
+ const twinUrlsPath = firstNonEmpty(
6621
+ opts.engineTwinUrls,
6622
+ opts.openclawTwinUrls,
6623
+ process.env["ARCHAL_ENGINE_TWIN_URLS"]
6624
+ );
6625
+ const harnessDir = firstNonEmpty(opts.harnessDir, process.env["ARCHAL_HARNESS_DIR"]);
6626
+ if (mode === "api") {
6627
+ if (!model) {
6628
+ throw new Error(
6629
+ "--engine-model is required for API mode (or use --openclaw-agent/OPENCLAW_AGENT_ID)."
6630
+ );
6631
+ }
6632
+ if (openclawEndpointAlias && !token) {
6633
+ throw new Error(
6634
+ "OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD."
6635
+ );
6636
+ }
6637
+ }
6638
+ return {
6639
+ mode,
6640
+ endpoint,
6641
+ token,
6642
+ model,
6643
+ twinUrlsPath,
6644
+ timeoutSeconds,
6645
+ harnessDir,
6646
+ deprecatedAliasesUsed
6647
+ };
6648
+ }
6649
+ function resolveEngineMode(opts) {
6650
+ if (firstNonEmpty(
6651
+ opts.engineEndpoint,
6652
+ opts.openclawUrl,
6653
+ process.env["ARCHAL_ENGINE_ENDPOINT"],
6654
+ process.env["OPENCLAW_URL"]
6655
+ )) {
6656
+ return "api";
6657
+ }
6658
+ if (firstNonEmpty(opts.harnessDir, process.env["ARCHAL_HARNESS_DIR"])) {
6659
+ return "local";
6660
+ }
6661
+ throw new Error(
6662
+ "No agent execution mode configured. Provide --engine-endpoint for remote agent execution, or --harness-dir for local agent execution."
6663
+ );
6664
+ }
6665
+ function resolveOpenClawModel(raw) {
6666
+ if (!raw || !raw.trim()) {
6667
+ return void 0;
6668
+ }
6669
+ const value = raw.trim();
6670
+ return value.includes(":") ? value : `openclaw:${value}`;
6671
+ }
6672
+ function resolveEngineToken(rawToken) {
5570
6673
  if (rawToken && rawToken.trim()) {
5571
6674
  return rawToken.trim();
5572
6675
  }
6676
+ const engineToken = process.env["ARCHAL_ENGINE_TOKEN"]?.trim();
6677
+ if (engineToken) {
6678
+ return engineToken;
6679
+ }
5573
6680
  const token = process.env["OPENCLAW_GATEWAY_TOKEN"]?.trim();
5574
6681
  if (token) {
5575
6682
  return token;
@@ -5580,11 +6687,36 @@ function resolveOpenClawGatewayToken(rawToken) {
5580
6687
  }
5581
6688
  return void 0;
5582
6689
  }
6690
+ function firstNonEmpty(...values) {
6691
+ for (const value of values) {
6692
+ if (value && value.trim()) {
6693
+ return value.trim();
6694
+ }
6695
+ }
6696
+ return void 0;
6697
+ }
6698
+ function parsePositiveInteger(raw, flagName) {
6699
+ if (!raw) return void 0;
6700
+ const parsed = parseInt(raw, 10);
6701
+ if (Number.isNaN(parsed) || parsed <= 0) {
6702
+ throw new Error(`${flagName} must be a positive integer`);
6703
+ }
6704
+ return parsed;
6705
+ }
6706
+ function collectDeprecatedAliases(opts) {
6707
+ const aliases = [];
6708
+ if (opts.openclawUrl) aliases.push("--openclaw-url");
6709
+ if (opts.openclawToken) aliases.push("--openclaw-token");
6710
+ if (opts.openclawAgent) aliases.push("--openclaw-agent");
6711
+ if (opts.openclawTwinUrls) aliases.push("--openclaw-twin-urls");
6712
+ if (opts.openclawTimeout) aliases.push("--openclaw-timeout");
6713
+ return aliases;
6714
+ }
5583
6715
 
5584
6716
  // src/commands/init.ts
5585
6717
  import { Command as Command4 } from "commander";
5586
- import { existsSync as existsSync12, mkdirSync as mkdirSync6, writeFileSync as writeFileSync10 } from "fs";
5587
- import { join as join9, resolve as resolve7 } from "path";
6718
+ import { existsSync as existsSync13, mkdirSync as mkdirSync6, writeFileSync as writeFileSync10 } from "fs";
6719
+ import { join as join9, resolve as resolve9 } from "path";
5588
6720
  var SAMPLE_SCENARIO = `# Close Stale Issues
5589
6721
 
5590
6722
  ## Setup
@@ -5759,7 +6891,7 @@ var SAMPLE_PACKAGE_JSON = `{
5759
6891
  }
5760
6892
  `;
5761
6893
  function writeIfMissing(filePath, content) {
5762
- if (!existsSync12(filePath)) {
6894
+ if (!existsSync13(filePath)) {
5763
6895
  writeFileSync10(filePath, content);
5764
6896
  info(`Created ${filePath}`);
5765
6897
  } else {
@@ -5768,8 +6900,8 @@ function writeIfMissing(filePath, content) {
5768
6900
  }
5769
6901
  function createInitCommand() {
5770
6902
  const cmd = new Command4("init").description("Initialize an Archal test directory with sample scenario and agent").argument("[directory]", "Directory to initialize", "archal").action((directory) => {
5771
- const targetDir = resolve7(directory);
5772
- if (existsSync12(targetDir)) {
6903
+ const targetDir = resolve9(directory);
6904
+ if (existsSync13(targetDir)) {
5773
6905
  warn(`Directory already exists: ${targetDir}`);
5774
6906
  warn("Skipping files that already exist.");
5775
6907
  } else {
@@ -5792,23 +6924,10 @@ function createInitCommand() {
5792
6924
  return cmd;
5793
6925
  }
5794
6926
 
5795
- // src/commands/twins.ts
5796
- import { Command as Command5 } from "commander";
5797
- function createTwinsCommand() {
5798
- const cmd = new Command5("twins").description("Manage twin catalog entitlements");
5799
- cmd.command("list").description("List available twins and entitlement status").action(async () => {
5800
- await listTwinCatalog();
5801
- });
5802
- cmd.command("select").description("Choose which twins to use on your free plan").action(async () => {
5803
- await selectTwinsForPlan();
5804
- });
5805
- return cmd;
5806
- }
5807
-
5808
6927
  // src/commands/scenario.ts
5809
- import { Command as Command6 } from "commander";
5810
- import { existsSync as existsSync13, readdirSync as readdirSync3, writeFileSync as writeFileSync11, mkdirSync as mkdirSync7 } from "fs";
5811
- import { resolve as resolve8, join as join10, extname } from "path";
6928
+ import { Command as Command5 } from "commander";
6929
+ import { existsSync as existsSync14, readdirSync as readdirSync3, writeFileSync as writeFileSync11, mkdirSync as mkdirSync7 } from "fs";
6930
+ import { resolve as resolve10, join as join10, extname, relative } from "path";
5812
6931
  var SCENARIO_TEMPLATE = `# {{NAME}}
5813
6932
 
5814
6933
  ## Setup
@@ -5834,15 +6953,15 @@ timeout: 120
5834
6953
  runs: 5
5835
6954
  `;
5836
6955
  var SCENARIO_DIR_CANDIDATES = [
5837
- resolve8("scenarios"),
5838
- resolve8("scenario"),
5839
- resolve8("test", "scenarios"),
5840
- resolve8("tests", "scenarios"),
5841
- resolve8(".archal", "scenarios")
6956
+ resolve10("scenarios"),
6957
+ resolve10("scenario"),
6958
+ resolve10("test", "scenarios"),
6959
+ resolve10("tests", "scenarios"),
6960
+ resolve10(".archal", "scenarios")
5842
6961
  ];
5843
6962
  function findScenarioFiles(dir) {
5844
6963
  const files = [];
5845
- if (!existsSync13(dir)) return files;
6964
+ if (!existsSync14(dir)) return files;
5846
6965
  const entries = readdirSync3(dir, { withFileTypes: true });
5847
6966
  for (const entry of entries) {
5848
6967
  const fullPath = join10(dir, entry.name);
@@ -5856,22 +6975,19 @@ function findScenarioFiles(dir) {
5856
6975
  }
5857
6976
  function findLocalScenariosDir() {
5858
6977
  for (const candidate of SCENARIO_DIR_CANDIDATES) {
5859
- if (existsSync13(candidate)) {
6978
+ if (existsSync14(candidate)) {
5860
6979
  return { dir: candidate, candidates: SCENARIO_DIR_CANDIDATES };
5861
6980
  }
5862
6981
  }
5863
6982
  return {
5864
- dir: resolve8("scenarios"),
6983
+ dir: resolve10("scenarios"),
5865
6984
  candidates: SCENARIO_DIR_CANDIDATES
5866
6985
  };
5867
6986
  }
5868
6987
  function toDisplayPath(path) {
5869
- const cwd = resolve8(".");
5870
- if (path === cwd) return ".";
5871
- if (path.startsWith(`${cwd}/`)) {
5872
- return `.${path.slice(cwd.length)}`;
5873
- }
5874
- return path;
6988
+ const rel = relative(resolve10("."), path);
6989
+ if (!rel) return ".";
6990
+ return rel.startsWith("..") ? path : rel;
5875
6991
  }
5876
6992
  function getCachedScenariosDir() {
5877
6993
  return join10(ensureArchalDir(), "scenarios");
@@ -5897,14 +7013,14 @@ async function syncRemoteScenarios(token) {
5897
7013
  return scenarios;
5898
7014
  }
5899
7015
  function createScenarioCommand() {
5900
- const cmd = new Command6("scenario").description("Manage test scenarios");
7016
+ const cmd = new Command5("scenario").description("Manage test scenarios");
5901
7017
  cmd.command("list").description("List available scenarios").option("-d, --dir <directory>", "Scenario directory to search").option("--local", "Only show local scenarios (skip remote fetch)").action(async (opts) => {
5902
7018
  const creds = getCredentials();
5903
7019
  const headers = ["Scenario", "Source", "Criteria", "Twins"];
5904
7020
  const rows = [];
5905
- const localResolution = opts.dir ? { dir: resolve8(opts.dir), candidates: [resolve8(opts.dir)] } : findLocalScenariosDir();
7021
+ const localResolution = opts.dir ? { dir: resolve10(opts.dir), candidates: [resolve10(opts.dir)] } : findLocalScenariosDir();
5906
7022
  const localDir = localResolution.dir;
5907
- if (existsSync13(localDir)) {
7023
+ if (existsSync14(localDir)) {
5908
7024
  const localFiles = findScenarioFiles(localDir);
5909
7025
  let hiddenCount = 0;
5910
7026
  for (const file of localFiles) {
@@ -5917,7 +7033,7 @@ function createScenarioCommand() {
5917
7033
  continue;
5918
7034
  }
5919
7035
  }
5920
- const relativePath = file.replace(resolve8(".") + "\\", "").replace(resolve8(".") + "/", "");
7036
+ const relativePath = relative(resolve10("."), file);
5921
7037
  rows.push([
5922
7038
  scenario.title,
5923
7039
  relativePath,
@@ -5926,7 +7042,7 @@ function createScenarioCommand() {
5926
7042
  ]);
5927
7043
  } catch (err) {
5928
7044
  const message = err instanceof Error ? err.message : String(err);
5929
- const relativePath = file.replace(resolve8(".") + "\\", "").replace(resolve8(".") + "/", "");
7045
+ const relativePath = relative(resolve10("."), file);
5930
7046
  rows.push([`(parse error)`, relativePath, "-", message]);
5931
7047
  }
5932
7048
  }
@@ -5971,8 +7087,8 @@ function createScenarioCommand() {
5971
7087
  Found ${rows.length} scenario(s)`);
5972
7088
  });
5973
7089
  cmd.command("validate").description("Parse and validate a scenario file").argument("<file>", "Path to scenario markdown file").action((file) => {
5974
- const filePath = resolve8(file);
5975
- if (!existsSync13(filePath)) {
7090
+ const filePath = resolve10(file);
7091
+ if (!existsSync14(filePath)) {
5976
7092
  error(`File not found: ${filePath}`);
5977
7093
  process.exit(1);
5978
7094
  }
@@ -6014,14 +7130,14 @@ Found ${rows.length} scenario(s)`);
6014
7130
  info("Run `archal twins select` to change your selection or `archal upgrade` to unlock all twins.");
6015
7131
  process.exit(1);
6016
7132
  }
6017
- const scenariosDir = opts.dir ? resolve8(opts.dir) : findLocalScenariosDir().dir;
6018
- if (!existsSync13(scenariosDir)) {
7133
+ const scenariosDir = opts.dir ? resolve10(opts.dir) : findLocalScenariosDir().dir;
7134
+ if (!existsSync14(scenariosDir)) {
6019
7135
  mkdirSync7(scenariosDir, { recursive: true });
6020
7136
  info(`Created scenarios directory: ${scenariosDir}`);
6021
7137
  }
6022
7138
  const fileName = name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "") + ".md";
6023
7139
  const filePath = join10(scenariosDir, fileName);
6024
- if (existsSync13(filePath)) {
7140
+ if (existsSync14(filePath)) {
6025
7141
  error(`Scenario file already exists: ${filePath}`);
6026
7142
  process.exit(1);
6027
7143
  }
@@ -6038,9 +7154,9 @@ Found ${rows.length} scenario(s)`);
6038
7154
 
6039
7155
  // src/commands/trace.ts
6040
7156
  import { writeFileSync as writeFileSync12 } from "fs";
6041
- import { resolve as resolve9 } from "path";
7157
+ import { resolve as resolve11 } from "path";
6042
7158
  import { createInterface as createInterface3 } from "readline";
6043
- import { Command as Command7 } from "commander";
7159
+ import { Command as Command6 } from "commander";
6044
7160
  function formatTimestamp2(iso) {
6045
7161
  try {
6046
7162
  return new Date(iso).toLocaleString();
@@ -6063,10 +7179,10 @@ var TRACE_HEADERS = ["ID", "Scenario", "Score", "Runs", "Entries", "Timestamp"];
6063
7179
  function confirmPrompt(message) {
6064
7180
  if (!process.stdin.isTTY) return Promise.resolve(false);
6065
7181
  const rl = createInterface3({ input: process.stdin, output: process.stderr });
6066
- return new Promise((resolve11) => {
7182
+ return new Promise((resolve13) => {
6067
7183
  rl.question(`${message} [y/N] `, (answer) => {
6068
7184
  rl.close();
6069
- resolve11(answer.trim().toLowerCase() === "y");
7185
+ resolve13(answer.trim().toLowerCase() === "y");
6070
7186
  });
6071
7187
  });
6072
7188
  }
@@ -6079,7 +7195,7 @@ function parsePositiveInt(val, flag) {
6079
7195
  return n;
6080
7196
  }
6081
7197
  function createTraceCommand() {
6082
- const cmd = new Command7("trace").description("Inspect, search, and manage run traces");
7198
+ const cmd = new Command6("trace").description("Inspect, search, and manage run traces");
6083
7199
  cmd.command("list").description("List recent traces").option("-n, --limit <count>", "Number of traces to show", "20").action((opts) => {
6084
7200
  const traces = listTraces(parsePositiveInt(opts.limit, "--limit"));
6085
7201
  if (traces.length === 0) {
@@ -6183,7 +7299,7 @@ ${traces.length} trace(s) found`);
6183
7299
  process.exit(1);
6184
7300
  }
6185
7301
  if (opts.output) {
6186
- const outPath = resolve9(opts.output);
7302
+ const outPath = resolve11(opts.output);
6187
7303
  writeFileSync12(outPath, json, "utf-8");
6188
7304
  info(`Trace exported to: ${outPath}`);
6189
7305
  } else {
@@ -6260,10 +7376,10 @@ ${traces.length} trace(s) found`);
6260
7376
  }
6261
7377
 
6262
7378
  // src/commands/config.ts
6263
- import { existsSync as existsSync14, unlinkSync as unlinkSync8 } from "fs";
6264
- import { Command as Command8 } from "commander";
7379
+ import { existsSync as existsSync15, unlinkSync as unlinkSync8 } from "fs";
7380
+ import { Command as Command7 } from "commander";
6265
7381
  function createConfigCommand() {
6266
- const cmd = new Command8("config").description("Manage Archal configuration");
7382
+ const cmd = new Command7("config").description("Manage Archal configuration");
6267
7383
  cmd.command("show").description("Print current configuration").option("--json", "Output as JSON").action((opts) => {
6268
7384
  const display = getConfigDisplay();
6269
7385
  if (opts.json) {
@@ -6279,6 +7395,11 @@ function createConfigCommand() {
6279
7395
  model: evaluator["model"] ?? "(not set)",
6280
7396
  apiKey: evaluator["apiKey"] ?? "(not set)"
6281
7397
  });
7398
+ const seedGen = display["seedGeneration"];
7399
+ printConfigSection("Seed Generation", {
7400
+ model: seedGen["model"] ?? "(not set)",
7401
+ geminiApiKey: seedGen["geminiApiKey"] ?? "(not set)"
7402
+ });
6282
7403
  const defaults = display["defaults"];
6283
7404
  printConfigSection("Defaults", {
6284
7405
  runs: String(defaults["runs"]),
@@ -6291,12 +7412,16 @@ function createConfigCommand() {
6291
7412
  });
6292
7413
  process.stdout.write("\n");
6293
7414
  info("Set values with: archal config set <key> <value>");
6294
- info("Valid keys: telemetry, evaluator.model, evaluator.apiKey, defaults.runs, defaults.timeout");
7415
+ info("Valid keys: telemetry, evaluator.model, evaluator.apiKey, seedGeneration.model, seedGeneration.geminiApiKey, defaults.runs, defaults.timeout");
6295
7416
  });
6296
7417
  cmd.command("set").description("Set a configuration value").argument("<key>", "Configuration key (e.g., evaluator.model, defaults.runs)").argument("<value>", "Value to set").action((key, value) => {
6297
7418
  try {
6298
7419
  setConfigValue(key, value);
6299
7420
  success(`Set ${key} = ${key.includes("apiKey") ? "***" : value}`);
7421
+ if (key.includes("apiKey") && !value.startsWith("env:")) {
7422
+ warn("API key stored in plaintext in config file. Consider using env: prefix instead:");
7423
+ info(` archal config set ${key} env:YOUR_ENV_VAR_NAME`);
7424
+ }
6300
7425
  } catch (err) {
6301
7426
  const message = err instanceof Error ? err.message : String(err);
6302
7427
  error(message);
@@ -6306,7 +7431,7 @@ function createConfigCommand() {
6306
7431
  cmd.command("init").description("Create default configuration file").option("--force", "Overwrite existing config").action((opts) => {
6307
7432
  const configPath = getConfigPath();
6308
7433
  if (opts.force) {
6309
- if (existsSync14(configPath)) {
7434
+ if (existsSync15(configPath)) {
6310
7435
  unlinkSync8(configPath);
6311
7436
  }
6312
7437
  }
@@ -6316,7 +7441,7 @@ function createConfigCommand() {
6316
7441
  info("\nNext steps:");
6317
7442
  info(" 1. Set your API key:");
6318
7443
  info(" archal config set evaluator.apiKey your-key-here");
6319
- info(" or set ANTHROPIC_API_KEY environment variable");
7444
+ info(" or set GEMINI_API_KEY environment variable (default provider)");
6320
7445
  info("");
6321
7446
  info(" 2. Create a scenario:");
6322
7447
  info(" archal scenario create my-first-test");
@@ -6345,31 +7470,33 @@ function printConfigSection(name, values) {
6345
7470
  }
6346
7471
 
6347
7472
  // src/commands/demo.ts
6348
- import { Command as Command9 } from "commander";
6349
- import { existsSync as existsSync15 } from "fs";
6350
- import { resolve as resolve10, dirname as dirname4 } from "path";
6351
- import { fileURLToPath as fileURLToPath4 } from "url";
7473
+ import { Command as Command8 } from "commander";
7474
+ import { existsSync as existsSync16 } from "fs";
7475
+ import { resolve as resolve12, dirname as dirname4 } from "path";
7476
+ import { fileURLToPath as fileURLToPath5 } from "url";
6352
7477
  import { createRequire as createRequire4 } from "module";
6353
- var __dirname4 = fileURLToPath4(new URL(".", import.meta.url));
7478
+ var __dirname5 = fileURLToPath5(new URL(".", import.meta.url));
6354
7479
  function resolveDemoDir() {
6355
- const monorepoDemoDir = resolve10(__dirname4, "..", "demo");
6356
- if (existsSync15(resolve10(monorepoDemoDir, "scenario.md"))) {
6357
- return monorepoDemoDir;
7480
+ const demoDir = resolve12(__dirname5, "..", "demo");
7481
+ if (existsSync16(resolve12(demoDir, "scenario.md"))) {
7482
+ return demoDir;
6358
7483
  }
6359
7484
  try {
6360
7485
  const require2 = createRequire4(import.meta.url);
6361
7486
  const cliMain = require2.resolve("@archal/cli");
6362
7487
  const pkgDir = dirname4(dirname4(cliMain));
6363
- const npmDemoDir = resolve10(pkgDir, "demo");
6364
- if (existsSync15(resolve10(npmDemoDir, "scenario.md"))) {
7488
+ const npmDemoDir = resolve12(pkgDir, "demo");
7489
+ if (existsSync16(resolve12(npmDemoDir, "scenario.md"))) {
6365
7490
  return npmDemoDir;
6366
7491
  }
6367
7492
  } catch {
6368
7493
  }
6369
- throw new Error("Demo files not found. Ensure @archal/cli is installed correctly.");
7494
+ throw new Error(
7495
+ "Demo files not found. Ensure @archal/cli is installed correctly.\nIf installed globally, try reinstalling: npm install -g @archal/cli"
7496
+ );
6370
7497
  }
6371
7498
  function createDemoCommand() {
6372
- const cmd = new Command9("demo").description("Run a built-in demo: good agent vs bad agent on the same scenario").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (opts) => {
7499
+ const cmd = new Command8("demo").description("Run a built-in demo: good agent vs bad agent on the same scenario").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (opts) => {
6373
7500
  if (opts.quiet) {
6374
7501
  configureLogger({ quiet: true });
6375
7502
  }
@@ -6377,9 +7504,9 @@ function createDemoCommand() {
6377
7504
  configureLogger({ verbose: true, level: "debug" });
6378
7505
  }
6379
7506
  const demoDir = resolveDemoDir();
6380
- const scenarioPath = resolve10(demoDir, "scenario.md");
6381
- const goodAgentPath = resolve10(demoDir, "good-agent.mjs");
6382
- const badAgentPath = resolve10(demoDir, "bad-agent.mjs");
7507
+ const scenarioPath = resolve12(demoDir, "scenario.md");
7508
+ const goodAgentPath = resolve12(demoDir, "good-agent.mjs");
7509
+ const badAgentPath = resolve12(demoDir, "bad-agent.mjs");
6383
7510
  process.stderr.write("\n\x1B[36m\x1B[1marchal demo\x1B[0m \x1B[2m\u2014 same scenario, two agents\x1B[0m\n\n");
6384
7511
  process.stderr.write("\x1B[1m\x1B[32m\u25B8 Good agent\x1B[0m \x1B[2m(checks labels, skips keep-open)\x1B[0m\n");
6385
7512
  const goodReport = await runScenario({
@@ -6412,100 +7539,194 @@ function createDemoCommand() {
6412
7539
  }
6413
7540
 
6414
7541
  // src/commands/login.ts
6415
- import { Command as Command10 } from "commander";
7542
+ import { Command as Command9 } from "commander";
6416
7543
  import { exec } from "child_process";
6417
- import { randomBytes } from "crypto";
7544
+ import { createHash as createHash3, randomBytes } from "crypto";
6418
7545
  import { createServer } from "http";
6419
- var AUTH_BASE_URL3 = process.env["ARCHAL_AUTH_URL"] ?? "https://archal.ai";
7546
+ function normalizeAuthUrl2(value) {
7547
+ const trimmed = value.trim().replace(/\/+$/, "");
7548
+ return trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
7549
+ }
7550
+ var AUTH_BASE_URL3 = normalizeAuthUrl2(process.env["ARCHAL_AUTH_URL"] ?? "https://www.archal.ai");
6420
7551
  var START_PORT = 51423;
6421
7552
  var LOGIN_TIMEOUT_MS = 5 * 60 * 1e3;
7553
+ var TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
7554
+ function escapeHtml(value) {
7555
+ return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
7556
+ }
6422
7557
  function openBrowser(url) {
6423
7558
  const platform = process.platform;
6424
7559
  const command = platform === "darwin" ? `open "${url}"` : platform === "win32" ? `start "" "${url}"` : `xdg-open "${url}"`;
6425
- exec(command, () => {
7560
+ exec(command, (err) => {
7561
+ if (err) {
7562
+ info("Could not open browser automatically.");
7563
+ info(`Please visit the URL above manually to complete login.`);
7564
+ }
6426
7565
  });
6427
7566
  }
7567
+ function createPkcePair() {
7568
+ const codeVerifier = randomBytes(32).toString("base64url");
7569
+ const codeChallenge = createHash3("sha256").update(codeVerifier).digest("base64url");
7570
+ return { codeVerifier, codeChallenge };
7571
+ }
7572
+ function isPlan2(value) {
7573
+ return value === "free" || value === "pro" || value === "enterprise";
7574
+ }
7575
+ function credentialsFromApiToken(token) {
7576
+ const nowSeconds = Math.floor(Date.now() / 1e3);
7577
+ return {
7578
+ token,
7579
+ refreshToken: "",
7580
+ email: "(from token)",
7581
+ plan: "free",
7582
+ selectedTwins: [],
7583
+ expiresAt: getJwtExpiry(token) ?? nowSeconds + TOKEN_FALLBACK_TTL_SECONDS
7584
+ };
7585
+ }
7586
+ function credentialsFromLegacyCallback(requestUrl) {
7587
+ const token = requestUrl.searchParams.get("token") ?? requestUrl.searchParams.get("access_token");
7588
+ const refreshToken = requestUrl.searchParams.get("refresh_token") ?? requestUrl.searchParams.get("refreshToken") ?? "";
7589
+ const email = requestUrl.searchParams.get("email");
7590
+ const planParam = requestUrl.searchParams.get("plan");
7591
+ const twins = requestUrl.searchParams.get("twins");
7592
+ if (!token || !email || !isPlan2(planParam)) {
7593
+ return null;
7594
+ }
7595
+ const nowSeconds = Math.floor(Date.now() / 1e3);
7596
+ return {
7597
+ token,
7598
+ refreshToken,
7599
+ email,
7600
+ plan: planParam,
7601
+ selectedTwins: twins ? twins.split(",").filter(Boolean) : [],
7602
+ expiresAt: getJwtExpiry(token) ?? nowSeconds + TOKEN_FALLBACK_TTL_SECONDS
7603
+ };
7604
+ }
6428
7605
  function findFreePort(startPort) {
6429
- return new Promise((resolve11, reject) => {
7606
+ return new Promise((resolve13, reject) => {
6430
7607
  const server = createServer();
6431
7608
  server.listen(startPort, "127.0.0.1", () => {
6432
7609
  const address = server.address();
6433
7610
  const port = typeof address === "object" && address ? address.port : startPort;
6434
- server.close(() => resolve11(port));
7611
+ server.close(() => resolve13(port));
6435
7612
  });
6436
7613
  server.on("error", () => {
6437
7614
  if (startPort < START_PORT + 100) {
6438
- findFreePort(startPort + 1).then(resolve11).catch(reject);
7615
+ findFreePort(startPort + 1).then(resolve13).catch(reject);
6439
7616
  } else {
6440
- reject(new Error("Could not find a free localhost callback port"));
7617
+ reject(new Error(
7618
+ "Could not find a free localhost callback port (tried ports 51423-51523).\nTry closing other services, or use token login: archal login --token <your-token>"
7619
+ ));
6441
7620
  }
6442
7621
  });
6443
7622
  });
6444
7623
  }
6445
7624
  function createLoginCommand() {
6446
- return new Command10("login").description("Log in via archal.ai browser auth").action(async () => {
7625
+ return new Command9("login").description("Log in via archal.ai browser auth").option("--no-browser", "Do not automatically open the login URL in a browser").option("--token <token>", "Use an API key/token directly (CI/service fallback)").action(async (opts) => {
7626
+ const directToken = opts.token?.trim();
7627
+ if (directToken) {
7628
+ let credentials = credentialsFromApiToken(directToken);
7629
+ credentials = await refreshAuthFromServer(credentials);
7630
+ saveCredentials(credentials);
7631
+ success(`Logged in as ${credentials.email} (${credentials.plan} plan)`);
7632
+ return;
7633
+ }
6447
7634
  const port = await findFreePort(START_PORT);
6448
7635
  const state = randomBytes(16).toString("hex");
6449
7636
  const redirectUrl = `http://localhost:${port}/callback`;
6450
- const authUrl = `${AUTH_BASE_URL3}/cli-auth?redirect=${encodeURIComponent(redirectUrl)}&state=${encodeURIComponent(state)}`;
7637
+ const { codeVerifier, codeChallenge } = createPkcePair();
7638
+ const authUrl = `${AUTH_BASE_URL3}/cli-auth?redirect=${encodeURIComponent(redirectUrl)}&state=${encodeURIComponent(state)}&code_challenge=${encodeURIComponent(codeChallenge)}&code_challenge_method=S256`;
6451
7639
  info("Opening browser for authentication...");
6452
7640
  info(`If your browser does not open, visit:
6453
7641
  ${authUrl}`);
6454
- openBrowser(authUrl);
6455
- await new Promise((resolve11, reject) => {
6456
- const server = createServer((req, res) => {
6457
- const requestUrl = new URL(req.url ?? "/", `http://localhost:${port}`);
6458
- if (requestUrl.pathname !== "/callback") {
6459
- res.writeHead(404);
6460
- res.end("Not found");
6461
- return;
6462
- }
6463
- const returnedState = requestUrl.searchParams.get("state");
6464
- if (returnedState !== state) {
6465
- res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
6466
- res.end("<h1>Login failed</h1><p>State mismatch.</p>");
6467
- server.close();
6468
- reject(new Error("State mismatch in callback"));
7642
+ if (opts.browser !== false) {
7643
+ openBrowser(authUrl);
7644
+ }
7645
+ await new Promise((resolve13, reject) => {
7646
+ let settled = false;
7647
+ const settleResolve = () => {
7648
+ if (settled) return;
7649
+ settled = true;
7650
+ resolve13();
7651
+ };
7652
+ const settleReject = (error2) => {
7653
+ if (settled) return;
7654
+ settled = true;
7655
+ reject(error2);
7656
+ };
7657
+ function closeAndResolve() {
7658
+ if (!server.listening) {
7659
+ settleResolve();
6469
7660
  return;
6470
7661
  }
6471
- const token = requestUrl.searchParams.get("token");
6472
- const refreshToken = requestUrl.searchParams.get("refresh_token") ?? requestUrl.searchParams.get("refreshToken") ?? "";
6473
- const email = requestUrl.searchParams.get("email");
6474
- const plan = requestUrl.searchParams.get("plan");
6475
- const twins = requestUrl.searchParams.get("twins");
6476
- if (!token || !email || !plan) {
6477
- res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
6478
- res.end("<h1>Login failed</h1><p>Missing callback parameters.</p>");
6479
- server.close();
6480
- reject(new Error("Missing token/email/plan in callback"));
7662
+ server.close(() => settleResolve());
7663
+ }
7664
+ function closeAndReject(error2) {
7665
+ if (!server.listening) {
7666
+ settleReject(error2);
6481
7667
  return;
6482
7668
  }
6483
- const expiresAt = getJwtExpiry(token) ?? Math.floor(Date.now() / 1e3) + 30 * 24 * 60 * 60;
6484
- const credentials = {
6485
- token,
6486
- refreshToken,
6487
- email,
6488
- plan,
6489
- selectedTwins: twins ? twins.split(",").filter(Boolean) : [],
6490
- expiresAt
6491
- };
6492
- saveCredentials(credentials);
6493
- res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
6494
- res.end("<h1>Login successful</h1><p>You can close this tab.</p>");
6495
- success(`Logged in as ${email} (${plan})`);
6496
- if (plan === "free" && credentials.selectedTwins.length === 0) {
6497
- info(
6498
- "You haven't selected any twins yet.\n Run `archal twins select` to choose up to 5 twins for your free plan."
6499
- );
6500
- }
6501
- server.close(() => resolve11());
7669
+ server.close(() => settleReject(error2));
7670
+ }
7671
+ const server = createServer((req, res) => {
7672
+ void (async () => {
7673
+ try {
7674
+ const requestUrl = new URL(req.url ?? "/", `http://localhost:${port}`);
7675
+ if (requestUrl.pathname !== "/callback") {
7676
+ res.writeHead(404);
7677
+ res.end("Not found");
7678
+ return;
7679
+ }
7680
+ const returnedState = requestUrl.searchParams.get("state");
7681
+ if (returnedState !== state) {
7682
+ res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
7683
+ res.end("<h1>Login failed</h1><p>State mismatch.</p>");
7684
+ closeAndReject(new Error("State mismatch in callback"));
7685
+ return;
7686
+ }
7687
+ const code = requestUrl.searchParams.get("code");
7688
+ const credentials = code ? await exchangeCliAuthCode({
7689
+ code,
7690
+ codeVerifier,
7691
+ redirectUri: redirectUrl
7692
+ }) : credentialsFromLegacyCallback(requestUrl);
7693
+ if (!credentials) {
7694
+ res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
7695
+ res.end("<h1>Login failed</h1><p>Missing auth code.</p>");
7696
+ closeAndReject(new Error("Missing code in callback"));
7697
+ return;
7698
+ }
7699
+ saveCredentials(credentials);
7700
+ res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
7701
+ res.end("<h1>Login successful</h1><p>You can close this tab.</p>");
7702
+ success(`Logged in as ${credentials.email} (${credentials.plan} plan)`);
7703
+ if (credentials.plan === "free" && credentials.selectedTwins.length === 0) {
7704
+ info(
7705
+ "You haven't selected any twins yet.\n Run `archal twins select` to choose up to 5 twins for your free plan."
7706
+ );
7707
+ }
7708
+ closeAndResolve();
7709
+ } catch (error2) {
7710
+ const message = error2 instanceof Error ? error2.message : String(error2);
7711
+ if (!res.headersSent) {
7712
+ res.writeHead(500, { "content-type": "text/html; charset=utf-8" });
7713
+ res.end(`<h1>Login failed</h1><p>${escapeHtml(message)}</p>`);
7714
+ }
7715
+ closeAndReject(error2);
7716
+ }
7717
+ })().catch((error2) => {
7718
+ closeAndReject(error2);
7719
+ });
6502
7720
  });
6503
- server.listen(port, "127.0.0.1");
6504
7721
  const timeout = setTimeout(() => {
6505
- server.close();
6506
- reject(new Error("Login timed out. Run archal login again."));
7722
+ closeAndReject(new Error("Login timed out. Run archal login again."));
6507
7723
  }, LOGIN_TIMEOUT_MS);
6508
7724
  server.on("close", () => clearTimeout(timeout));
7725
+ server.once("error", (error2) => {
7726
+ clearTimeout(timeout);
7727
+ closeAndReject(error2);
7728
+ });
7729
+ server.listen(port, "127.0.0.1");
6509
7730
  }).catch((error2) => {
6510
7731
  const message = error2 instanceof Error ? error2.message : String(error2);
6511
7732
  error(message);
@@ -6515,9 +7736,9 @@ function createLoginCommand() {
6515
7736
  }
6516
7737
 
6517
7738
  // src/commands/logout.ts
6518
- import { Command as Command11 } from "commander";
7739
+ import { Command as Command10 } from "commander";
6519
7740
  function createLogoutCommand() {
6520
- return new Command11("logout").description("Log out and remove stored credentials").action(() => {
7741
+ return new Command10("logout").description("Log out and remove stored credentials").action(() => {
6521
7742
  const creds = getCredentials();
6522
7743
  if (!creds) {
6523
7744
  info("Not currently logged in.");
@@ -6535,7 +7756,7 @@ function createLogoutCommand() {
6535
7756
  }
6536
7757
 
6537
7758
  // src/commands/whoami.ts
6538
- import { Command as Command12 } from "commander";
7759
+ import { Command as Command11 } from "commander";
6539
7760
  var RESET2 = "\x1B[0m";
6540
7761
  var BOLD2 = "\x1B[1m";
6541
7762
  var DIM2 = "\x1B[2m";
@@ -6543,11 +7764,12 @@ var CYAN2 = "\x1B[36m";
6543
7764
  var GREEN2 = "\x1B[32m";
6544
7765
  var YELLOW2 = "\x1B[33m";
6545
7766
  function createWhoamiCommand() {
6546
- return new Command12("whoami").description("Show current login status, plan, and entitlements").option("--refresh", "Force refresh from server").action(async (opts) => {
6547
- let current = requireAuth({
6548
- action: "show account status",
6549
- nextCommand: "archal whoami"
6550
- });
7767
+ return new Command11("whoami").description("Show current login status, plan, and entitlements").option("--refresh", "Force refresh from server").action(async (opts) => {
7768
+ let current = getCredentials();
7769
+ if (!current) {
7770
+ info("Not logged in. Run: archal login");
7771
+ return;
7772
+ }
6551
7773
  if (opts.refresh) {
6552
7774
  current = await refreshAuthFromServer(current);
6553
7775
  saveCredentials(current);
@@ -6611,7 +7833,7 @@ function planBadge(plan) {
6611
7833
  }
6612
7834
 
6613
7835
  // src/commands/upgrade.ts
6614
- import { Command as Command13 } from "commander";
7836
+ import { Command as Command12 } from "commander";
6615
7837
  import { exec as exec2 } from "child_process";
6616
7838
  var BILLING_URL = "https://archal.ai/dashboard/billing";
6617
7839
  function openBrowser2(url) {
@@ -6621,7 +7843,7 @@ function openBrowser2(url) {
6621
7843
  });
6622
7844
  }
6623
7845
  function createUpgradeCommand() {
6624
- return new Command13("upgrade").description("Open the Archal billing page to upgrade your plan").action(() => {
7846
+ return new Command12("upgrade").description("Open the Archal billing page to upgrade your plan").action(() => {
6625
7847
  const creds = getCredentials();
6626
7848
  if (creds?.plan === "enterprise") {
6627
7849
  info("You are already on the enterprise plan.");
@@ -6640,7 +7862,7 @@ function createUpgradeCommand() {
6640
7862
  }
6641
7863
 
6642
7864
  // src/commands/help.ts
6643
- import { Command as Command14 } from "commander";
7865
+ import { Command as Command13 } from "commander";
6644
7866
  var RESET3 = "\x1B[0m";
6645
7867
  var BOLD3 = "\x1B[1m";
6646
7868
  var DIM3 = "\x1B[2m";
@@ -6668,15 +7890,7 @@ var COMMAND_GROUPS = [
6668
7890
  ]
6669
7891
  },
6670
7892
  {
6671
- heading: "Twin Processes",
6672
- commands: [
6673
- { name: "twin start <name>", description: "Start a local twin process (debug/local only)" },
6674
- { name: "twin stop <name>", description: "Stop a running local twin process" },
6675
- { name: "twin status", description: "Show running local twin processes" }
6676
- ]
6677
- },
6678
- {
6679
- heading: "Twin Catalog",
7893
+ heading: "Twins",
6680
7894
  commands: [
6681
7895
  { name: "twins list", description: "List available twins and entitlement status" },
6682
7896
  { name: "twins select", description: "Choose which twins to use on your free plan" }
@@ -6700,7 +7914,7 @@ var COMMAND_GROUPS = [
6700
7914
  ];
6701
7915
  function showHelp() {
6702
7916
  process.stderr.write(`
6703
- ${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v0.1.0${RESET3}
7917
+ ${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v${CLI_VERSION}${RESET3}
6704
7918
  `);
6705
7919
  process.stderr.write(`${DIM3}The QA layer for the software factory era${RESET3}
6706
7920
 
@@ -6722,21 +7936,21 @@ ${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v0.1.0${RESET3}
6722
7936
  `);
6723
7937
  }
6724
7938
  function createHelpCommand() {
6725
- return new Command14("help").description("Show all available commands").action(() => {
7939
+ return new Command13("help").description("Show all available commands").action(() => {
6726
7940
  showHelp();
6727
7941
  });
6728
7942
  }
6729
7943
 
6730
7944
  // src/commands/setup.ts
6731
- import { Command as Command15 } from "commander";
6732
- import { existsSync as existsSync16 } from "fs";
7945
+ import { Command as Command14 } from "commander";
7946
+ import { existsSync as existsSync17 } from "fs";
6733
7947
  var RESET4 = "\x1B[0m";
6734
7948
  var BOLD4 = "\x1B[1m";
6735
7949
  var DIM4 = "\x1B[2m";
6736
7950
  var CYAN4 = "\x1B[36m";
6737
7951
  var GREEN3 = "\x1B[32m";
6738
7952
  function createSetupCommand() {
6739
- return new Command15("setup").description("Guided onboarding wizard for first-time setup").action(async () => {
7953
+ return new Command14("setup").description("Guided onboarding wizard for first-time setup").action(async () => {
6740
7954
  process.stderr.write(`
6741
7955
  ${CYAN4}${BOLD4}Archal Setup${RESET4}
6742
7956
  `);
@@ -6758,7 +7972,7 @@ ${CYAN4}${BOLD4}Archal Setup${RESET4}
6758
7972
  ${BOLD4}Step 2: Configuration${RESET4}
6759
7973
  `);
6760
7974
  const configPath = getConfigPath();
6761
- if (existsSync16(configPath)) {
7975
+ if (existsSync17(configPath)) {
6762
7976
  success(`Config file exists: ${configPath}`);
6763
7977
  } else {
6764
7978
  const create = await askConfirm("Create a default config file?");
@@ -6823,7 +8037,7 @@ ${DIM4}${"\u2500".repeat(45)}${RESET4}
6823
8037
  `);
6824
8038
  process.stderr.write(` ${CYAN4}archal scenario create my-first-test${RESET4} ${DIM4}Create a scenario${RESET4}
6825
8039
  `);
6826
- process.stderr.write(` ${CYAN4}archal run scenario.md --openclaw-url "..."${RESET4} ${DIM4}Run a scenario${RESET4}
8040
+ process.stderr.write(` ${CYAN4}archal run scenario.md --engine-endpoint "..." --engine-model "..."${RESET4} ${DIM4}Run a scenario${RESET4}
6827
8041
  `);
6828
8042
  process.stderr.write(` ${CYAN4}archal help${RESET4} ${DIM4}See all commands${RESET4}
6829
8043
 
@@ -6832,8 +8046,8 @@ ${DIM4}${"\u2500".repeat(45)}${RESET4}
6832
8046
  }
6833
8047
 
6834
8048
  // src/index.ts
6835
- var program = new Command16();
6836
- program.name("archal").description("The QA layer for the software factory era \u2014 test AI agents against digital twins").version("0.1.0").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").hook("preAction", (_thisCommand) => {
8049
+ var program = new Command15();
8050
+ program.name("archal").description("The QA layer for the software factory era \u2014 test AI agents against digital twins").version(CLI_VERSION).option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").hook("preAction", (_thisCommand) => {
6837
8051
  const opts = program.opts();
6838
8052
  if (opts.quiet) {
6839
8053
  configureLogger({ quiet: true });
@@ -6848,7 +8062,6 @@ program.addCommand(createWhoamiCommand());
6848
8062
  program.addCommand(createSetupCommand());
6849
8063
  program.addCommand(createRunCommand());
6850
8064
  program.addCommand(createInitCommand());
6851
- program.addCommand(createTwinCommand());
6852
8065
  program.addCommand(createTwinsCommand());
6853
8066
  program.addCommand(createScenarioCommand());
6854
8067
  program.addCommand(createTraceCommand());
@@ -6864,6 +8077,14 @@ program.action(() => {
6864
8077
  process.stderr.write("\x1B[33mNot logged in.\x1B[0m Get started with: \x1B[36marchal login\x1B[0m\n\n");
6865
8078
  }
6866
8079
  });
8080
+ function handleShutdown(signal) {
8081
+ process.stderr.write(`
8082
+ Received ${signal}, shutting down...
8083
+ `);
8084
+ process.exit(128 + (signal === "SIGINT" ? 2 : 15));
8085
+ }
8086
+ process.on("SIGINT", () => handleShutdown("SIGINT"));
8087
+ process.on("SIGTERM", () => handleShutdown("SIGTERM"));
6867
8088
  program.parseAsync(process.argv).catch((err) => {
6868
8089
  const message = err instanceof Error ? err.message : String(err);
6869
8090
  process.stderr.write(`Error: ${message}