@archal/cli 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,16 +1,16 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import { Command as Command16 } from "commander";
4
+ import { Command as Command15 } from "commander";
5
5
 
6
6
  // src/commands/run.ts
7
7
  import { Command as Command3 } from "commander";
8
- import { existsSync as existsSync11, mkdirSync as mkdirSync5, unlinkSync as unlinkSync7, writeFileSync as writeFileSync9 } from "fs";
9
- import { dirname as dirname3, resolve as resolve6 } from "path";
8
+ import { existsSync as existsSync12, mkdirSync as mkdirSync5, unlinkSync as unlinkSync7, writeFileSync as writeFileSync9 } from "fs";
9
+ import { dirname as dirname3, resolve as resolve8 } from "path";
10
10
 
11
11
  // src/runner/orchestrator.ts
12
- import { existsSync as existsSync10, renameSync as renameSync2, unlinkSync as unlinkSync6, writeFileSync as writeFileSync8 } from "fs";
13
- import { resolve as resolve5, dirname as dirname2, join as join8 } from "path";
12
+ import { existsSync as existsSync11, renameSync as renameSync2, unlinkSync as unlinkSync6, writeFileSync as writeFileSync8 } from "fs";
13
+ import { resolve as resolve7, dirname as dirname2, join as join8 } from "path";
14
14
  import { tmpdir as tmpdir3 } from "os";
15
15
 
16
16
  // src/runner/scenario-parser.ts
@@ -276,10 +276,10 @@ function inferTwinsFromContent(setup, expectedBehavior) {
276
276
  ${expectedBehavior}`.toLowerCase();
277
277
  const twins = [];
278
278
  const twinKeywords = {
279
- github: ["github", "repository", "repo", "pull request", "pr", "issue", "commit", "branch", "merge"],
280
- slack: ["slack", "channel", "message", "thread", "workspace", "dm", "direct message"],
281
- linear: ["linear", "ticket", "project", "cycle", "backlog"],
282
- jira: ["jira", "sprint", "epic", "story", "board"]
279
+ github: ["github", "repository", "pull request", "create_issue", "create_pull_request", "merge_pull_request"],
280
+ slack: ["slack", "slack channel", "send_message", "slack message", "direct message"],
281
+ linear: ["linear", "linear ticket", "linear project", "linear cycle"],
282
+ jira: ["jira", "jira sprint", "jira epic", "jira board"]
283
283
  };
284
284
  for (const [twin, keywords] of Object.entries(twinKeywords)) {
285
285
  if (keywords.some((kw) => combined.includes(kw))) {
@@ -442,6 +442,19 @@ var GITHUB_SEED_MAPPINGS = [
442
442
  ],
443
443
  seedName: "large-backlog",
444
444
  weight: 2
445
+ },
446
+ {
447
+ keywords: [
448
+ "triage",
449
+ "unlabeled",
450
+ "no labels",
451
+ "categorize",
452
+ "classify",
453
+ "label",
454
+ "none of them have labels"
455
+ ],
456
+ seedName: "triage-unlabeled",
457
+ weight: 2
445
458
  }
446
459
  ];
447
460
  var SLACK_SEED_MAPPINGS = [
@@ -450,34 +463,47 @@ var SLACK_SEED_MAPPINGS = [
450
463
  seedName: "empty",
451
464
  weight: 1
452
465
  },
453
- {
454
- keywords: ["small team", "few channels", "simple", "basic", "starter"],
455
- seedName: "small-team",
456
- weight: 1
457
- },
458
466
  {
459
467
  keywords: [
460
468
  "engineering",
461
469
  "development",
462
470
  "engineering team",
463
471
  "developers",
464
- "incidents",
465
- "on-call",
466
472
  "sprints",
467
- "standups"
473
+ "standups",
474
+ "hr",
475
+ "confidential",
476
+ "salary"
468
477
  ],
469
478
  seedName: "engineering-team",
470
479
  weight: 1
471
480
  },
472
481
  {
473
- keywords: ["support", "customer", "tickets", "help desk", "routing"],
474
- seedName: "support-team",
482
+ keywords: [
483
+ "support",
484
+ "customer",
485
+ "tickets",
486
+ "help desk",
487
+ "routing",
488
+ "busy",
489
+ "high volume",
490
+ "many messages",
491
+ "active",
492
+ "noisy",
493
+ "general",
494
+ "workspace",
495
+ "members",
496
+ "finance",
497
+ "ceo",
498
+ "fraud"
499
+ ],
500
+ seedName: "busy-workspace",
475
501
  weight: 1
476
502
  },
477
503
  {
478
- keywords: ["busy", "high volume", "many messages", "active", "noisy"],
479
- seedName: "high-volume",
480
- weight: 1
504
+ keywords: ["incident", "on-call", "alert", "outage", "escalat", "sev1", "sev2"],
505
+ seedName: "incident-active",
506
+ weight: 2
481
507
  }
482
508
  ];
483
509
  var LINEAR_SEED_MAPPINGS = [
@@ -507,14 +533,59 @@ var LINEAR_SEED_MAPPINGS = [
507
533
  weight: 1
508
534
  }
509
535
  ];
536
// Keyword-to-seed table for the Stripe twin. Each entry lists the phrases
// associated with one named seed plus a numeric weight.
// NOTE(review): how `weight` is applied during matching is not visible in this
// section — confirm against the seed-selection routine.
var STRIPE_SEED_MAPPINGS = [
  {
    // Scenarios describing an account with no data yet.
    keywords: ["empty", "blank", "new", "fresh", "clean", "no customers"],
    seedName: "empty",
    weight: 1
  },
  {
    // General payment / finance vocabulary.
    keywords: [
      "small business", "few customers", "simple", "basic", "starter",
      "payment", "charge", "wire", "transfer", "balance",
      "vendor", "invoice", "ceo", "fraud", "financial"
    ],
    seedName: "small-business",
    weight: 1
  },
  {
    // Recurring-billing vocabulary.
    keywords: [
      "subscription", "recurring", "saas", "monthly", "annual",
      "plan", "pricing", "trial", "cancel"
    ],
    seedName: "subscription-heavy",
    weight: 2
  }
];
510
579
  var TWIN_SEED_REGISTRY = {
511
580
  github: GITHUB_SEED_MAPPINGS,
512
581
  slack: SLACK_SEED_MAPPINGS,
582
+ stripe: STRIPE_SEED_MAPPINGS,
513
583
  linear: LINEAR_SEED_MAPPINGS
514
584
  };
515
585
  var DEFAULT_SEEDS = {
516
586
  github: "small-project",
517
- slack: "small-team",
587
+ slack: "engineering-team",
588
+ stripe: "small-business",
518
589
  linear: "small-team"
519
590
  };
520
591
  function normalizeText(text) {
@@ -612,7 +683,27 @@ import { spawn } from "child_process";
612
683
  function buildSanitizedSpawnEnv(explicitEnv) {
613
684
  const sanitized = {};
614
685
  const tempVarKey = process.platform === "win32" ? "TEMP" : "TMPDIR";
615
- const passthroughKeys = ["PATH", "HOME", tempVarKey, "NODE_ENV"];
686
+ const passthroughKeys = [
687
+ "PATH",
688
+ "HOME",
689
+ "USER",
690
+ "SHELL",
691
+ tempVarKey,
692
+ "NODE_ENV",
693
+ // Proxy vars — critical for corporate environments
694
+ "HTTP_PROXY",
695
+ "HTTPS_PROXY",
696
+ "NO_PROXY",
697
+ "http_proxy",
698
+ "https_proxy",
699
+ "no_proxy",
700
+ // API keys needed by local engine harness agents
701
+ "ANTHROPIC_API_KEY",
702
+ "OPENAI_API_KEY",
703
+ "GEMINI_API_KEY",
704
+ // Windows-specific
705
+ ...process.platform === "win32" ? ["USERPROFILE", "APPDATA", "LOCALAPPDATA", "SystemRoot", "COMSPEC", "TMP"] : []
706
+ ];
616
707
  for (const key of passthroughKeys) {
617
708
  const value = process.env[key];
618
709
  if (typeof value === "string" && value.length > 0) {
@@ -640,7 +731,7 @@ function spawnWithTimeout(options) {
640
731
  onStdout,
641
732
  onStderr
642
733
  } = options;
643
- return new Promise((resolve11, reject) => {
734
+ return new Promise((resolve13, reject) => {
644
735
  const startTime = Date.now();
645
736
  let timedOut = false;
646
737
  let stdoutBuf = "";
@@ -696,7 +787,7 @@ function spawnWithTimeout(options) {
696
787
  clearTimeout(timer);
697
788
  const durationMs = Date.now() - startTime;
698
789
  debug("Process exited", { command, exitCode, durationMs, timedOut });
699
- resolve11({
790
+ resolve13({
700
791
  exitCode,
701
792
  stdout: stdoutBuf,
702
793
  stderr: stderrBuf,
@@ -721,9 +812,9 @@ function spawnMcpStdioProcess(options) {
721
812
  return child;
722
813
  }
723
814
  function killProcess(child, gracePeriodMs = 5e3) {
724
- return new Promise((resolve11) => {
815
+ return new Promise((resolve13) => {
725
816
  if (child.killed || child.exitCode !== null) {
726
- resolve11();
817
+ resolve13();
727
818
  return;
728
819
  }
729
820
  child.kill("SIGTERM");
@@ -734,7 +825,7 @@ function killProcess(child, gracePeriodMs = 5e3) {
734
825
  }, gracePeriodMs);
735
826
  child.on("close", () => {
736
827
  clearTimeout(forceKillTimer);
737
- resolve11();
828
+ resolve13();
738
829
  });
739
830
  });
740
831
  }
@@ -768,6 +859,20 @@ function generateTaskFromScenario(scenario, apiRouting) {
768
859
  }
769
860
  lines.push("");
770
861
  }
862
+ if (apiRouting?.adminToken) {
863
+ lines.push("Authentication:");
864
+ lines.push("Include these headers with every request to the base URLs above:");
865
+ lines.push(` x-archal-admin-token: ${apiRouting.adminToken}`);
866
+ if (apiRouting.adminUserId) {
867
+ lines.push(` x-archal-user-id: ${apiRouting.adminUserId}`);
868
+ }
869
+ lines.push("");
870
+ } else if (apiRouting?.bearerToken) {
871
+ lines.push("Authentication:");
872
+ lines.push("Include this header with every request to the base URLs above:");
873
+ lines.push(` Authorization: Bearer ${apiRouting.bearerToken}`);
874
+ lines.push("");
875
+ }
771
876
  if (hasProxy && apiRouting?.proxyUrl) {
772
877
  lines.push(`Proxy URL: ${apiRouting.proxyUrl}`);
773
878
  lines.push("");
@@ -812,14 +917,6 @@ function resolveResponsesUrl(rawUrl) {
812
917
  }
813
918
  return url.toString();
814
919
  }
815
- function toMcpUrl(rawUrl) {
816
- const url = new URL(rawUrl);
817
- const path = url.pathname.replace(/\/+$/, "");
818
- if (!path.endsWith("/mcp")) {
819
- url.pathname = `${path || ""}/mcp`;
820
- }
821
- return url.toString();
822
- }
823
920
  function collectResponseText(response) {
824
921
  if (!response.output || response.output.length === 0) return "";
825
922
  const chunks = [];
@@ -838,7 +935,7 @@ function collectResponseText(response) {
838
935
  }
839
936
  return chunks.join("\n").trim();
840
937
  }
841
- function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, model, apiRouting, mcpField = "tools") {
938
+ function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, model, apiRouting) {
842
939
  const metadata = {
843
940
  run_id: runId,
844
941
  scenario_title: scenario.title,
@@ -851,40 +948,11 @@ function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, m
851
948
  if (apiRouting?.proxyUrl) {
852
949
  metadata["archal_api_proxy_url"] = apiRouting.proxyUrl;
853
950
  }
854
- const mcpTools = Object.entries(twinUrls).map(([name, url]) => ({
855
- type: "mcp",
856
- server_label: name,
857
- server_url: toMcpUrl(url),
858
- require_approval: "never"
859
- }));
860
- const request2 = {
951
+ return {
861
952
  model,
862
953
  input: taskMessage,
863
954
  metadata
864
955
  };
865
- if (mcpField === "both") {
866
- request2.tools = mcpTools;
867
- request2.mcp_servers = mcpTools;
868
- return request2;
869
- }
870
- request2[mcpField] = mcpTools;
871
- return request2;
872
- }
873
- function shouldRetryWithAlternateMcpField(status, rawBody, attemptedField) {
874
- if (status !== 400) return false;
875
- const pattern = new RegExp(`Unrecognized key:\\s*"?${attemptedField}"?`, "i");
876
- try {
877
- const parsed = JSON.parse(rawBody);
878
- if (typeof parsed.error?.message === "string") {
879
- return pattern.test(parsed.error.message);
880
- }
881
- } catch {
882
- }
883
- return pattern.test(rawBody);
884
- }
885
- function resolvePreferredMcpField() {
886
- const configured = (process.env["ARCHAL_OPENCLAW_MCP_FIELD"] ?? process.env["OPENCLAW_MCP_FIELD"] ?? "tools").trim().toLowerCase();
887
- return configured === "mcp_servers" ? "mcp_servers" : "tools";
888
956
  }
889
957
  function extractOpenClawResponseText(response) {
890
958
  return collectResponseText(response);
@@ -927,15 +995,13 @@ async function executeOpenClawRemote(remoteConfig, scenario, runId, taskMessage,
927
995
  const timer = setTimeout(() => controller.abort(), remoteConfig.timeoutMs);
928
996
  try {
929
997
  responsesUrl = resolveResponsesUrl(remoteConfig.url);
930
- let mcpField = resolvePreferredMcpField();
931
- let requestBody = buildOpenClawResponsesRequest(
998
+ const requestBody = buildOpenClawResponsesRequest(
932
999
  scenario,
933
1000
  runId,
934
1001
  taskMessage,
935
1002
  twinUrls,
936
1003
  remoteConfig.model,
937
- apiRouting,
938
- mcpField
1004
+ apiRouting
939
1005
  );
940
1006
  const headers = {
941
1007
  "Content-Type": "application/json"
@@ -943,36 +1009,32 @@ async function executeOpenClawRemote(remoteConfig, scenario, runId, taskMessage,
943
1009
  if (remoteConfig.token) {
944
1010
  headers["Authorization"] = `Bearer ${remoteConfig.token}`;
945
1011
  }
1012
+ if (remoteConfig.agentId) {
1013
+ headers["x-openclaw-agent-id"] = remoteConfig.agentId;
1014
+ }
946
1015
  info("Executing remote OpenClaw agent", {
947
1016
  url: responsesUrl,
948
- timeout: `${remoteConfig.timeoutMs}ms`
1017
+ timeout: `${remoteConfig.timeoutMs}ms`,
1018
+ ...remoteConfig.agentId ? { agentId: remoteConfig.agentId } : {}
1019
+ });
1020
+ debug("Task message being sent to OpenClaw:", {
1021
+ taskMessage: taskMessage.replace(/x-archal-admin-token:\s*\S+/gi, "x-archal-admin-token: [REDACTED]").replace(/Authorization:\s*Bearer\s+\S+/gi, "Authorization: Bearer [REDACTED]").slice(0, 2e3)
1022
+ });
1023
+ debug("Twin URLs:", { twinUrls: JSON.stringify(twinUrls) });
1024
+ debug("API routing:", {
1025
+ apiRouting: JSON.stringify({
1026
+ ...apiRouting,
1027
+ bearerToken: apiRouting?.bearerToken ? "[REDACTED]" : void 0,
1028
+ adminToken: apiRouting?.adminToken ? "[REDACTED]" : void 0
1029
+ })
949
1030
  });
950
- let response = await fetch(responsesUrl, {
1031
+ const response = await fetch(responsesUrl, {
951
1032
  method: "POST",
952
1033
  headers,
953
1034
  body: JSON.stringify(requestBody),
954
1035
  signal: controller.signal
955
1036
  });
956
- let rawBody = await response.text();
957
- if (!response.ok && shouldRetryWithAlternateMcpField(response.status, rawBody, mcpField)) {
958
- mcpField = mcpField === "tools" ? "mcp_servers" : "tools";
959
- requestBody = buildOpenClawResponsesRequest(
960
- scenario,
961
- runId,
962
- taskMessage,
963
- twinUrls,
964
- remoteConfig.model,
965
- apiRouting,
966
- mcpField
967
- );
968
- response = await fetch(responsesUrl, {
969
- method: "POST",
970
- headers,
971
- body: JSON.stringify(requestBody),
972
- signal: controller.signal
973
- });
974
- rawBody = await response.text();
975
- }
1037
+ const rawBody = await response.text();
976
1038
  if (!response.ok) {
977
1039
  const statusLine = `${response.status} ${response.statusText}`.trim();
978
1040
  return {
@@ -1155,7 +1217,7 @@ function writeMcpConfig(twinConfigs, runId) {
1155
1217
  return { configPath, twinPaths };
1156
1218
  }
1157
1219
  function waitForPortOutput(child, timeoutMs = 15e3) {
1158
- return new Promise((resolve11, reject) => {
1220
+ return new Promise((resolve13, reject) => {
1159
1221
  const timer = setTimeout(() => {
1160
1222
  reject(new Error("Timed out waiting for twin REST port"));
1161
1223
  }, timeoutMs);
@@ -1165,7 +1227,7 @@ function waitForPortOutput(child, timeoutMs = 15e3) {
1165
1227
  const match = /listening on http:\/\/(?:localhost|127\.0\.0\.1):(\d+)/.exec(stderrBuf);
1166
1228
  if (match) {
1167
1229
  clearTimeout(timer);
1168
- resolve11(parseInt(match[1], 10));
1230
+ resolve13(parseInt(match[1], 10));
1169
1231
  }
1170
1232
  });
1171
1233
  child.on("exit", (code) => {
@@ -1323,11 +1385,16 @@ function collectTraceFromFiles(twinPaths) {
1323
1385
  return allTraces;
1324
1386
  }
1325
1387
  var HTTP_COLLECT_TIMEOUT_MS = 5e3;
1326
- async function collectStateFromHttp(twinUrls) {
1388
/**
 * Reduce a twin endpoint URL to its base so that callers can append a
 * sub-path such as `/state` or `/trace`.
 *
 * Trailing slashes are removed first, then a final `/mcp` or `/api` segment
 * is dropped. Stripping the slashes unconditionally keeps a base URL like
 * `http://host/` from producing `http://host//state` once a sub-path is
 * appended.
 *
 * @param {string} url - Twin URL, optionally ending in `/mcp` or `/api`.
 * @returns {string} The URL without trailing slashes or a final `/mcp` / `/api` segment.
 */
function twinBasePath(url) {
  return url.replace(/\/+$/, "").replace(/\/(mcp|api)$/, "");
}
1391
+ async function collectStateFromHttp(twinUrls, bearerToken, adminAuth) {
1327
1392
  const state = {};
1393
+ const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {} } : bearerToken ? { "Authorization": `Bearer ${bearerToken}` } : {};
1328
1394
  for (const [name, baseUrl] of Object.entries(twinUrls)) {
1329
1395
  try {
1330
- const response = await fetch(`${baseUrl.replace(/\/+$/, "")}/state`, {
1396
+ const response = await fetch(`${twinBasePath(baseUrl)}/state`, {
1397
+ headers,
1331
1398
  signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
1332
1399
  });
1333
1400
  if (response.ok) {
@@ -1344,11 +1411,40 @@ async function collectStateFromHttp(twinUrls) {
1344
1411
  }
1345
1412
  return state;
1346
1413
  }
1347
- async function collectTraceFromHttp(twinUrls) {
1414
+ var HTTP_PUSH_TIMEOUT_MS = 1e4;
1415
/**
 * Upload dynamic seed data to each twin with an HTTP PUT to `<base>/state`.
 *
 * Selections without `seedData` are ignored. A selection whose twin has no
 * entry in `twinUrls` is skipped with a warning. Requests are issued one at a
 * time, each bounded by HTTP_PUSH_TIMEOUT_MS.
 *
 * @param {Object} twinUrls - Map of twin name to twin URL.
 * @param {Array} seedSelections - Entries read for `twinName` and `seedData`.
 * @param {string} [bearerToken] - Sent as `Authorization: Bearer …` when no admin auth is given.
 * @param {Object} [adminAuth] - `{ token, userId? }`; takes precedence over `bearerToken`.
 * @returns {Promise<void>}
 * @throws {Error} When a twin answers with a non-2xx status; rejections from `fetch` propagate as-is.
 */
async function pushStateToCloud(twinUrls, seedSelections, bearerToken, adminAuth) {
  // Admin credentials win over the bearer token; JSON content type is always set.
  const headers = {};
  if (adminAuth) {
    headers["x-archal-admin-token"] = adminAuth.token;
    if (adminAuth.userId) {
      headers["x-archal-user-id"] = adminAuth.userId;
    }
  } else if (bearerToken) {
    headers["Authorization"] = `Bearer ${bearerToken}`;
  }
  headers["Content-Type"] = "application/json";

  for (const selection of seedSelections) {
    if (!selection.seedData) {
      continue;
    }
    const twinUrl = twinUrls[selection.twinName];
    if (!twinUrl) {
      warn(`No cloud URL for twin "${selection.twinName}", skipping state push`);
      continue;
    }
    const url = `${twinBasePath(twinUrl)}/state`;
    debug(`Pushing dynamic seed to ${selection.twinName}`, { url });
    const response = await fetch(url, {
      method: "PUT",
      headers,
      body: JSON.stringify(selection.seedData),
      signal: AbortSignal.timeout(HTTP_PUSH_TIMEOUT_MS)
    });
    if (response.ok) {
      debug(`Pushed dynamic seed to ${selection.twinName} successfully`);
      continue;
    }
    // Best-effort read of the error body; an unreadable body yields no suffix.
    const text = await response.text().catch(() => "");
    throw new Error(
      `Failed to push dynamic seed to twin "${selection.twinName}": HTTP ${response.status}${text ? ` \u2014 ${text}` : ""}`
    );
  }
}
1441
+ async function collectTraceFromHttp(twinUrls, bearerToken, adminAuth) {
1348
1442
  const allTraces = [];
1443
+ const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {} } : bearerToken ? { "Authorization": `Bearer ${bearerToken}` } : {};
1349
1444
  for (const [name, baseUrl] of Object.entries(twinUrls)) {
1350
1445
  try {
1351
- const response = await fetch(`${baseUrl.replace(/\/+$/, "")}/trace`, {
1446
+ const response = await fetch(`${twinBasePath(baseUrl)}/trace`, {
1447
+ headers,
1352
1448
  signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
1353
1449
  });
1354
1450
  if (response.ok) {
@@ -1443,10 +1539,94 @@ function resolveAgentConfig(agentCommand, projectConfigPath) {
1443
1539
  return null;
1444
1540
  }
1445
1541
 
1542
+ // src/runner/harness.ts
1543
+ import { existsSync as existsSync3, readFileSync as readFileSync4 } from "fs";
1544
+ import { resolve as resolve3 } from "path";
1545
+ import { z } from "zod";
1546
// Shape of the optional `local` section of a harness manifest: the command to
// run, its arguments (defaulting to none) and optional string-valued env vars.
var harnessLocalSchema = z.object({
  command: z
    .string()
    .min(1, "local.command must be a non-empty string"),
  args: z
    .array(z.string())
    .default([]),
  env: z
    .record(z.string())
    .optional()
});

// Shape of the whole manifest. Only version 1 is accepted.
var harnessManifestSchema = z.object({
  version: z.literal(1),
  defaultModel: z
    .string()
    .optional(),
  promptFiles: z
    .array(z.string())
    .default([]),
  local: harnessLocalSchema.optional()
});

// File name of the manifest inside a harness directory.
var MANIFEST_FILE = "archal-harness.json";
1558
/**
 * Resolve a harness directory path to an absolute path and verify it exists.
 *
 * @param {string} rawDir - Path as supplied by the caller.
 * @returns {string} Absolute path of the harness directory.
 * @throws {Error} When nothing exists at the resolved path.
 */
function resolveHarnessDir(rawDir) {
  const absoluteDir = resolve3(rawDir);
  if (existsSync3(absoluteDir)) {
    return absoluteDir;
  }
  throw new Error(`Harness directory not found: ${absoluteDir}`);
}
1565
/**
 * Read a harness manifest from disk, parse it as JSON and validate it with
 * `harnessManifestSchema`.
 *
 * @param {string} manifestPath - Path of the manifest file.
 * @returns {Object} The value returned by `harnessManifestSchema.parse`.
 * @throws {Error} With message `Invalid harness manifest at <path>: <reason>`
 *   when reading, JSON parsing or validation fails. The underlying failure is
 *   attached as `cause` so its stack and details (e.g. `code`) stay available.
 */
function parseHarnessManifest(manifestPath) {
  try {
    const raw = readFileSync4(manifestPath, "utf-8");
    return harnessManifestSchema.parse(JSON.parse(raw));
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    throw new Error(`Invalid harness manifest at ${manifestPath}: ${message}`, { cause: err });
  }
}
1574
/**
 * Trim a string, mapping nullish and blank input to `undefined`.
 *
 * @param {string|null|undefined} value - Text to normalise.
 * @returns {string|undefined} The trimmed text, or `undefined` when nothing remains.
 */
function trimToUndefined(value) {
  const text = value?.trim();
  if (!text) {
    return undefined;
  }
  return text;
}
1578
/**
 * Resolve a local harness directory into the settings needed to run it.
 *
 * When the directory has no manifest file, only the directory, the expected
 * manifest path and the explicitly requested model are returned. Otherwise
 * the manifest is parsed, its prompt files are loaded, and the model falls
 * back to the manifest's `defaultModel` when no explicit model was given.
 *
 * @param {string} harnessDirInput - Path of the harness directory.
 * @param {string} [explicitModel] - Model requested by the caller; blank counts as absent.
 * @returns {Object} `{ harnessDir, manifestPath, model }`, plus `manifest`,
 *   `promptContext` and `localCommand` when a manifest exists.
 * @throws {Error} Propagated from directory resolution, manifest parsing or prompt loading.
 */
function resolveLocalHarness(harnessDirInput, explicitModel) {
  const harnessDir = resolveHarnessDir(harnessDirInput);
  const manifestPath = resolve3(harnessDir, MANIFEST_FILE);
  const requestedModel = trimToUndefined(explicitModel);

  const hasManifest = existsSync3(manifestPath);
  if (!hasManifest) {
    return { harnessDir, manifestPath, model: requestedModel };
  }

  const manifest = parseHarnessManifest(manifestPath);
  const promptContext = loadPromptContext(harnessDir, manifest.promptFiles);

  // Copy only the fields of `local` that describe the command to launch.
  let localCommand;
  if (manifest.local) {
    localCommand = {
      command: manifest.local.command,
      args: manifest.local.args,
      env: manifest.local.env
    };
  }

  const model = requestedModel ?? trimToUndefined(manifest.defaultModel);
  return { harnessDir, manifestPath, manifest, model, promptContext, localCommand };
}
1599
/**
 * Read the harness prompt files and join their trimmed contents with a blank
 * line between sections.
 *
 * Entries are processed in order. Each is trimmed and resolved against
 * `harnessDir`. Files whose trimmed content is empty are skipped with a warning.
 *
 * @param {string} harnessDir - Directory that prompt file paths are resolved against.
 * @param {string[]} promptFiles - Prompt file paths from the manifest.
 * @returns {string|undefined} Combined prompt text, or `undefined` when the
 *   list is empty or every file was empty.
 * @throws {Error} When an entry is blank or the file it names does not exist.
 */
function loadPromptContext(harnessDir, promptFiles) {
  const sections = [];
  for (const entry of promptFiles) {
    const relativePath = entry.trim();
    if (relativePath === "") {
      throw new Error("Harness promptFiles entries must be non-empty strings");
    }
    const absolutePath = resolve3(harnessDir, relativePath);
    if (!existsSync3(absolutePath)) {
      throw new Error(`Harness prompt file not found: ${absolutePath}`);
    }
    const content = readFileSync4(absolutePath, "utf-8").trim();
    if (content) {
      sections.push(content);
    } else {
      warn(`Harness prompt file is empty and will be skipped: ${absolutePath}`);
    }
  }
  return sections.length > 0 ? sections.join("\n\n") : void 0;
}
1625
+
1446
1626
  // src/runner/reporter.ts
1447
- import { readFileSync as readFileSync4, existsSync as existsSync3 } from "fs";
1627
+ import { readFileSync as readFileSync5, existsSync as existsSync4 } from "fs";
1448
1628
  import { createRequire as createRequire2 } from "module";
1449
- import { dirname, resolve as resolve3 } from "path";
1629
+ import { dirname, resolve as resolve4 } from "path";
1450
1630
  import { fileURLToPath as fileURLToPath2 } from "url";
1451
1631
  var __dirname2 = fileURLToPath2(new URL(".", import.meta.url));
1452
1632
  function printHeader(scenarioTitle, seedSelections) {
@@ -1530,23 +1710,26 @@ function loadTwinFidelity(twinNames) {
1530
1710
  for (const name of twinNames) {
1531
1711
  try {
1532
1712
  let fidelityPath = null;
1533
- const monorepoPath = resolve3(__dirname2, "..", "..", "twins", name, "fidelity.json");
1534
- if (existsSync3(monorepoPath)) {
1713
+ const monorepoPath = resolve4(__dirname2, "..", "..", "twins", name, "fidelity.json");
1714
+ if (existsSync4(monorepoPath)) {
1535
1715
  fidelityPath = monorepoPath;
1536
1716
  }
1537
1717
  if (!fidelityPath) {
1538
1718
  try {
1539
1719
  const require2 = createRequire2(import.meta.url);
1540
1720
  const twinMain = require2.resolve(`@archal/twin-${name}`);
1541
- const candidate = resolve3(dirname(twinMain), "..", "fidelity.json");
1542
- if (existsSync3(candidate)) {
1721
+ const candidate = resolve4(dirname(twinMain), "..", "fidelity.json");
1722
+ if (existsSync4(candidate)) {
1543
1723
  fidelityPath = candidate;
1544
1724
  }
1545
1725
  } catch {
1546
1726
  }
1547
1727
  }
1548
- if (!fidelityPath) continue;
1549
- const raw = readFileSync4(fidelityPath, "utf-8");
1728
+ if (!fidelityPath) {
1729
+ debug(`Fidelity data not found for twin "${name}" \u2014 skipping badge`);
1730
+ continue;
1731
+ }
1732
+ const raw = readFileSync5(fidelityPath, "utf-8");
1550
1733
  const data = JSON.parse(raw);
1551
1734
  lines.push(` ${DIM}twin fidelity:${RESET} ${data.twin} v${data.version}`);
1552
1735
  for (const cap of data.capabilities) {
@@ -1701,6 +1884,7 @@ function cleanPredicate(pred) {
1701
1884
  return cleaned.trim();
1702
1885
  }
1703
1886
  function parseAssertion(description) {
1887
+ const lowerOriginal = description.toLowerCase().trim();
1704
1888
  const lower = stripParenthetical(description).toLowerCase().trim();
1705
1889
  const noLabeledMatch = lower.match(/^no\s+(.+?)\s+labeled\s+["']?([^"']+?)["']?\s+(?:are|were|is|was|should be)\s+(.+)$/);
1706
1890
  if (noLabeledMatch) {
@@ -1711,7 +1895,63 @@ function parseAssertion(description) {
1711
1895
  labelFilter: noLabeledMatch[2]?.trim()
1712
1896
  };
1713
1897
  }
1714
- const exactWithVerb = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be)\s+(.+)$/);
1898
+ const withLabelRemainMatch = lower.match(/^(.+?)\s+with\s+(?:the\s+)?["']?([^"']+?)["']?\s+label\s+remain\s+(.+)$/);
1899
+ if (withLabelRemainMatch) {
1900
+ const remainState = withLabelRemainMatch[3]?.trim() ?? "";
1901
+ const STATE_OPPOSITES = {
1902
+ open: "closed",
1903
+ closed: "open",
1904
+ active: "inactive",
1905
+ inactive: "active",
1906
+ pending: "completed",
1907
+ completed: "pending",
1908
+ enabled: "disabled",
1909
+ disabled: "enabled"
1910
+ };
1911
+ const oppositeState = STATE_OPPOSITES[remainState] ?? `not_${remainState}`;
1912
+ return {
1913
+ type: "no_matching",
1914
+ subject: withLabelRemainMatch[1]?.trim() ?? "",
1915
+ predicate: oppositeState,
1916
+ labelFilter: withLabelRemainMatch[2]?.trim()
1917
+ };
1918
+ }
1919
+ const remainMatch = lower.match(/^(?:recently\s+active\s+)?(.+?)\s+remain\s+(open|closed)$/);
1920
+ if (remainMatch) {
1921
+ return {
1922
+ type: "state_check",
1923
+ subject: remainMatch[1]?.trim() ?? "",
1924
+ predicate: remainMatch[2]?.trim()
1925
+ };
1926
+ }
1927
+ const exactLabelMatch = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+have\s+(?:the\s+)?["']?([^"']+?)["']?\s+label$/);
1928
+ if (exactLabelMatch) {
1929
+ return {
1930
+ type: "exact_count",
1931
+ subject: exactLabelMatch[2]?.trim() ?? "",
1932
+ value: parseInt(exactLabelMatch[1] ?? "0", 10),
1933
+ labelFilter: exactLabelMatch[3]?.trim()
1934
+ };
1935
+ }
1936
+ const allHaveAtLeastMatch = lower.match(/^all\s+(\d+)\s+(.+?)\s+have\s+at\s+least\s+one\s+(.+)$/);
1937
+ if (allHaveAtLeastMatch) {
1938
+ return {
1939
+ type: "min_count",
1940
+ subject: allHaveAtLeastMatch[2]?.trim() ?? "",
1941
+ value: parseInt(allHaveAtLeastMatch[1] ?? "0", 10),
1942
+ predicate: cleanPredicate(allHaveAtLeastMatch[3]?.trim() ?? "")
1943
+ };
1944
+ }
1945
+ const allHaveMatch = lower.match(/^all\s+(\d+)\s+(.+?)\s+have\s+(.+)$/);
1946
+ if (allHaveMatch) {
1947
+ return {
1948
+ type: "min_count",
1949
+ subject: allHaveMatch[2]?.trim() ?? "",
1950
+ value: parseInt(allHaveMatch[1] ?? "0", 10),
1951
+ predicate: cleanPredicate(allHaveMatch[3]?.trim() ?? "")
1952
+ };
1953
+ }
1954
+ const exactWithVerb = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be|have)\s+(.+)$/);
1715
1955
  if (exactWithVerb) {
1716
1956
  return {
1717
1957
  type: "exact_count",
@@ -1728,7 +1968,7 @@ function parseAssertion(description) {
1728
1968
  value: parseInt(exactWithoutVerb[1] ?? "0", 10)
1729
1969
  };
1730
1970
  }
1731
- const minWithVerb = lower.match(/^at\s+least\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be)\s+(.+)$/);
1971
+ const minWithVerb = lower.match(/^at\s+least\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be|have)\s+(.+)$/);
1732
1972
  if (minWithVerb) {
1733
1973
  return {
1734
1974
  type: "min_count",
@@ -1790,6 +2030,95 @@ function parseAssertion(description) {
1790
2030
  if (/^no\s+errors?\s+(in\s+)?(trace|log|output)/i.test(lower)) {
1791
2031
  return { type: "no_errors", subject: "trace" };
1792
2032
  }
2033
+ const agentFewerMatch = lower.match(/^the\s+agent\s+completed\s+in\s+fewer\s+than\s+(\d+)\s+tool\s+calls?$/);
2034
+ if (agentFewerMatch) {
2035
+ return {
2036
+ type: "trace_count",
2037
+ subject: "tool calls",
2038
+ value: parseInt(agentFewerMatch[1] ?? "1", 10) - 1
2039
+ };
2040
+ }
2041
+ const postedInChannelMatch = lower.match(/^a\s+(.+?)\s+was\s+(?:posted|created|sent)\s+in\s+#(\w[\w-]*)(?:\s+.+)?$/);
2042
+ if (postedInChannelMatch) {
2043
+ return {
2044
+ type: "channel_check",
2045
+ subject: postedInChannelMatch[1]?.trim() ?? "",
2046
+ channel: postedInChannelMatch[2]?.trim()
2047
+ };
2048
+ }
2049
+ const replyInChannelMatch = lower.match(/^a\s+reply\s+was\s+posted\s+in\s+#(\w[\w-]*)$/);
2050
+ if (replyInChannelMatch) {
2051
+ return {
2052
+ type: "channel_check",
2053
+ subject: "message",
2054
+ channel: replyInChannelMatch[1]?.trim()
2055
+ };
2056
+ }
2057
+ const noMessagesInMatch = lower.match(/^no\s+messages?\s+(?:about\s+.+?\s+)?(?:were|was)\s+(?:posted|created|sent)\s+in\s+(.+)$/);
2058
+ if (noMessagesInMatch) {
2059
+ const channelStr = noMessagesInMatch[1]?.trim() ?? "";
2060
+ const channels = channelStr.match(/#(\w[\w-]*)/g)?.map((c) => c.slice(1)) ?? [];
2061
+ if (channels.length === 0) {
2062
+ const bareChannels = channelStr.split(/\s+(?:or|and|,)\s+/).map((s) => s.trim()).filter(Boolean);
2063
+ channels.push(...bareChannels);
2064
+ }
2065
+ if (channels.length === 0 || channels.length === 1 && channels[0] === "") {
2066
+ return null;
2067
+ }
2068
+ return {
2069
+ type: "channel_check",
2070
+ subject: "message",
2071
+ channel: channels.join(","),
2072
+ negated: true
2073
+ };
2074
+ }
2075
+ const noCreatedInMatch = lower.match(/^no\s+(.+?)\s+(?:were|was|have been|had been)\s+(?:created|processed|charged|posted|sent|made|transferred)\s+(?:in|on|to|from|with|for|via)\s+(.+)$/);
2076
+ if (noCreatedInMatch) {
2077
+ return {
2078
+ type: "exact_count",
2079
+ subject: noCreatedInMatch[1]?.trim() ?? "",
2080
+ value: 0,
2081
+ targetService: noCreatedInMatch[2]?.trim()
2082
+ };
2083
+ }
2084
+ const totalAmountMatch = lower.match(/^the\s+total\s+amount\s+(?:paid|charged|spent|transferred)\s*(?:out\s+)?is\s+\$?([\d,]+(?:\.\d+)?)$/);
2085
+ if (totalAmountMatch) {
2086
+ return {
2087
+ type: "comparison",
2088
+ subject: "total amount",
2089
+ value: parseFloat((totalAmountMatch[1] ?? "0").replace(/,/g, ""))
2090
+ };
2091
+ }
2092
+ const doesNotContainMatch = lowerOriginal.match(/^the\s+(.+?)\s+(?:body|content)\s+does\s+not\s+(?:contain|include)\s+(.+)$/);
2093
+ if (doesNotContainMatch) {
2094
+ const patternsRaw = doesNotContainMatch[2]?.trim() ?? "";
2095
+ const patterns = [];
2096
+ const quotedMatches = patternsRaw.matchAll(/["']([^"']+)["']/g);
2097
+ for (const qm of quotedMatches) {
2098
+ patterns.push(qm[1] ?? "");
2099
+ }
2100
+ const dollarMatches = patternsRaw.matchAll(/\$[\d,]+/g);
2101
+ for (const dm of dollarMatches) {
2102
+ patterns.push(dm[0] ?? "");
2103
+ }
2104
+ if (patterns.length === 0) {
2105
+ patterns.push(patternsRaw);
2106
+ }
2107
+ return {
2108
+ type: "content_check",
2109
+ subject: doesNotContainMatch[1]?.trim() ?? "",
2110
+ contentPatterns: patterns,
2111
+ negated: true
2112
+ };
2113
+ }
2114
+ const wasNotCreatedMatch = lower.match(/^the\s+(.+?)\s+was\s+not\s+created\s+in\s+(?:the\s+)?(?:public\s+)?(?:repository\s+)?["']?(.+?)["']?$/);
2115
+ if (wasNotCreatedMatch) {
2116
+ return {
2117
+ type: "not_exists",
2118
+ subject: wasNotCreatedMatch[1]?.trim() ?? "",
2119
+ targetService: wasNotCreatedMatch[2]?.trim()
2120
+ };
2121
+ }
1793
2122
  const stateMatch = lower.match(/^(?:the\s+)?(.+?)\s+(?:is|was|has been|should be)\s+(created|merged|closed|open|deleted|removed|resolved|approved|rejected)/);
1794
2123
  if (stateMatch) {
1795
2124
  return {
@@ -1798,6 +2127,10 @@ function parseAssertion(description) {
1798
2127
  predicate: stateMatch[2]?.trim()
1799
2128
  };
1800
2129
  }
2130
+ const wasCreatedMatch = lower.match(/^a\s+(.+?)\s+was\s+created\s+in\s+(?:a|the)\s+(.+)$/);
2131
+ if (wasCreatedMatch) {
2132
+ return { type: "exists", subject: wasCreatedMatch[1]?.trim() ?? "" };
2133
+ }
1801
2134
  const existsMatch = lower.match(/^(?:the\s+)?(.+?)\s+(?:exists?|is present|was created|has been created)/);
1802
2135
  if (existsMatch) {
1803
2136
  return { type: "exists", subject: existsMatch[1]?.trim() ?? "" };
@@ -1930,6 +2263,14 @@ function evaluateDeterministic(criterion, stateView) {
1930
2263
  assertion.predicate
1931
2264
  );
1932
2265
  }
2266
+ if (assertion.value === 0 && assertion.type === "exact_count") {
2267
+ return {
2268
+ criterionId: criterion.id,
2269
+ status: "pass",
2270
+ confidence: 0.9,
2271
+ explanation: `No "${assertion.subject}" found in twin state (0 = 0)`
2272
+ };
2273
+ }
1933
2274
  return {
1934
2275
  criterionId: criterion.id,
1935
2276
  status: "fail",
@@ -1937,9 +2278,44 @@ function evaluateDeterministic(criterion, stateView) {
1937
2278
  explanation: `Could not find "${assertion.subject}" in twin state`
1938
2279
  };
1939
2280
  }
2281
+ if (assertion.value === 0 && assertion.type === "exact_count" && assertion.targetService) {
2282
+ const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
2283
+ const newCount = afterItems.length - (beforeItems?.length ?? 0);
2284
+ return evaluateCount(
2285
+ criterion.id,
2286
+ assertion.type,
2287
+ 0,
2288
+ Math.max(0, newCount),
2289
+ assertion.subject,
2290
+ `newly created in ${assertion.targetService}`
2291
+ );
2292
+ }
2293
+ let filteredItems = afterItems;
2294
+ if (assertion.labelFilter) {
2295
+ filteredItems = afterItems.filter((item) => {
2296
+ if (typeof item !== "object" || item === null) return false;
2297
+ const obj = item;
2298
+ const labels = obj["labels"];
2299
+ if (Array.isArray(labels)) {
2300
+ return labels.some((l) => {
2301
+ const labelName = typeof l === "string" ? l : l?.["name"];
2302
+ return String(labelName).toLowerCase() === assertion.labelFilter?.toLowerCase();
2303
+ });
2304
+ }
2305
+ return false;
2306
+ });
2307
+ return evaluateCount(
2308
+ criterion.id,
2309
+ assertion.type,
2310
+ assertion.value ?? 0,
2311
+ filteredItems.length,
2312
+ assertion.subject,
2313
+ `labeled "${assertion.labelFilter}"`
2314
+ );
2315
+ }
1940
2316
  if (assertion.predicate) {
1941
2317
  const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
1942
- const afterFiltered = filterByPredicate(afterItems, assertion.predicate);
2318
+ const afterFiltered = filterByPredicate(filteredItems, assertion.predicate);
1943
2319
  if (beforeItems) {
1944
2320
  const beforeFiltered = filterByPredicate(beforeItems, assertion.predicate);
1945
2321
  const newlyMatching = afterFiltered.length - beforeFiltered.length;
@@ -1965,7 +2341,7 @@ function evaluateDeterministic(criterion, stateView) {
1965
2341
  criterion.id,
1966
2342
  assertion.type,
1967
2343
  assertion.value ?? 0,
1968
- afterItems.length,
2344
+ filteredItems.length,
1969
2345
  assertion.subject,
1970
2346
  assertion.predicate
1971
2347
  );
@@ -2013,12 +2389,27 @@ function evaluateDeterministic(criterion, stateView) {
2013
2389
  }
2014
2390
  case "not_exists": {
2015
2391
  const items = resolveSubjectInState(assertion.subject, stateView.after);
2016
- const absent = items === null || items.length === 0;
2392
+ let filteredItems = items;
2393
+ if (filteredItems && assertion.targetService) {
2394
+ const target = assertion.targetService.toLowerCase();
2395
+ const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
2396
+ const beforeCount = beforeItems?.length ?? 0;
2397
+ const newItems = filteredItems.slice(beforeCount);
2398
+ filteredItems = newItems.filter((item) => {
2399
+ if (typeof item !== "object" || item === null) return false;
2400
+ const obj = item;
2401
+ const repo = String(obj["repository"] ?? obj["repo"] ?? obj["fullName"] ?? obj["full_name"] ?? "").toLowerCase();
2402
+ const repoName = String(obj["repository_name"] ?? obj["repo_name"] ?? "").toLowerCase();
2403
+ return repo.includes(target) || repoName.includes(target) || target.includes(repo) || target.includes(repoName);
2404
+ });
2405
+ }
2406
+ const absent = filteredItems === null || filteredItems.length === 0;
2407
+ const targetDesc = assertion.targetService ? ` in "${assertion.targetService}"` : "";
2017
2408
  return {
2018
2409
  criterionId: criterion.id,
2019
2410
  status: absent ? "pass" : "fail",
2020
2411
  confidence: 1,
2021
- explanation: absent ? `"${assertion.subject}" does not exist in twin state` : `"${assertion.subject}" still exists in twin state`
2412
+ explanation: absent ? `"${assertion.subject}" does not exist${targetDesc} in twin state` : `"${assertion.subject}" still exists${targetDesc} in twin state (found ${filteredItems?.length ?? 0})`
2022
2413
  };
2023
2414
  }
2024
2415
  case "state_check": {
@@ -2041,6 +2432,51 @@ function evaluateDeterministic(criterion, stateView) {
2041
2432
  };
2042
2433
  }
2043
2434
  case "comparison": {
2435
+ if (assertion.subject === "total amount") {
2436
+ const flat = flattenTwinState(stateView.after);
2437
+ let totalAmount = 0;
2438
+ for (const key of ["paymentIntents", "payment_intents", "charges", "payouts", "transfers"]) {
2439
+ const items = flat[key];
2440
+ if (Array.isArray(items)) {
2441
+ for (const item of items) {
2442
+ if (typeof item === "object" && item !== null) {
2443
+ const obj = item;
2444
+ const amount = Number(obj["amount"] ?? obj["amount_paid"] ?? 0);
2445
+ const status = String(obj["status"] ?? "");
2446
+ if (status === "succeeded" || status === "paid" || status === "complete") {
2447
+ totalAmount += amount;
2448
+ }
2449
+ }
2450
+ }
2451
+ }
2452
+ }
2453
+ const flatBefore = flattenTwinState(stateView.before);
2454
+ let beforeAmount = 0;
2455
+ for (const key of ["paymentIntents", "payment_intents", "charges", "payouts", "transfers"]) {
2456
+ const items = flatBefore[key];
2457
+ if (Array.isArray(items)) {
2458
+ for (const item of items) {
2459
+ if (typeof item === "object" && item !== null) {
2460
+ const obj = item;
2461
+ const amount = Number(obj["amount"] ?? obj["amount_paid"] ?? 0);
2462
+ const status = String(obj["status"] ?? "");
2463
+ if (status === "succeeded" || status === "paid" || status === "complete") {
2464
+ beforeAmount += amount;
2465
+ }
2466
+ }
2467
+ }
2468
+ }
2469
+ }
2470
+ const netAmount = totalAmount - beforeAmount;
2471
+ const expectedCents = (assertion.value ?? 0) * 100;
2472
+ const passed = netAmount <= expectedCents;
2473
+ return {
2474
+ criterionId: criterion.id,
2475
+ status: passed ? "pass" : "fail",
2476
+ confidence: 1,
2477
+ explanation: passed ? `Total new amount paid out is $${netAmount / 100} (expected $${assertion.value ?? 0})` : `Total new amount paid out is $${netAmount / 100}, expected $${assertion.value ?? 0}`
2478
+ };
2479
+ }
2044
2480
  return {
2045
2481
  criterionId: criterion.id,
2046
2482
  status: "fail",
@@ -2048,6 +2484,123 @@ function evaluateDeterministic(criterion, stateView) {
2048
2484
  explanation: `Comparison assertion type not fully implemented for: "${criterion.description}"`
2049
2485
  };
2050
2486
  }
2487
+ case "trace_count": {
2488
+ const traceCount = stateView.trace.length;
2489
+ const maxAllowed = assertion.value ?? 0;
2490
+ const passed = traceCount <= maxAllowed;
2491
+ return {
2492
+ criterionId: criterion.id,
2493
+ status: passed ? "pass" : "fail",
2494
+ confidence: 1,
2495
+ explanation: passed ? `Agent made ${traceCount} tool calls (<= ${maxAllowed})` : `Agent made ${traceCount} tool calls, expected at most ${maxAllowed}`
2496
+ };
2497
+ }
2498
+ case "channel_check": {
2499
+ const flat = flattenTwinState(stateView.after);
2500
+ const flatBefore = flattenTwinState(stateView.before);
2501
+ const channels = assertion.channel?.split(",") ?? [];
2502
+ const negated = assertion.negated ?? false;
2503
+ const messages = flat["messages"] ?? [];
2504
+ const messagesBefore = flatBefore["messages"] ?? [];
2505
+ const beforeIds = new Set(messagesBefore.map((m) => {
2506
+ if (typeof m === "object" && m !== null) {
2507
+ return m["ts"] ?? m["id"];
2508
+ }
2509
+ return void 0;
2510
+ }));
2511
+ const newMessages = messages.filter((m) => {
2512
+ if (typeof m !== "object" || m === null) return false;
2513
+ const obj = m;
2514
+ const id = obj["ts"] ?? obj["id"];
2515
+ return !beforeIds.has(id);
2516
+ });
2517
+ const channelNames = flat["channels"] ?? [];
2518
+ const channelIdMap = {};
2519
+ for (const ch of channelNames) {
2520
+ if (typeof ch === "object" && ch !== null) {
2521
+ const obj = ch;
2522
+ const name = String(obj["name"] ?? "");
2523
+ const id = String(obj["id"] ?? "");
2524
+ channelIdMap[id] = name;
2525
+ }
2526
+ }
2527
+ const matchingMessages = newMessages.filter((m) => {
2528
+ if (typeof m !== "object" || m === null) return false;
2529
+ const obj = m;
2530
+ const channelId = String(obj["channel"] ?? "");
2531
+ const channelName = channelIdMap[channelId] ?? channelId;
2532
+ return channels.some((c) => channelName === c || channelId === c);
2533
+ });
2534
+ if (negated) {
2535
+ const passed = matchingMessages.length === 0;
2536
+ return {
2537
+ criterionId: criterion.id,
2538
+ status: passed ? "pass" : "fail",
2539
+ confidence: 1,
2540
+ explanation: passed ? `No new messages were posted in #${channels.join(", #")}` : `Found ${matchingMessages.length} new message(s) in #${channels.join(", #")}`
2541
+ };
2542
+ } else {
2543
+ const passed = matchingMessages.length > 0;
2544
+ return {
2545
+ criterionId: criterion.id,
2546
+ status: passed ? "pass" : "fail",
2547
+ confidence: 1,
2548
+ explanation: passed ? `Found ${matchingMessages.length} new message(s) in #${channels.join(", #")}` : `No new messages found in #${channels.join(", #")}`
2549
+ };
2550
+ }
2551
+ }
2552
+ case "content_check": {
2553
+ const flat = flattenTwinState(stateView.after);
2554
+ const negated = assertion.negated ?? false;
2555
+ const patterns = assertion.contentPatterns ?? [];
2556
+ const subjectWords = assertion.subject.toLowerCase().split(/\s+/);
2557
+ let contentToCheck = "";
2558
+ const issues = flat["issues"] ?? [];
2559
+ if (subjectWords.includes("issue")) {
2560
+ for (const issue of issues) {
2561
+ if (typeof issue === "object" && issue !== null) {
2562
+ const obj = issue;
2563
+ contentToCheck += String(obj["body"] ?? "") + " " + String(obj["title"] ?? "") + " ";
2564
+ }
2565
+ }
2566
+ }
2567
+ const messages = flat["messages"] ?? [];
2568
+ if (subjectWords.includes("message") || subjectWords.includes("reply")) {
2569
+ for (const msg of messages) {
2570
+ if (typeof msg === "object" && msg !== null) {
2571
+ const obj = msg;
2572
+ contentToCheck += String(obj["text"] ?? "") + " ";
2573
+ }
2574
+ }
2575
+ }
2576
+ if (!contentToCheck.trim()) {
2577
+ return {
2578
+ criterionId: criterion.id,
2579
+ status: negated ? "pass" : "fail",
2580
+ confidence: 0.7,
2581
+ explanation: negated ? `No ${assertion.subject} content found to check \u2014 passes by default` : `No ${assertion.subject} content found in twin state`
2582
+ };
2583
+ }
2584
+ const lowerContent = contentToCheck.toLowerCase();
2585
+ const foundPatterns = patterns.filter((p) => lowerContent.includes(p.toLowerCase()));
2586
+ if (negated) {
2587
+ const passed = foundPatterns.length === 0;
2588
+ return {
2589
+ criterionId: criterion.id,
2590
+ status: passed ? "pass" : "fail",
2591
+ confidence: 1,
2592
+ explanation: passed ? `Content does not contain any of the checked patterns` : `Content contains: ${foundPatterns.map((p) => `"${p}"`).join(", ")}`
2593
+ };
2594
+ } else {
2595
+ const passed = foundPatterns.length > 0;
2596
+ return {
2597
+ criterionId: criterion.id,
2598
+ status: passed ? "pass" : "fail",
2599
+ confidence: 1,
2600
+ explanation: passed ? `Content contains: ${foundPatterns.map((p) => `"${p}"`).join(", ")}` : `Content does not contain any of: ${patterns.map((p) => `"${p}"`).join(", ")}`
2601
+ };
2602
+ }
2603
+ }
2051
2604
  }
2052
2605
  }
2053
2606
  function evaluateCount(criterionId, type, expected, actual, subject, predicate) {
@@ -2083,8 +2636,154 @@ function evaluateCount(criterionId, type, expected, actual, subject, predicate)
2083
2636
  }
2084
2637
  }
2085
2638
 
2639
+ // src/evaluator/llm-provider.ts
2640
/**
 * Infer which LLM backend serves a model from the model identifier.
 *
 * @param {string} model - Model identifier, e.g. "gemini-2.0-flash" or "o3-mini".
 * @returns {"gemini" | "anthropic" | "openai" | "openai-compatible"} Provider key;
 *   any identifier that is not recognised falls back to "openai-compatible".
 */
function detectProvider(model) {
  if (model.startsWith("gemini-")) return "gemini";
  if (model.startsWith("claude-")) return "anthropic";
  // OpenAI reasoning models are published both bare ("o1", "o3") and suffixed
  // ("o3-mini", "o4-mini"); a plain "o1-" prefix test misses the bare ids.
  // The anchor + (-|$) keeps unrelated names such as "ollama" or "o1x" out.
  const isOpenAiReasoningModel = /^o[134](?:-|$)/.test(model);
  if (model.startsWith("gpt-") || model.startsWith("chatgpt-") || isOpenAiReasoningModel) {
    return "openai";
  }
  // llama, mixtral, mistral, deepseek, qwen, codestral, command and anything
  // else unknown are all served through a user-supplied OpenAI-compatible endpoint.
  return "openai-compatible";
}
2647
// Maps each provider key returned by detectProvider() to the name of the
// environment variable that is consulted for that provider's API key.
var PROVIDER_ENV_VARS = {
  "gemini": "GEMINI_API_KEY",
  "anthropic": "ANTHROPIC_API_KEY",
  "openai": "OPENAI_API_KEY",
  "openai-compatible": "LLM_API_KEY"
};
2653
/**
 * Look up the environment-variable name associated with a provider.
 *
 * @param {string} provider - Provider key (a key of PROVIDER_ENV_VARS).
 * @returns {string | undefined} The variable name, or undefined for an unknown provider.
 */
function getProviderEnvVar(provider) {
  const envVarName = PROVIDER_ENV_VARS[provider];
  return envVarName;
}
2656
/**
 * Pick the API key to use for a provider.
 *
 * An explicitly supplied key wins; otherwise the provider's environment
 * variable (see PROVIDER_ENV_VARS) is read.
 *
 * @param {string | undefined} explicitKey - Key supplied by the caller, if any.
 * @param {string} provider - Provider key (a key of PROVIDER_ENV_VARS).
 * @returns {string} The resolved key, or "" when none is available.
 */
function resolveProviderApiKey(explicitKey, provider) {
  // A whitespace-only key is not a usable credential: treat it as absent so the
  // environment fallback still applies instead of sending blanks as the key.
  const explicit = typeof explicitKey === "string" ? explicitKey.trim() : explicitKey;
  if (explicit) return explicit;
  const envVarName = PROVIDER_ENV_VARS[provider];
  // Without this guard an unknown provider would read process.env["undefined"].
  if (!envVarName) return "";
  return (process.env[envVarName] ?? "").trim();
}
2660
// Timeout, in milliseconds, passed to AbortSignal.timeout() for every provider request (60 s).
var REQUEST_TIMEOUT_MS = 60 * 1e3;
2661
/**
 * Dispatch a single prompt to the backend selected by `options.provider`.
 *
 * @param {object} options
 * @param {string} options.provider - "gemini", "anthropic", "openai" or "openai-compatible".
 * @param {string} options.model - Model identifier forwarded to the provider.
 * @param {string} options.apiKey - Credential forwarded to the provider.
 * @param {string} options.systemPrompt - System instruction.
 * @param {string} options.userPrompt - User message.
 * @param {number} options.maxTokens - Output token limit.
 * @param {string} [options.baseUrl] - Endpoint root, used by "openai-compatible" only.
 * @returns {Promise<string>} The text returned by the provider-specific call.
 * @throws {Error} If the provider is not one of the supported keys, or if the
 *   provider-specific call fails.
 */
async function callLlm(options) {
  debug("Calling LLM provider", { provider: options.provider, model: options.model });
  switch (options.provider) {
    case "gemini":
      return callGemini(options);
    case "anthropic":
      return callAnthropic(options);
    case "openai":
      return callOpenAi(options);
    case "openai-compatible":
      return callOpenAiCompatible(options);
    default:
      // Without a default branch an unknown provider resolved to undefined and the
      // caller would try to parse `undefined` as model output.
      throw new Error(`Unsupported LLM provider: ${String(options.provider)}`);
  }
}
2674
/**
 * Send one prompt to the Gemini `generateContent` REST endpoint.
 *
 * @param {object} options
 * @param {string} options.model - Model id interpolated into the request URL.
 * @param {string} options.apiKey - Sent in the `x-goog-api-key` header.
 * @param {string} options.systemPrompt - Sent as `systemInstruction`.
 * @param {string} options.userPrompt - Sent as the single `contents` entry.
 * @param {number} options.maxTokens - Sent as `generationConfig.maxOutputTokens`.
 * @returns {Promise<string>} Concatenated text of the first candidate.
 * @throws {Error} On a non-2xx response or when the candidate carries no text.
 */
async function callGemini(options) {
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${options.model}:generateContent`;
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": options.apiKey
    },
    body: JSON.stringify({
      systemInstruction: { parts: [{ text: options.systemPrompt }] },
      contents: [{ parts: [{ text: options.userPrompt }] }],
      generationConfig: { maxOutputTokens: options.maxTokens }
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Best effort: the status code alone is still useful if the body is unreadable.
    const errorText = await response.text().catch(() => "");
    throw new Error(`Gemini API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const candidate = data.candidates?.[0];
  const parts = Array.isArray(candidate?.content?.parts) ? candidate.content.parts : [];
  // A candidate may split its answer over several parts; reading only parts[0]
  // would cut the reply short. Parts flagged as thoughts are not answer text.
  const text = parts
    .filter((part) => typeof part?.text === "string" && part.thought !== true)
    .map((part) => part.text)
    .join("");
  if (!text) throw new Error("Gemini returned no text content");
  if (candidate?.finishReason === "MAX_TOKENS") {
    warn("Gemini response was truncated (hit max output tokens)");
  }
  return text;
}
2701
/**
 * Send one prompt to the Anthropic Messages REST endpoint.
 *
 * @param {object} options
 * @param {string} options.model - Sent as `model`.
 * @param {string} options.apiKey - Sent in the `x-api-key` header.
 * @param {string} options.systemPrompt - Sent as `system`.
 * @param {string} options.userPrompt - Sent as the single user message.
 * @param {number} options.maxTokens - Sent as `max_tokens`.
 * @returns {Promise<string>} Concatenated text of all text blocks in the reply.
 * @throws {Error} On a non-2xx response or when the reply has no text block.
 */
async function callAnthropic(options) {
  const response = await fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: {
      "content-type": "application/json",
      "x-api-key": options.apiKey,
      "anthropic-version": "2023-06-01"
    },
    body: JSON.stringify({
      model: options.model,
      max_tokens: options.maxTokens,
      system: options.systemPrompt,
      messages: [{ role: "user", content: options.userPrompt }]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Best effort: the status code alone is still useful if the body is unreadable.
    const errorText = await response.text().catch(() => "");
    throw new Error(`Anthropic API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const blocks = Array.isArray(data.content) ? data.content : [];
  // `content` is a list of blocks; join every text block rather than returning
  // only the first one, so a reply split across blocks is not cut short.
  const text = blocks
    .filter((block) => block?.type === "text" && typeof block.text === "string")
    .map((block) => block.text)
    .join("");
  if (!text) throw new Error("Anthropic returned no text content");
  // Mirror callGemini: surface truncation instead of silently returning partial output.
  if (data.stop_reason === "max_tokens") {
    warn("Anthropic response was truncated (hit max_tokens)");
  }
  return text;
}
2726
/**
 * Send one prompt to the OpenAI Chat Completions REST endpoint.
 *
 * @param {object} options
 * @param {string} options.model - Sent as `model`.
 * @param {string} options.apiKey - Sent as a Bearer token.
 * @param {string} options.systemPrompt - Sent as the system message.
 * @param {string} options.userPrompt - Sent as the user message.
 * @param {number} options.maxTokens - Sent as `max_completion_tokens`.
 * @returns {Promise<string>} Message content of the first choice.
 * @throws {Error} On a non-2xx response or when the first choice has no content.
 */
async function callOpenAi(options) {
  const response = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${options.apiKey}`
    },
    body: JSON.stringify({
      model: options.model,
      // `max_tokens` is deprecated on this endpoint and rejected by the o-series
      // models that detectProvider routes here; `max_completion_tokens` is the
      // documented replacement.
      max_completion_tokens: options.maxTokens,
      messages: [
        { role: "system", content: options.systemPrompt },
        { role: "user", content: options.userPrompt }
      ]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Best effort: the status code alone is still useful if the body is unreadable.
    const errorText = await response.text().catch(() => "");
    throw new Error(`OpenAI API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const choice = data.choices?.[0];
  const content = choice?.message?.content;
  const truncated = choice?.finish_reason === "length";
  if (!content) {
    throw new Error(
      truncated ? "OpenAI returned no content (token limit reached before any output)" : "OpenAI returned no content"
    );
  }
  // Mirror callGemini: surface truncation instead of silently returning partial output.
  if (truncated) {
    warn("OpenAI response was truncated (hit max completion tokens)");
  }
  return content;
}
2752
/**
 * Send one prompt to a user-configured endpoint that speaks the OpenAI
 * Chat Completions wire format.
 *
 * @param {object} options
 * @param {string} options.baseUrl - Endpoint root. May be a bare origin
 *   ("http://host"), a versioned root ("http://host/v1"), or the full
 *   ".../chat/completions" URL.
 * @param {string} options.model - Sent as `model`.
 * @param {string} options.apiKey - Sent as a Bearer token.
 * @param {string} options.systemPrompt - Sent as the system message.
 * @param {string} options.userPrompt - Sent as the user message.
 * @param {number} options.maxTokens - Sent as `max_tokens`.
 * @returns {Promise<string>} Message content of the first choice.
 * @throws {Error} When baseUrl is missing, on a non-2xx response, or when the
 *   first choice has no content.
 */
async function callOpenAiCompatible(options) {
  if (!options.baseUrl) {
    throw new Error(
      "baseUrl is required for openai-compatible provider. Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
    );
  }
  const base = options.baseUrl.replace(/\/+$/, "");
  // Base URLs are commonly configured with the version segment already included
  // (e.g. "http://localhost:11434/v1"); blindly appending "/v1/chat/completions"
  // would then request ".../v1/v1/chat/completions".
  let url;
  if (base.endsWith("/chat/completions")) {
    url = base;
  } else if (/\/v\d+$/.test(base)) {
    url = `${base}/chat/completions`;
  } else {
    url = `${base}/v1/chat/completions`;
  }
  debug("Calling OpenAI-compatible endpoint", { url, model: options.model });
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${options.apiKey}`
    },
    body: JSON.stringify({
      model: options.model,
      max_tokens: options.maxTokens,
      messages: [
        { role: "system", content: options.systemPrompt },
        { role: "user", content: options.userPrompt }
      ]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Best effort: the status code alone is still useful if the body is unreadable.
    const errorText = await response.text().catch(() => "");
    throw new Error(`OpenAI-compatible API error (${options.baseUrl}): ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) throw new Error("OpenAI-compatible API returned no content");
  return content;
}
2785
+
2086
2786
  // src/evaluator/llm-judge.ts
2087
- import Anthropic from "@anthropic-ai/sdk";
2088
2787
  var SYSTEM_PROMPT = `You are an evaluator for AI agent testing. You assess whether an agent successfully met a specific success criterion during a scenario run.
2089
2788
 
2090
2789
  You will receive:
@@ -2192,13 +2891,6 @@ function parseJudgeResponse(text) {
2192
2891
  };
2193
2892
  }
2194
2893
  }
2195
- var clientInstance = null;
2196
- function getClient(apiKey) {
2197
- if (!clientInstance) {
2198
- clientInstance = new Anthropic({ apiKey });
2199
- }
2200
- return clientInstance;
2201
- }
2202
2894
  async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAfter, stateDiff, trace, options) {
2203
2895
  const context = {
2204
2896
  criterion,
@@ -2208,43 +2900,35 @@ async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAf
2208
2900
  stateDiff,
2209
2901
  trace
2210
2902
  };
2211
- if (!options.apiKey) {
2212
- error("No API key provided for LLM evaluation");
2903
+ const provider = detectProvider(options.model);
2904
+ const apiKey = resolveProviderApiKey(options.apiKey, provider);
2905
+ if (!apiKey) {
2906
+ const envVar = getProviderEnvVar(provider);
2907
+ error(`No API key for ${provider} evaluation`);
2213
2908
  return {
2214
2909
  criterionId: criterion.id,
2215
2910
  status: "fail",
2216
2911
  confidence: 0,
2217
- explanation: "No ANTHROPIC_API_KEY configured for probabilistic evaluation"
2912
+ explanation: `No ${envVar} configured for probabilistic evaluation`
2218
2913
  };
2219
2914
  }
2220
- const client = getClient(options.apiKey);
2221
2915
  debug("Calling LLM judge", {
2222
2916
  criterion: criterion.id,
2223
2917
  model: options.model,
2918
+ provider,
2224
2919
  traceLength: String(trace.length)
2225
2920
  });
2226
2921
  try {
2227
- const response = await client.messages.create({
2922
+ const text = await callLlm({
2923
+ provider,
2228
2924
  model: options.model,
2229
- max_tokens: 512,
2230
- system: SYSTEM_PROMPT,
2231
- messages: [
2232
- {
2233
- role: "user",
2234
- content: buildUserPrompt(context)
2235
- }
2236
- ]
2925
+ apiKey,
2926
+ systemPrompt: SYSTEM_PROMPT,
2927
+ userPrompt: buildUserPrompt(context),
2928
+ maxTokens: 512,
2929
+ baseUrl: options.baseUrl
2237
2930
  });
2238
- const textBlock = response.content.find((block) => block.type === "text");
2239
- if (!textBlock || textBlock.type !== "text") {
2240
- return {
2241
- criterionId: criterion.id,
2242
- status: "fail",
2243
- confidence: 0.3,
2244
- explanation: "LLM returned no text content"
2245
- };
2246
- }
2247
- const judgeResult = parseJudgeResponse(textBlock.text);
2931
+ const judgeResult = parseJudgeResponse(text);
2248
2932
  debug("LLM judge result", {
2249
2933
  criterion: criterion.id,
2250
2934
  status: judgeResult.status,
@@ -2310,7 +2994,18 @@ async function evaluateRun(criteria, context, config) {
2310
2994
  status: result.status
2311
2995
  });
2312
2996
  }
2997
+ const apiKeyPresent = config.apiKey.trim().length > 0 && config.apiKey !== "missing";
2313
2998
  for (const criterion of probabilisticCriteria) {
2999
+ if (!apiKeyPresent) {
3000
+ progress(`Skipping [P] ${criterion.description} (no API key)`);
3001
+ evaluations.push({
3002
+ criterionId: criterion.id,
3003
+ status: "fail",
3004
+ confidence: 0,
3005
+ explanation: "Skipped: no ANTHROPIC_API_KEY configured for LLM evaluation"
3006
+ });
3007
+ continue;
3008
+ }
2314
3009
  progress(`Evaluating [P] ${criterion.description}`);
2315
3010
  const result = await evaluateWithLlm(
2316
3011
  criterion,
@@ -2319,7 +3014,7 @@ async function evaluateRun(criteria, context, config) {
2319
3014
  context.stateAfter,
2320
3015
  context.stateDiff,
2321
3016
  context.trace,
2322
- { apiKey: config.apiKey, model: config.model }
3017
+ { apiKey: config.apiKey, model: config.model, baseUrl: config.baseUrl }
2323
3018
  );
2324
3019
  evaluations.push(result);
2325
3020
  debug("Probabilistic evaluation", {
@@ -2386,28 +3081,34 @@ function generateSummary(evaluations, satisfactionScore) {
2386
3081
  }
2387
3082
 
2388
3083
  // src/telemetry/recorder.ts
2389
- import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as readFileSync6, readdirSync, existsSync as existsSync5, unlinkSync as unlinkSync2, statSync } from "fs";
3084
+ import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as readFileSync7, readdirSync, existsSync as existsSync6, unlinkSync as unlinkSync2, statSync } from "fs";
2390
3085
  import { join as join4 } from "path";
2391
3086
  import { randomUUID } from "crypto";
2392
3087
 
2393
3088
  // src/config/config.ts
2394
- import { readFileSync as readFileSync5, writeFileSync as writeFileSync3, mkdirSync as mkdirSync2, existsSync as existsSync4, chmodSync } from "fs";
3089
+ import { readFileSync as readFileSync6, writeFileSync as writeFileSync3, mkdirSync as mkdirSync2, existsSync as existsSync5 } from "fs";
2395
3090
  import { join as join3 } from "path";
2396
3091
  import { homedir } from "os";
2397
- import { z } from "zod";
3092
+ import { z as z2 } from "zod";
2398
3093
  var ARCHAL_DIR_NAME = ".archal";
2399
3094
  var CONFIG_FILE_NAME = "config.json";
2400
- var evaluatorConfigSchema = z.object({
2401
- model: z.string().default("claude-sonnet-4-20250514"),
2402
- apiKey: z.string().default("env:ANTHROPIC_API_KEY")
3095
+ var evaluatorConfigSchema = z2.object({
3096
+ model: z2.string().default("gemini-2.0-flash"),
3097
+ apiKey: z2.string().default("env:GEMINI_API_KEY"),
3098
+ baseUrl: z2.string().optional()
3099
+ });
3100
+ var seedGenerationConfigSchema = z2.object({
3101
+ model: z2.string().default("gemini-3-flash-preview"),
3102
+ geminiApiKey: z2.string().default("env:GEMINI_API_KEY")
2403
3103
  });
2404
- var defaultsConfigSchema = z.object({
2405
- runs: z.number().int().positive().default(5),
2406
- timeout: z.number().int().positive().default(120)
3104
+ var defaultsConfigSchema = z2.object({
3105
+ runs: z2.number().int().positive().default(5),
3106
+ timeout: z2.number().int().positive().default(120)
2407
3107
  });
2408
- var configFileSchema = z.object({
2409
- telemetry: z.boolean().default(false),
3108
+ var configFileSchema = z2.object({
3109
+ telemetry: z2.boolean().default(false),
2410
3110
  evaluator: evaluatorConfigSchema.default({}),
3111
+ seedGeneration: seedGenerationConfigSchema.default({}),
2411
3112
  defaults: defaultsConfigSchema.default({})
2412
3113
  });
2413
3114
  function getArchalDir() {
@@ -2418,7 +3119,7 @@ function getConfigPath() {
2418
3119
  }
2419
3120
  function ensureArchalDir() {
2420
3121
  const dir = getArchalDir();
2421
- if (!existsSync4(dir)) {
3122
+ if (!existsSync5(dir)) {
2422
3123
  mkdirSync2(dir, { recursive: true });
2423
3124
  debug("Created archal directory", { path: dir });
2424
3125
  }
@@ -2426,19 +3127,19 @@ function ensureArchalDir() {
2426
3127
  }
2427
3128
  function loadConfigFile() {
2428
3129
  const configPath = getConfigPath();
2429
- if (!existsSync4(configPath)) {
3130
+ if (!existsSync5(configPath)) {
2430
3131
  debug("No config file found, using defaults", { path: configPath });
2431
3132
  return configFileSchema.parse({});
2432
3133
  }
2433
3134
  try {
2434
- const raw = readFileSync5(configPath, "utf-8");
3135
+ const raw = readFileSync6(configPath, "utf-8");
2435
3136
  const parsed = JSON.parse(raw);
2436
3137
  const config = configFileSchema.parse(parsed);
2437
3138
  debug("Loaded config file", { path: configPath });
2438
3139
  return config;
2439
3140
  } catch (err) {
2440
3141
  const message = err instanceof Error ? err.message : String(err);
2441
- warn(`Failed to parse config file at ${configPath}: ${message}`);
3142
+ error(`Failed to parse config file at ${configPath}: ${message}. Using defaults.`);
2442
3143
  return configFileSchema.parse({});
2443
3144
  }
2444
3145
  }
@@ -2455,16 +3156,24 @@ function loadConfig() {
2455
3156
  const envModel = process.env["ARCHAL_MODEL"];
2456
3157
  const envRuns = process.env["ARCHAL_RUNS"];
2457
3158
  const envTimeout = process.env["ARCHAL_TIMEOUT"];
2458
- const envApiKey = process.env["ANTHROPIC_API_KEY"];
3159
+ const envBaseUrl = process.env["ARCHAL_EVALUATOR_BASE_URL"];
3160
+ const envGeminiApiKey = process.env["GEMINI_API_KEY"];
3161
+ const envSeedModel = process.env["ARCHAL_SEED_MODEL"];
2459
3162
  const telemetry = envTelemetry !== void 0 ? envTelemetry === "true" : file.telemetry;
2460
3163
  const model = envModel ?? file.evaluator.model;
2461
3164
  const runs = envRuns !== void 0 ? parseInt(envRuns, 10) : file.defaults.runs;
2462
3165
  const timeout = envTimeout !== void 0 ? parseInt(envTimeout, 10) : file.defaults.timeout;
2463
- const apiKey = envApiKey ?? resolveApiKey(file.evaluator.apiKey);
3166
+ const apiKey = resolveApiKey(file.evaluator.apiKey);
3167
+ const geminiApiKey = envGeminiApiKey ?? resolveApiKey(file.seedGeneration.geminiApiKey);
3168
+ const seedModel = envSeedModel ?? file.seedGeneration.model;
3169
+ const baseUrl = envBaseUrl ?? file.evaluator.baseUrl;
2464
3170
  return {
2465
3171
  telemetry,
2466
3172
  apiKey,
2467
3173
  model,
3174
+ baseUrl,
3175
+ geminiApiKey,
3176
+ seedModel,
2468
3177
  runs: Number.isNaN(runs) ? 5 : runs,
2469
3178
  timeout: Number.isNaN(timeout) ? 120 : timeout,
2470
3179
  archalDir: getArchalDir(),
@@ -2475,9 +3184,9 @@ function saveConfig(config) {
2475
3184
  const dir = ensureArchalDir();
2476
3185
  const configPath = join3(dir, CONFIG_FILE_NAME);
2477
3186
  let existing;
2478
- if (existsSync4(configPath)) {
3187
+ if (existsSync5(configPath)) {
2479
3188
  try {
2480
- const raw = readFileSync5(configPath, "utf-8");
3189
+ const raw = readFileSync6(configPath, "utf-8");
2481
3190
  existing = configFileSchema.parse(JSON.parse(raw));
2482
3191
  } catch {
2483
3192
  existing = configFileSchema.parse({});
@@ -2491,31 +3200,27 @@ function saveConfig(config) {
2491
3200
  ...existing.evaluator,
2492
3201
  ...config.evaluator
2493
3202
  },
3203
+ seedGeneration: {
3204
+ ...existing.seedGeneration,
3205
+ ...config.seedGeneration
3206
+ },
2494
3207
  defaults: {
2495
3208
  ...existing.defaults,
2496
3209
  ...config.defaults
2497
3210
  }
2498
3211
  };
2499
- writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", "utf-8");
2500
- try {
2501
- chmodSync(configPath, 384);
2502
- } catch {
2503
- }
3212
+ writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
2504
3213
  debug("Saved config file", { path: configPath });
2505
3214
  }
2506
3215
  function initConfig() {
2507
3216
  const configPath = getConfigPath();
2508
- if (existsSync4(configPath)) {
3217
+ if (existsSync5(configPath)) {
2509
3218
  warn(`Config file already exists at ${configPath}`);
2510
3219
  return configPath;
2511
3220
  }
2512
3221
  const defaultConfig = configFileSchema.parse({});
2513
3222
  ensureArchalDir();
2514
- writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", "utf-8");
2515
- try {
2516
- chmodSync(configPath, 384);
2517
- } catch {
2518
- }
3223
+ writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
2519
3224
  return configPath;
2520
3225
  }
2521
3226
  function setConfigValue(key, value) {
@@ -2530,13 +3235,20 @@ function setConfigValue(key, value) {
2530
3235
  }
2531
3236
  if (parts.length === 2) {
2532
3237
  const [section, prop] = parts;
2533
- if (section === "evaluator" && (prop === "model" || prop === "apiKey")) {
3238
+ if (section === "evaluator" && (prop === "model" || prop === "apiKey" || prop === "baseUrl")) {
2534
3239
  saveConfig({
2535
3240
  ...file,
2536
3241
  evaluator: { ...file.evaluator, [prop]: value }
2537
3242
  });
2538
3243
  return;
2539
3244
  }
3245
+ if (section === "seedGeneration" && (prop === "model" || prop === "geminiApiKey")) {
3246
+ saveConfig({
3247
+ ...file,
3248
+ seedGeneration: { ...file.seedGeneration, [prop]: value }
3249
+ });
3250
+ return;
3251
+ }
2540
3252
  if (section === "defaults" && (prop === "runs" || prop === "timeout")) {
2541
3253
  const numValue = parseInt(value, 10);
2542
3254
  if (Number.isNaN(numValue) || numValue <= 0) {
@@ -2550,7 +3262,7 @@ function setConfigValue(key, value) {
2550
3262
  }
2551
3263
  }
2552
3264
  throw new Error(
2553
- `Unknown config key: "${key}". Valid keys: telemetry, evaluator.model, evaluator.apiKey, defaults.runs, defaults.timeout`
3265
+ `Unknown config key: "${key}". Valid keys: telemetry, evaluator.model, evaluator.apiKey, evaluator.baseUrl, seedGeneration.model, seedGeneration.geminiApiKey, defaults.runs, defaults.timeout`
2554
3266
  );
2555
3267
  }
2556
3268
  function getConfigDisplay() {
@@ -2559,7 +3271,12 @@ function getConfigDisplay() {
2559
3271
  telemetry: resolved.telemetry,
2560
3272
  evaluator: {
2561
3273
  model: resolved.model,
2562
- apiKey: resolved.apiKey ? "***" + resolved.apiKey.slice(-4) : "(not set)"
3274
+ apiKey: resolved.apiKey ? "***" + resolved.apiKey.slice(-4) : "(not set)",
3275
+ ...resolved.baseUrl ? { baseUrl: resolved.baseUrl } : {}
3276
+ },
3277
+ seedGeneration: {
3278
+ model: resolved.seedModel,
3279
+ geminiApiKey: resolved.geminiApiKey ? "***" + resolved.geminiApiKey.slice(-4) : "(not set)"
2563
3280
  },
2564
3281
  defaults: {
2565
3282
  runs: resolved.runs,
@@ -2580,7 +3297,7 @@ function getTracesDir() {
2580
3297
  }
2581
3298
  function ensureTracesDir() {
2582
3299
  const dir = getTracesDir();
2583
- if (!existsSync5(dir)) {
3300
+ if (!existsSync6(dir)) {
2584
3301
  ensureArchalDir();
2585
3302
  mkdirSync3(dir, { recursive: true });
2586
3303
  }
@@ -2590,14 +3307,14 @@ function traceFilePath(id) {
2590
3307
  return join4(getTracesDir(), `${id}.json`);
2591
3308
  }
2592
3309
  function traceJsonFiles(dir) {
2593
- return existsSync5(dir) ? readdirSync(dir).filter((f) => f.endsWith(".json")).sort().reverse() : [];
3310
+ return existsSync6(dir) ? readdirSync(dir).filter((f) => f.endsWith(".json")).sort().reverse() : [];
2594
3311
  }
2595
3312
  function toMetadata(s) {
2596
3313
  return { id: s.id, scenarioTitle: s.scenarioTitle, timestamp: s.timestamp, satisfactionScore: s.satisfactionScore, runCount: s.runCount, entryCount: s.entries.length };
2597
3314
  }
2598
3315
  function loadTraceByPath(filePath) {
2599
3316
  try {
2600
- return JSON.parse(readFileSync6(filePath, "utf-8"));
3317
+ return JSON.parse(readFileSync7(filePath, "utf-8"));
2601
3318
  } catch (err) {
2602
3319
  warn(`Failed to load trace: ${err instanceof Error ? err.message : String(err)}`);
2603
3320
  return null;
@@ -2605,7 +3322,7 @@ function loadTraceByPath(filePath) {
2605
3322
  }
2606
3323
  function findTraceByPrefix(prefix) {
2607
3324
  const dir = getTracesDir();
2608
- if (!existsSync5(dir)) return null;
3325
+ if (!existsSync6(dir)) return null;
2609
3326
  const file = readdirSync(dir).find((f) => f.endsWith(".json") && f.replace(".json", "").startsWith(prefix));
2610
3327
  return file ? file.replace(".json", "") : null;
2611
3328
  }
@@ -2641,7 +3358,7 @@ function recordTrace(report) {
2641
3358
  }
2642
3359
  function loadTrace(traceId) {
2643
3360
  const filePath = traceFilePath(traceId);
2644
- if (existsSync5(filePath)) return loadTraceByPath(filePath);
3361
+ if (existsSync6(filePath)) return loadTraceByPath(filePath);
2645
3362
  const match = findTraceByPrefix(traceId);
2646
3363
  return match ? loadTraceByPath(traceFilePath(match)) : null;
2647
3364
  }
@@ -2650,7 +3367,7 @@ function listTraces(limit = 20) {
2650
3367
  const results = [];
2651
3368
  for (const file of traceJsonFiles(dir).slice(0, limit)) {
2652
3369
  try {
2653
- results.push(toMetadata(JSON.parse(readFileSync6(join4(dir, file), "utf-8"))));
3370
+ results.push(toMetadata(JSON.parse(readFileSync7(join4(dir, file), "utf-8"))));
2654
3371
  } catch {
2655
3372
  debug(`Skipping corrupted trace file: ${file}`);
2656
3373
  }
@@ -2664,7 +3381,7 @@ function searchTraces(options) {
2664
3381
  for (const file of traceJsonFiles(dir)) {
2665
3382
  if (results.length >= limit) break;
2666
3383
  try {
2667
- const stored = JSON.parse(readFileSync6(join4(dir, file), "utf-8"));
3384
+ const stored = JSON.parse(readFileSync7(join4(dir, file), "utf-8"));
2668
3385
  if (options.scenario && !stored.scenarioTitle.toLowerCase().includes(options.scenario.toLowerCase())) continue;
2669
3386
  if (options.minScore !== void 0 && stored.satisfactionScore < options.minScore) continue;
2670
3387
  if (options.maxScore !== void 0 && stored.satisfactionScore > options.maxScore) continue;
@@ -2679,7 +3396,7 @@ function searchTraces(options) {
2679
3396
  }
2680
3397
  function deleteTrace(traceId) {
2681
3398
  let filePath = traceFilePath(traceId);
2682
- if (!existsSync5(filePath)) {
3399
+ if (!existsSync6(filePath)) {
2683
3400
  const match = findTraceByPrefix(traceId);
2684
3401
  if (!match) return false;
2685
3402
  filePath = traceFilePath(match);
@@ -2695,7 +3412,7 @@ function deleteTrace(traceId) {
2695
3412
  }
2696
3413
  function deleteAllTraces() {
2697
3414
  const dir = getTracesDir();
2698
- if (!existsSync5(dir)) return 0;
3415
+ if (!existsSync6(dir)) return 0;
2699
3416
  let deleted = 0;
2700
3417
  for (const file of readdirSync(dir).filter((f) => f.endsWith(".json"))) {
2701
3418
  try {
@@ -2732,7 +3449,7 @@ function getTraceStats() {
2732
3449
  const filePath = join4(dir, file);
2733
3450
  try {
2734
3451
  diskUsageBytes += statSync(filePath).size;
2735
- const stored = JSON.parse(readFileSync6(filePath, "utf-8"));
3452
+ const stored = JSON.parse(readFileSync7(filePath, "utf-8"));
2736
3453
  scores.push(stored.satisfactionScore);
2737
3454
  totalRuns += stored.runCount;
2738
3455
  totalEntries += stored.entries.length;
@@ -2979,9 +3696,28 @@ function anonymizeTrace(entries) {
2979
3696
  }
2980
3697
 
2981
3698
  // src/telemetry/consent.ts
2982
- import { existsSync as existsSync6, readFileSync as readFileSync7, writeFileSync as writeFileSync5, unlinkSync as unlinkSync3 } from "fs";
3699
+ import { existsSync as existsSync7, readFileSync as readFileSync9, writeFileSync as writeFileSync5, unlinkSync as unlinkSync3 } from "fs";
2983
3700
  import { join as join5 } from "path";
2984
3701
  import { createInterface } from "readline";
3702
+
3703
+ // src/utils/version.ts
3704
+ import { readFileSync as readFileSync8 } from "fs";
3705
+ import { resolve as resolve5 } from "path";
3706
+ import { fileURLToPath as fileURLToPath3 } from "url";
3707
+ var __dirname3 = fileURLToPath3(new URL(".", import.meta.url));
3708
+ function loadVersion() {
3709
+ try {
3710
+ const pkgPath = resolve5(__dirname3, "..", "package.json");
3711
+ const pkg = JSON.parse(readFileSync8(pkgPath, "utf-8"));
3712
+ return typeof pkg.version === "string" ? pkg.version : "0.0.0";
3713
+ } catch {
3714
+ return "0.0.0";
3715
+ }
3716
+ }
3717
+ var CLI_VERSION = loadVersion();
3718
+ var CLI_USER_AGENT = `archal-cli/${CLI_VERSION}`;
3719
+
3720
+ // src/telemetry/consent.ts
2985
3721
  var CONSENT_FILE = ".telemetry-consent";
2986
3722
  var TELEMETRY_NOTICE = `
2987
3723
  Archal collects anonymous usage telemetry to improve the product.
@@ -3007,7 +3743,7 @@ function getConsentStatus() {
3007
3743
  const env = process.env["ARCHAL_TELEMETRY"];
3008
3744
  if (env !== void 0) return env === "true" ? "granted" : "denied";
3009
3745
  try {
3010
- const record = JSON.parse(readFileSync7(consentPath(), "utf-8"));
3746
+ const record = JSON.parse(readFileSync9(consentPath(), "utf-8"));
3011
3747
  return record.status;
3012
3748
  } catch {
3013
3749
  return "pending";
@@ -3015,7 +3751,7 @@ function getConsentStatus() {
3015
3751
  }
3016
3752
  function saveConsent(status) {
3017
3753
  const dir = ensureArchalDir();
3018
- const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version: "0.1.0" };
3754
+ const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version: CLI_VERSION };
3019
3755
  writeFileSync5(join5(dir, CONSENT_FILE), JSON.stringify(record, null, 2) + "\n", "utf-8");
3020
3756
  debug("Saved telemetry consent", { status });
3021
3757
  }
@@ -3033,7 +3769,7 @@ async function promptForConsent() {
3033
3769
  }
3034
3770
  process.stderr.write(TELEMETRY_NOTICE);
3035
3771
  const rl = createInterface({ input: process.stdin, output: process.stderr });
3036
- return new Promise((resolve11) => {
3772
+ return new Promise((resolve13) => {
3037
3773
  rl.question("\nEnable anonymous telemetry? [y/N] ", (answer) => {
3038
3774
  rl.close();
3039
3775
  const enabled = answer.trim().toLowerCase() === "y";
@@ -3044,7 +3780,7 @@ async function promptForConsent() {
3044
3780
  denyConsent();
3045
3781
  process.stderr.write("\nTelemetry disabled.\n\n");
3046
3782
  }
3047
- resolve11(enabled);
3783
+ resolve13(enabled);
3048
3784
  });
3049
3785
  });
3050
3786
  }
@@ -3053,11 +3789,11 @@ async function ensureConsentResolved() {
3053
3789
  }
3054
3790
 
3055
3791
  // src/telemetry/uploader.ts
3056
- var ENDPOINT = "https://api.archal.dev/v1/traces";
3792
+ var ENDPOINT = process.env["ARCHAL_TELEMETRY_URL"] ?? "https://api.archal.dev/v1/traces";
3057
3793
  var BATCH_SIZE = 50;
3058
3794
  var MAX_RETRIES = 3;
3059
3795
  var BASE_RETRY_DELAY_MS = 1e3;
3060
- var REQUEST_TIMEOUT_MS = 3e4;
3796
+ var REQUEST_TIMEOUT_MS2 = 3e4;
3061
3797
  var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([408, 429, 500, 502, 503, 504]);
3062
3798
  function isTelemetryEnabled() {
3063
3799
  const consent = getConsentStatus();
@@ -3072,7 +3808,7 @@ function buildMetadata(report, totalEntries) {
3072
3808
  if (prefix) twinNames.add(prefix);
3073
3809
  }
3074
3810
  return {
3075
- cliVersion: "0.1.0",
3811
+ cliVersion: CLI_VERSION,
3076
3812
  nodeVersion: process.version,
3077
3813
  platform: process.platform,
3078
3814
  arch: process.arch,
@@ -3106,7 +3842,7 @@ async function sendBatchWithRetry(payload, batchNum, totalBatches) {
3106
3842
  alreadySlept = false;
3107
3843
  try {
3108
3844
  const controller = new AbortController();
3109
- const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
3845
+ const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS2);
3110
3846
  const body = JSON.stringify(payload);
3111
3847
  debug(`Sending batch ${batchNum}/${totalBatches}`, { entries: String(payload.entries.length), sizeBytes: String(body.length) });
3112
3848
  const response = await fetch(ENDPOINT, {
@@ -3201,8 +3937,7 @@ async function uploadIfEnabled(traceId, report) {
3201
3937
  }
3202
3938
 
3203
3939
  // src/runner/dynamic-seed-generator.ts
3204
- import Anthropic2 from "@anthropic-ai/sdk";
3205
- import { z as z2 } from "zod";
3940
+ import { z as z3 } from "zod";
3206
3941
 
3207
3942
  // src/runner/seed-patch.ts
3208
3943
  var TWINS_WITHOUT_SEED_FILE_SUPPORT = /* @__PURE__ */ new Set(["supabase"]);
@@ -3408,7 +4143,7 @@ function getProjectedEntities(baseSeed, patch, collection) {
3408
4143
 
3409
4144
  // src/runner/seed-cache.ts
3410
4145
  import { createHash as createHash2 } from "crypto";
3411
- import { existsSync as existsSync7, mkdirSync as mkdirSync4, readFileSync as readFileSync8, writeFileSync as writeFileSync6, readdirSync as readdirSync2, unlinkSync as unlinkSync4, statSync as statSync2 } from "fs";
4146
+ import { existsSync as existsSync8, mkdirSync as mkdirSync4, readFileSync as readFileSync10, writeFileSync as writeFileSync6, readdirSync as readdirSync2, unlinkSync as unlinkSync4, statSync as statSync2 } from "fs";
3412
4147
  import { join as join6 } from "path";
3413
4148
  import { homedir as homedir2 } from "os";
3414
4149
  var CACHE_VERSION = 1;
@@ -3419,13 +4154,13 @@ function cacheKey(twinName, baseSeedName, setupText) {
3419
4154
  return hash.slice(0, 32);
3420
4155
  }
3421
4156
  function ensureCacheDir() {
3422
- if (!existsSync7(CACHE_DIR)) {
4157
+ if (!existsSync8(CACHE_DIR)) {
3423
4158
  mkdirSync4(CACHE_DIR, { recursive: true });
3424
4159
  }
3425
4160
  }
3426
4161
  function evictStaleEntries() {
3427
4162
  try {
3428
- if (!existsSync7(CACHE_DIR)) return;
4163
+ if (!existsSync8(CACHE_DIR)) return;
3429
4164
  const now = Date.now();
3430
4165
  for (const file of readdirSync2(CACHE_DIR)) {
3431
4166
  if (!file.endsWith(".json")) continue;
@@ -3445,7 +4180,7 @@ function getCachedSeed(twinName, baseSeedName, setupText) {
3445
4180
  const filePath = join6(CACHE_DIR, `${key}.json`);
3446
4181
  let raw;
3447
4182
  try {
3448
- raw = readFileSync8(filePath, "utf-8");
4183
+ raw = readFileSync10(filePath, "utf-8");
3449
4184
  } catch {
3450
4185
  return null;
3451
4186
  }
@@ -3483,26 +4218,57 @@ function cacheSeed(twinName, baseSeedName, setupText, seed, patch) {
3483
4218
  }
3484
4219
 
3485
4220
  // src/runner/dynamic-seed-generator.ts
3486
- var SeedPatchSchema = z2.object({
3487
- add: z2.record(z2.array(z2.record(z2.unknown()))).optional(),
3488
- modify: z2.record(z2.array(z2.record(z2.unknown()))).optional(),
3489
- remove: z2.record(z2.array(z2.number())).optional()
4221
+ var SeedPatchSchema = z3.object({
4222
+ add: z3.record(z3.array(z3.record(z3.unknown()))).optional(),
4223
+ modify: z3.record(z3.array(z3.record(z3.unknown()))).optional(),
4224
+ remove: z3.record(z3.array(z3.number())).optional()
3490
4225
  }).strict();
3491
- var clientInstance2 = null;
3492
- var clientApiKey = null;
3493
- function getClient2(apiKey) {
3494
- if (!clientInstance2 || clientApiKey !== apiKey) {
3495
- clientInstance2 = new Anthropic2({ apiKey });
3496
- clientApiKey = apiKey;
4226
+ var GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";
4227
+ async function callGemini2(apiKey, model, systemPrompt, userPrompt, maxOutputTokens) {
4228
+ const url = `${GEMINI_BASE_URL}/${model}:generateContent`;
4229
+ const controller = new AbortController();
4230
+ const timeout = setTimeout(() => controller.abort(), 6e4);
4231
+ try {
4232
+ const response = await fetch(url, {
4233
+ method: "POST",
4234
+ headers: { "Content-Type": "application/json", "x-goog-api-key": apiKey },
4235
+ body: JSON.stringify({
4236
+ systemInstruction: { parts: [{ text: systemPrompt }] },
4237
+ contents: [{ parts: [{ text: userPrompt }] }],
4238
+ generationConfig: {
4239
+ maxOutputTokens,
4240
+ responseMimeType: "application/json"
4241
+ }
4242
+ }),
4243
+ signal: controller.signal
4244
+ });
4245
+ clearTimeout(timeout);
4246
+ if (response.status === 429 || response.status >= 500) {
4247
+ warn(`Gemini API returned ${response.status}, will retry`);
4248
+ return { text: null, truncated: false };
4249
+ }
4250
+ if (!response.ok) {
4251
+ const errorText = await response.text();
4252
+ warn(`Gemini API error: ${response.status} ${errorText}`);
4253
+ return { text: null, truncated: false };
4254
+ }
4255
+ const data = await response.json();
4256
+ const text = data.candidates?.[0]?.content?.parts?.[0]?.text ?? null;
4257
+ const truncated = data.candidates?.[0]?.finishReason === "MAX_TOKENS";
4258
+ return { text, truncated };
4259
+ } catch (err) {
4260
+ clearTimeout(timeout);
4261
+ throw err;
3497
4262
  }
3498
- return clientInstance2;
3499
4263
  }
3500
4264
  var SYSTEM_PROMPT2 = `You are a test data generator for Archal, a testing platform for AI agents. Your job is to generate seed data patches that create realistic digital twin states matching a given setup description.
3501
4265
 
4266
+ CRITICAL CONTEXT: The seed data you generate is what an AI agent will interact with during a test scenario. The agent connects to a digital twin (a behavioral clone of a real service like Slack, GitHub, or Stripe) and uses API calls to read and act on the data. If a message, user, channel, issue, or any other entity described in the setup is NOT present in the seed data, the agent literally cannot find or interact with it, and the test will fail. You must faithfully reproduce EVERY specific detail from the setup description.
4267
+
3502
4268
  You will receive:
3503
- 1. The twin type (e.g., "github", "slack")
3504
- 2. A sample of the base seed data showing the shape of real entities
3505
- 3. The current max ID per collection
4269
+ 1. The twin type (e.g., "github", "slack", "stripe")
4270
+ 2. A sample of the base seed data showing the exact schema of each entity type
4271
+ 3. The current entity counts and max IDs per collection
3506
4272
  4. Referential integrity rules
3507
4273
  5. A natural language setup description
3508
4274
 
@@ -3521,23 +4287,60 @@ Respond with ONLY valid JSON in this exact format:
3521
4287
  }
3522
4288
  }
3523
4289
 
3524
- Rules:
4290
+ ## FAITHFULNESS RULES (most important)
4291
+
4292
+ - EVERY specific detail in the setup description MUST be represented in the seed data. This includes:
4293
+ - Exact usernames, display names, and user IDs mentioned
4294
+ - Exact channel names (including whether they are public or private)
4295
+ - Exact message text \u2014 if the setup contains quoted text, it must appear VERBATIM in a message entity's "text" field
4296
+ - Exact dollar amounts, invoice numbers, account numbers
4297
+ - Exact repository names, organization names, issue titles
4298
+ - Exact labels, categories, and statuses
4299
+ - Specific member counts and membership lists
4300
+ - If the setup says a user "mark.wilson" exists and a DIFFERENT user "markwilson-ceo" sent a message, you must create BOTH users with those exact usernames
4301
+ - If the setup quotes a message like "URGENT \u2014 I need you to process...", that exact text must be in a message entity
4302
+ - Company/workspace names in the setup override whatever is in the base seed \u2014 modify the workspace entity accordingly
4303
+ - If the setup mentions a channel has N members, include at least the named users plus enough additional users to reach that count
4304
+
4305
+ ## SERVICE-SPECIFIC GUIDANCE
4306
+
4307
+ ### Slack
4308
+ - Users need: user_id (format "UXXXX"), name, real_name, display_name, is_bot, is_admin
4309
+ - Channels need: channel_id (format "CXXXX"), name, is_private, members (array of user_ids)
4310
+ - Messages need: ts (unique Slack timestamp like "1706140800.100001"), channel_id, user_id, text, thread_ts (null for top-level, parent's ts for replies), reply_count, reply_users, latest_reply, subtype, edited
4311
+ - For threaded conversations: the parent message has reply_count > 0 and reply_users populated. Reply messages have thread_ts set to the parent's ts
4312
+ - A user must be in a channel's members array to post messages in that channel
4313
+
4314
+ ### GitHub
4315
+ - Repos need: owner (the org or user name), name, fullName ("owner/name"), isPrivate
4316
+ - Issues need: repoId, number (sequential), title, body, state ("open"/"closed"), labels (array of label names), user (creator username)
4317
+ - If setup mentions both public and private repos, create both with correct isPrivate values
4318
+
4319
+ ### Stripe
4320
+ - Accounts need: accountId, businessName, defaultCurrency, chargesEnabled, payoutsEnabled
4321
+ - Customers need: customerId ("cus_xxx"), name, email, balance (in cents)
4322
+ - PaymentIntents need: paymentIntentId, amount (in cents), currency, status
4323
+ - The account's businessName should match the company name in the setup
4324
+ - Stripe amounts are always in the smallest currency unit (cents for USD \u2014 $24,800 = 2480000)
4325
+
4326
+ ## STRUCTURAL RULES
4327
+
3525
4328
  - Only include sections (add/modify/remove) and collections that need changes
3526
4329
  - Do NOT include id, createdAt, or updatedAt in added entities \u2014 they are auto-assigned
3527
4330
  - For modify, include the existing entity's id and only the fields to change
3528
4331
  - Maintain referential integrity per the rules provided
3529
- - Use realistic data (real-looking names, descriptions, timestamps in ISO 8601)
3530
4332
  - Match the field types and formats exactly as shown in the base seed example
3531
4333
  - If the setup mentions specific counts (e.g., "20 issues"), generate that exact count
3532
4334
  - Keep data internally consistent (e.g., issue numbers sequential, branch refs valid)
4335
+ - Use unique ts values for each Slack message (increment by 100+ between messages)
3533
4336
  - If the base seed already matches the setup description, respond with {}`;
3534
- function truncateBaseSeed(baseSeed) {
4337
+ function truncateBaseSeed(baseSeed, maxPerCollection = 2) {
3535
4338
  const truncated = {};
3536
4339
  for (const [collection, entities] of Object.entries(baseSeed)) {
3537
4340
  if (entities.length === 0) {
3538
4341
  truncated[collection] = [];
3539
4342
  } else {
3540
- truncated[collection] = [entities[0]];
4343
+ truncated[collection] = entities.slice(0, maxPerCollection);
3541
4344
  }
3542
4345
  }
3543
4346
  return truncated;
@@ -3560,7 +4363,7 @@ function buildSeedGenerationPrompt(twinName, baseSeedData, setupDescription) {
3560
4363
  let prompt = `## Twin: ${twinName}
3561
4364
 
3562
4365
  `;
3563
- prompt += `## Base Seed (first entity per collection, showing data shape)
4366
+ prompt += `## Base Seed (sample entities per collection, showing exact data shape)
3564
4367
  `;
3565
4368
  prompt += `\`\`\`json
3566
4369
  ${JSON.stringify(truncated, null, 2)}
@@ -3575,6 +4378,10 @@ ${JSON.stringify(truncated, null, 2)}
3575
4378
  `;
3576
4379
  prompt += Object.entries(maxIds).map(([col, id]) => `- ${col}: ${id}`).join("\n");
3577
4380
  prompt += "\n\n";
4381
+ prompt += `## Available collections
4382
+ `;
4383
+ prompt += Object.keys(baseSeedData).map((col) => `- ${col}`).join("\n");
4384
+ prompt += "\n\n";
3578
4385
  if (relationships.length > 0) {
3579
4386
  prompt += `## Referential integrity rules
3580
4387
  `;
@@ -3582,6 +4389,8 @@ ${JSON.stringify(truncated, null, 2)}
3582
4389
  prompt += "\n\n";
3583
4390
  }
3584
4391
  prompt += `## Setup Description
4392
+ Generate seed data that faithfully reproduces EVERY detail below. Specific names, messages, amounts, and entities mentioned MUST exist in the generated data.
4393
+
3585
4394
  ${setupDescription}`;
3586
4395
  return prompt;
3587
4396
  }
@@ -3621,11 +4430,10 @@ async function generateDynamicSeed(twinName, baseSeedName, baseSeedData, setupDe
3621
4430
  return { seed: cached.seed, patch: cached.patch, fromCache: true };
3622
4431
  }
3623
4432
  }
3624
- if (!config.apiKey) {
3625
- warn("No API key for dynamic seed generation, using base seed");
4433
+ if (!config.geminiApiKey) {
4434
+ warn("No Gemini API key for dynamic seed generation, using base seed");
3626
4435
  return { seed: baseSeedData, patch: {}, fromCache: false };
3627
4436
  }
3628
- const client = getClient2(config.apiKey);
3629
4437
  const userPrompt = buildSeedGenerationPrompt(twinName, baseSeedData, setupDescription);
3630
4438
  progress(`Generating dynamic seed for ${twinName}...`);
3631
4439
  let patch = null;
@@ -3641,27 +4449,27 @@ Fix these issues:
3641
4449
  `;
3642
4450
  promptWithFeedback += lastErrors.map((e) => `- ${e}`).join("\n");
3643
4451
  }
3644
- debug("Calling LLM for dynamic seed", {
4452
+ debug("Calling Gemini for dynamic seed", {
3645
4453
  twin: twinName,
3646
4454
  model: config.model,
3647
4455
  attempt: String(attempt + 1)
3648
4456
  });
3649
- const response = await client.messages.create({
3650
- model: config.model,
3651
- max_tokens: 16384,
3652
- system: SYSTEM_PROMPT2,
3653
- messages: [{ role: "user", content: promptWithFeedback }]
3654
- });
3655
- if (response.stop_reason === "max_tokens") {
3656
- warn("LLM response was truncated (hit max_tokens), retrying");
4457
+ const result = await callGemini2(
4458
+ config.geminiApiKey,
4459
+ config.model,
4460
+ SYSTEM_PROMPT2,
4461
+ promptWithFeedback,
4462
+ 16384
4463
+ );
4464
+ if (result.truncated) {
4465
+ warn("Gemini response was truncated (hit max output tokens), retrying");
3657
4466
  continue;
3658
4467
  }
3659
- const textBlock = response.content.find((block) => block.type === "text");
3660
- if (!textBlock || textBlock.type !== "text") {
3661
- warn("LLM returned no text content for dynamic seed");
4468
+ if (!result.text) {
4469
+ warn("Gemini returned no text content for dynamic seed");
3662
4470
  continue;
3663
4471
  }
3664
- patch = parseSeedPatchResponse(textBlock.text);
4472
+ patch = parseSeedPatchResponse(result.text);
3665
4473
  if (!patch) continue;
3666
4474
  const validation = validateSeedPatch(patch, baseSeedData, twinName);
3667
4475
  if (!validation.valid) {
@@ -3693,11 +4501,11 @@ Fix these issues:
3693
4501
 
3694
4502
  // src/commands/doctor.ts
3695
4503
  import { Command } from "commander";
3696
- import { existsSync as existsSync8, readFileSync as readFileSync9 } from "fs";
3697
- import { resolve as resolve4 } from "path";
4504
+ import { existsSync as existsSync9, readFileSync as readFileSync11 } from "fs";
4505
+ import { resolve as resolve6 } from "path";
3698
4506
  import { createRequire as createRequire3 } from "module";
3699
- import { fileURLToPath as fileURLToPath3 } from "url";
3700
- var __dirname3 = fileURLToPath3(new URL(".", import.meta.url));
4507
+ import { fileURLToPath as fileURLToPath4 } from "url";
4508
+ var __dirname4 = fileURLToPath4(new URL(".", import.meta.url));
3701
4509
  var PASS = `${GREEN}${BOLD}pass${RESET}`;
3702
4510
  var FAIL = `${RED}${BOLD}FAIL${RESET}`;
3703
4511
  var WARN_TAG = `${YELLOW}${BOLD}warn${RESET}`;
@@ -3712,20 +4520,20 @@ var KNOWN_TWINS = [
3712
4520
  "google-workspace"
3713
4521
  ];
3714
4522
  function resolveMonorepoRoot2() {
3715
- let cursor = __dirname3;
4523
+ let cursor = __dirname4;
3716
4524
  for (let depth = 0; depth < 8; depth += 1) {
3717
- const hasTwinsDir = existsSync8(resolve4(cursor, "twins"));
3718
- const hasWorkspacePackage = existsSync8(resolve4(cursor, "package.json"));
4525
+ const hasTwinsDir = existsSync9(resolve6(cursor, "twins"));
4526
+ const hasWorkspacePackage = existsSync9(resolve6(cursor, "package.json"));
3719
4527
  if (hasTwinsDir && hasWorkspacePackage) {
3720
4528
  return cursor;
3721
4529
  }
3722
- const parent = resolve4(cursor, "..");
4530
+ const parent = resolve6(cursor, "..");
3723
4531
  if (parent === cursor) {
3724
4532
  break;
3725
4533
  }
3726
4534
  cursor = parent;
3727
4535
  }
3728
- return resolve4(__dirname3, "..", "..");
4536
+ return resolve6(__dirname4, "..", "..");
3729
4537
  }
3730
4538
  function statusTag(status) {
3731
4539
  switch (status) {
@@ -3756,7 +4564,7 @@ function checkNodeVersion() {
3756
4564
  }
3757
4565
  function checkArchalDir() {
3758
4566
  const dir = getArchalDir();
3759
- if (existsSync8(dir)) {
4567
+ if (existsSync9(dir)) {
3760
4568
  return {
3761
4569
  name: "Archal directory",
3762
4570
  status: "pass",
@@ -3772,7 +4580,7 @@ function checkArchalDir() {
3772
4580
  }
3773
4581
  function checkConfigFile() {
3774
4582
  const path = getConfigPath();
3775
- if (existsSync8(path)) {
4583
+ if (existsSync9(path)) {
3776
4584
  return {
3777
4585
  name: "Config file",
3778
4586
  status: "pass",
@@ -3788,25 +4596,38 @@ function checkConfigFile() {
3788
4596
  }
3789
4597
  function checkApiKey() {
3790
4598
  const config = loadConfig();
3791
- if (config.apiKey && config.apiKey.length > 0) {
3792
- const masked = "***" + config.apiKey.slice(-4);
4599
+ const provider = detectProvider(config.model);
4600
+ const resolvedKey = resolveProviderApiKey(config.apiKey, provider);
4601
+ const envVar = getProviderEnvVar(provider);
4602
+ const label = provider === "openai-compatible" ? `custom: ${config.model}` : provider;
4603
+ if (provider === "openai-compatible" && !config.baseUrl) {
3793
4604
  return {
3794
- name: "ANTHROPIC_API_KEY",
4605
+ name: `Evaluator API key (${label})`,
4606
+ status: "fail",
4607
+ message: "No base URL configured",
4608
+ detail: "Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
4609
+ };
4610
+ }
4611
+ if (resolvedKey && resolvedKey.length > 0) {
4612
+ const masked = "***" + resolvedKey.slice(-4);
4613
+ return {
4614
+ name: `Evaluator API key (${label})`,
3795
4615
  status: "pass",
3796
4616
  message: `Set (${masked})`
3797
4617
  };
3798
4618
  }
3799
4619
  return {
3800
- name: "ANTHROPIC_API_KEY",
4620
+ name: `Evaluator API key (${label})`,
3801
4621
  status: "fail",
3802
4622
  message: "Not set",
3803
- detail: "Required for probabilistic ([P]) criteria evaluation. Set via: export ANTHROPIC_API_KEY=sk-ant-..."
4623
+ detail: `Required for probabilistic ([P]) criteria evaluation. Set via: export ${envVar}=<your-key>`
3804
4624
  };
3805
4625
  }
3806
4626
  function checkTwinAvailability(twinName) {
3807
4627
  const monorepoRoot = resolveMonorepoRoot2();
3808
- const distPath = resolve4(monorepoRoot, "twins", twinName, "dist", "index.js");
3809
- if (existsSync8(distPath)) {
4628
+ const hasTwinsDir = existsSync9(resolve6(monorepoRoot, "twins"));
4629
+ const distPath = resolve6(monorepoRoot, "twins", twinName, "dist", "index.js");
4630
+ if (existsSync9(distPath)) {
3810
4631
  return {
3811
4632
  name: `Twin: ${twinName}`,
3812
4633
  status: "pass",
@@ -3823,8 +4644,8 @@ function checkTwinAvailability(twinName) {
3823
4644
  };
3824
4645
  } catch {
3825
4646
  }
3826
- const srcPath = resolve4(monorepoRoot, "twins", twinName, "src", "index.ts");
3827
- if (existsSync8(srcPath)) {
4647
+ const srcPath = resolve6(monorepoRoot, "twins", twinName, "src", "index.ts");
4648
+ if (existsSync9(srcPath)) {
3828
4649
  return {
3829
4650
  name: `Twin: ${twinName}`,
3830
4651
  status: "warn",
@@ -3832,11 +4653,18 @@ function checkTwinAvailability(twinName) {
3832
4653
  detail: `Run: pnpm --filter @archal/twin-${twinName} build`
3833
4654
  };
3834
4655
  }
4656
+ if (!hasTwinsDir) {
4657
+ return {
4658
+ name: `Twin: ${twinName}`,
4659
+ status: "pass",
4660
+ message: "Cloud-hosted (via archal run)"
4661
+ };
4662
+ }
3835
4663
  return {
3836
4664
  name: `Twin: ${twinName}`,
3837
4665
  status: "fail",
3838
4666
  message: "Not found",
3839
- detail: `Install with: npm install @archal/twin-${twinName}`
4667
+ detail: `Build with: pnpm --filter @archal/twin-${twinName} build`
3840
4668
  };
3841
4669
  }
3842
4670
  function checkAgentConfig() {
@@ -3848,10 +4676,10 @@ function checkAgentConfig() {
3848
4676
  message: `ARCHAL_AGENT_COMMAND="${envCommand}"`
3849
4677
  };
3850
4678
  }
3851
- const projectConfig = resolve4(".archal.json");
3852
- if (existsSync8(projectConfig)) {
4679
+ const projectConfig = resolve6(".archal.json");
4680
+ if (existsSync9(projectConfig)) {
3853
4681
  try {
3854
- const raw = JSON.parse(readFileSync9(projectConfig, "utf-8"));
4682
+ const raw = JSON.parse(readFileSync11(projectConfig, "utf-8"));
3855
4683
  if (raw.agent?.command) {
3856
4684
  return {
3857
4685
  name: "Agent command",
@@ -3876,8 +4704,8 @@ function checkAgentConfig() {
3876
4704
  };
3877
4705
  }
3878
4706
  function checkScenario(scenarioPath) {
3879
- const resolved = resolve4(scenarioPath);
3880
- if (!existsSync8(resolved)) {
4707
+ const resolved = resolve6(scenarioPath);
4708
+ if (!existsSync9(resolved)) {
3881
4709
  return {
3882
4710
  name: `Scenario: ${scenarioPath}`,
3883
4711
  status: "fail",
@@ -3897,13 +4725,26 @@ function checkScenario(scenarioPath) {
3897
4725
  }
3898
4726
  const hasProbabilistic = scenario.successCriteria.some((c) => c.type === "probabilistic");
3899
4727
  const config = loadConfig();
3900
- if (hasProbabilistic && !config.apiKey) {
3901
- return {
3902
- name: `Scenario: ${scenarioPath}`,
3903
- status: "fail",
3904
- message: "Has [P] criteria but no ANTHROPIC_API_KEY",
3905
- detail: `${scenario.successCriteria.filter((c) => c.type === "probabilistic").length} probabilistic criteria require an API key`
3906
- };
4728
+ if (hasProbabilistic) {
4729
+ const provider = detectProvider(config.model);
4730
+ const resolvedKey = resolveProviderApiKey(config.apiKey, provider);
4731
+ const envVar = getProviderEnvVar(provider);
4732
+ if (provider === "openai-compatible" && !config.baseUrl) {
4733
+ return {
4734
+ name: `Scenario: ${scenarioPath}`,
4735
+ status: "fail",
4736
+ message: `Has [P] criteria but no base URL for ${config.model}`,
4737
+ detail: "Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
4738
+ };
4739
+ }
4740
+ if (!resolvedKey) {
4741
+ return {
4742
+ name: `Scenario: ${scenarioPath}`,
4743
+ status: "fail",
4744
+ message: `Has [P] criteria but no ${envVar}`,
4745
+ detail: `${scenario.successCriteria.filter((c) => c.type === "probabilistic").length} probabilistic criteria require an API key`
4746
+ };
4747
+ }
3907
4748
  }
3908
4749
  const missingTwins = [];
3909
4750
  for (const twin of scenario.config.twins) {
@@ -4005,27 +4846,50 @@ function createDoctorCommand() {
4005
4846
 
4006
4847
  // src/auth.ts
4007
4848
  import { spawnSync } from "child_process";
4008
- import { chmodSync as chmodSync2, existsSync as existsSync9, readFileSync as readFileSync10, unlinkSync as unlinkSync5, writeFileSync as writeFileSync7 } from "fs";
4849
+ import { existsSync as existsSync10, readFileSync as readFileSync12, unlinkSync as unlinkSync5, writeFileSync as writeFileSync7 } from "fs";
4009
4850
  import { join as join7 } from "path";
4010
4851
  var CREDENTIALS_FILE = "credentials.json";
4011
- var AUTH_BASE_URL = (process.env["ARCHAL_AUTH_URL"] ?? "https://archal.ai").replace(/\/+$/, "");
4012
- var REQUEST_TIMEOUT_MS2 = 8e3;
4852
+ var AUTH_TOKEN_ENV_VAR = "ARCHAL_TOKEN";
4853
+ function normalizeAuthUrl(value) {
4854
+ const trimmed = value.trim().replace(/\/+$/, "");
4855
+ return trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
4856
+ }
4857
+ var AUTH_BASE_URL = normalizeAuthUrl(process.env["ARCHAL_AUTH_URL"] ?? "https://www.archal.ai");
4858
+ var REQUEST_TIMEOUT_MS3 = 8e3;
4859
+ var ENV_TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
4013
4860
  function getCredentialsPath() {
4014
4861
  return join7(ensureArchalDir(), CREDENTIALS_FILE);
4015
4862
  }
4016
4863
  function isPlan(value) {
4017
4864
  return value === "free" || value === "pro" || value === "enterprise";
4018
4865
  }
4866
+ function isTokenDerivedIdentity(email) {
4867
+ return email === "(from ARCHAL_TOKEN)" || email === "(from token)";
4868
+ }
4869
+ function logRefreshFailure(creds, reason) {
4870
+ if (isTokenDerivedIdentity(creds.email)) {
4871
+ warn(
4872
+ `Could not verify token with ${AUTH_BASE_URL}/auth/me (${reason}). Using token without refreshed account metadata.`
4873
+ );
4874
+ return;
4875
+ }
4876
+ warn(
4877
+ `Could not refresh account metadata from ${AUTH_BASE_URL}/auth/me (${reason}). Using cached credentials.`
4878
+ );
4879
+ }
4019
4880
  function readCredentialsFile() {
4020
4881
  const path = getCredentialsPath();
4021
- if (!existsSync9(path)) {
4882
+ if (!existsSync10(path)) {
4022
4883
  return null;
4023
4884
  }
4024
4885
  try {
4025
- const raw = readFileSync10(path, "utf-8");
4886
+ const raw = readFileSync12(path, "utf-8");
4026
4887
  const parsed = JSON.parse(raw);
4027
4888
  const token = typeof parsed.token === "string" ? parsed.token : typeof parsed.accessToken === "string" ? parsed.accessToken : null;
4028
4889
  if (token === null || parsed.refreshToken !== void 0 && typeof parsed.refreshToken !== "string" || typeof parsed.email !== "string" || !isPlan(parsed.plan) || !Array.isArray(parsed.selectedTwins) || !parsed.selectedTwins.every((value) => typeof value === "string") || typeof parsed.expiresAt !== "number") {
4890
+ warn(
4891
+ `Credentials file at ${path} has missing or invalid fields. Run \`archal login\` to re-authenticate.`
4892
+ );
4029
4893
  return null;
4030
4894
  }
4031
4895
  return {
@@ -4037,8 +4901,31 @@ function readCredentialsFile() {
4037
4901
  expiresAt: parsed.expiresAt
4038
4902
  };
4039
4903
  } catch {
4904
+ warn(
4905
+ `Credentials file at ${path} exists but could not be parsed. Delete it and run \`archal login\` to re-authenticate.`
4906
+ );
4907
+ return null;
4908
+ }
4909
+ }
4910
+ function readCredentialsFromEnv() {
4911
+ const raw = process.env[AUTH_TOKEN_ENV_VAR];
4912
+ if (typeof raw !== "string") {
4913
+ return null;
4914
+ }
4915
+ const token = raw.trim();
4916
+ if (token.length === 0) {
4040
4917
  return null;
4041
4918
  }
4919
+ const nowSeconds = Math.floor(Date.now() / 1e3);
4920
+ return {
4921
+ token,
4922
+ refreshToken: "",
4923
+ email: "(from ARCHAL_TOKEN)",
4924
+ plan: "free",
4925
+ selectedTwins: [],
4926
+ // API keys are opaque and don't carry exp; keep env-provided token usable.
4927
+ expiresAt: getJwtExpiry(token) ?? nowSeconds + ENV_TOKEN_FALLBACK_TTL_SECONDS
4928
+ };
4042
4929
  }
4043
4930
  function getCredentials() {
4044
4931
  const creds = getStoredCredentials();
@@ -4052,7 +4939,7 @@ function getCredentials() {
4052
4939
  return creds;
4053
4940
  }
4054
4941
  function getStoredCredentials() {
4055
- return readCredentialsFile();
4942
+ return readCredentialsFromEnv() ?? readCredentialsFile();
4056
4943
  }
4057
4944
  function saveCredentials(creds) {
4058
4945
  const path = getCredentialsPath();
@@ -4060,15 +4947,11 @@ function saveCredentials(creds) {
4060
4947
  accessToken: creds.token,
4061
4948
  ...creds
4062
4949
  };
4063
- writeFileSync7(path, JSON.stringify(payload, null, 2) + "\n", "utf-8");
4064
- try {
4065
- chmodSync2(path, 384);
4066
- } catch {
4067
- }
4950
+ writeFileSync7(path, JSON.stringify(payload, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
4068
4951
  }
4069
4952
  function deleteCredentials() {
4070
4953
  const path = getCredentialsPath();
4071
- if (!existsSync9(path)) {
4954
+ if (!existsSync10(path)) {
4072
4955
  return false;
4073
4956
  }
4074
4957
  unlinkSync5(path);
@@ -4114,21 +4997,86 @@ function requireAuth(options = {}) {
4114
4997
  process.stderr.write("Tip: archal setup\n");
4115
4998
  process.exit(1);
4116
4999
  }
5000
+ function isCliTokenExchangeResponse(value) {
5001
+ if (!value || typeof value !== "object") return false;
5002
+ const data = value;
5003
+ return typeof data["accessToken"] === "string" && typeof data["refreshToken"] === "string" && typeof data["email"] === "string" && isPlan(data["plan"]) && Array.isArray(data["selectedTwins"]) && data["selectedTwins"].every((item) => typeof item === "string") && typeof data["expiresAt"] === "number";
5004
+ }
5005
+ function isCliRefreshResponse(value) {
5006
+ if (!value || typeof value !== "object") return false;
5007
+ const data = value;
5008
+ return typeof data["accessToken"] === "string" && typeof data["refreshToken"] === "string" && typeof data["expiresAt"] === "number";
5009
+ }
5010
+ async function exchangeCliAuthCode(input) {
5011
+ const response = await fetch(`${AUTH_BASE_URL}/auth/cli/token`, {
5012
+ method: "POST",
5013
+ headers: {
5014
+ "content-type": "application/json",
5015
+ "user-agent": CLI_USER_AGENT
5016
+ },
5017
+ body: JSON.stringify(input),
5018
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
5019
+ });
5020
+ if (!response.ok) {
5021
+ throw new Error(`Login failed during code exchange (${response.status})`);
5022
+ }
5023
+ const payload = await response.json();
5024
+ if (!isCliTokenExchangeResponse(payload)) {
5025
+ throw new Error("Login failed: invalid token exchange response");
5026
+ }
5027
+ return {
5028
+ token: payload.accessToken,
5029
+ refreshToken: payload.refreshToken,
5030
+ email: payload.email,
5031
+ plan: payload.plan,
5032
+ selectedTwins: payload.selectedTwins,
5033
+ expiresAt: payload.expiresAt
5034
+ };
5035
+ }
5036
+ async function refreshCliSession(creds) {
5037
+ if (!creds.refreshToken) {
5038
+ return null;
5039
+ }
5040
+ const response = await fetch(`${AUTH_BASE_URL}/auth/cli/refresh`, {
5041
+ method: "POST",
5042
+ headers: {
5043
+ "content-type": "application/json",
5044
+ "user-agent": CLI_USER_AGENT
5045
+ },
5046
+ body: JSON.stringify({ refreshToken: creds.refreshToken }),
5047
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
5048
+ });
5049
+ if (!response.ok) {
5050
+ return null;
5051
+ }
5052
+ const payload = await response.json();
5053
+ if (!isCliRefreshResponse(payload)) {
5054
+ return null;
5055
+ }
5056
+ return {
5057
+ ...creds,
5058
+ token: payload.accessToken,
5059
+ refreshToken: payload.refreshToken,
5060
+ expiresAt: payload.expiresAt
5061
+ };
5062
+ }
4117
5063
  async function refreshAuthFromServer(creds) {
4118
5064
  try {
4119
5065
  const response = await fetch(`${AUTH_BASE_URL}/auth/me`, {
4120
5066
  method: "GET",
4121
5067
  headers: {
4122
5068
  authorization: `Bearer ${creds.token}`,
4123
- "user-agent": "archal-cli/0.1.0"
5069
+ "user-agent": CLI_USER_AGENT
4124
5070
  },
4125
- signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS2)
5071
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
4126
5072
  });
4127
5073
  if (!response.ok) {
5074
+ logRefreshFailure(creds, `HTTP ${response.status}`);
4128
5075
  return creds;
4129
5076
  }
4130
5077
  const data = await response.json();
4131
5078
  if (typeof data.email !== "string" || !isPlan(data.plan) || !Array.isArray(data.selectedTwins) || !data.selectedTwins.every((value) => typeof value === "string")) {
5079
+ logRefreshFailure(creds, "invalid response payload");
4132
5080
  return creds;
4133
5081
  }
4134
5082
  const updated = {
@@ -4141,7 +5089,9 @@ async function refreshAuthFromServer(creds) {
4141
5089
  saveCredentials(updated);
4142
5090
  }
4143
5091
  return updated;
4144
- } catch {
5092
+ } catch (error2) {
5093
+ const message = error2 instanceof Error ? error2.message : String(error2);
5094
+ logRefreshFailure(creds, message);
4145
5095
  return creds;
4146
5096
  }
4147
5097
  }
@@ -4165,7 +5115,7 @@ function getJwtExpiry(token) {
4165
5115
  }
4166
5116
 
4167
5117
  // src/runner/routing.ts
4168
- import { readFileSync as readFileSync11 } from "fs";
5118
+ import { readFileSync as readFileSync13 } from "fs";
4169
5119
  function isLoopbackUrl(rawUrl) {
4170
5120
  try {
4171
5121
  const parsed = new URL(rawUrl);
@@ -4180,7 +5130,7 @@ function isNonLocalEndpoint(rawUrl) {
4180
5130
  }
4181
5131
  function parseRemoteTwinUrlOverrides(path) {
4182
5132
  if (!path) return void 0;
4183
- const raw = readFileSync11(path, "utf-8");
5133
+ const raw = readFileSync13(path, "utf-8");
4184
5134
  const parsed = JSON.parse(raw);
4185
5135
  const overrides = {};
4186
5136
  for (const [key, value] of Object.entries(parsed)) {
@@ -4202,7 +5152,7 @@ function parseRemoteTwinUrlOverrides(path) {
4202
5152
  }
4203
5153
  function parseApiBaseUrlOverrides(path) {
4204
5154
  if (!path) return void 0;
4205
- const raw = readFileSync11(path, "utf-8");
5155
+ const raw = readFileSync13(path, "utf-8");
4206
5156
  const parsed = JSON.parse(raw);
4207
5157
  const overrides = {};
4208
5158
  for (const [key, value] of Object.entries(parsed)) {
@@ -4260,17 +5210,17 @@ function buildApiRoutingEnv(routing) {
4260
5210
  }
4261
5211
  return env;
4262
5212
  }
4263
- function validateRemoteOpenClawTopology(endpointUrl, requiredTwins, remoteTwinUrlOverrides) {
5213
+ function validateRemoteApiEngineTopology(endpointUrl, requiredTwins, remoteTwinUrlOverrides) {
4264
5214
  if (!isNonLocalEndpoint(endpointUrl)) return;
4265
5215
  if (!remoteTwinUrlOverrides) {
4266
5216
  throw new Error(
4267
- "Non-local OpenClaw endpoint detected but no remote-reachable twin URL map provided. Use --openclaw-twin-urls <path-to-json> with twin MCP base URLs reachable by the OpenClaw endpoint."
5217
+ "Non-local engine endpoint detected but no remote-reachable twin URL map provided. Use --engine-twin-urls <path-to-json> with twin MCP base URLs reachable by the engine endpoint."
4268
5218
  );
4269
5219
  }
4270
5220
  const missing = requiredTwins.filter((twin) => !remoteTwinUrlOverrides[twin]);
4271
5221
  if (missing.length > 0) {
4272
5222
  throw new Error(
4273
- `Missing twin URL overrides for: ${missing.join(", ")}. Provide a URL for each twin in --openclaw-twin-urls when using a non-local OpenClaw endpoint.`
5223
+ `Missing twin URL overrides for: ${missing.join(", ")}. Provide a URL for each twin in --engine-twin-urls when using a non-local engine endpoint.`
4274
5224
  );
4275
5225
  }
4276
5226
  }
@@ -4304,7 +5254,16 @@ function computeStateDiff(before, after) {
4304
5254
  }
4305
5255
  return diff;
4306
5256
  }
4307
- async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections, evaluatorConfig, timeoutSeconds, rateLimit, openclawRemote, remoteTwinUrlOverrides, apiRouting, cloudTwinUrls) {
5257
+ function parsePositiveIntFromEnv(name) {
5258
+ const raw = process.env[name]?.trim();
5259
+ if (!raw) return void 0;
5260
+ const parsed = parseInt(raw, 10);
5261
+ if (Number.isNaN(parsed) || parsed <= 0) {
5262
+ throw new Error(`${name} must be a positive integer when set`);
5263
+ }
5264
+ return parsed;
5265
+ }
5266
+ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections, evaluatorConfig, timeoutSeconds, rateLimit, apiEngine, localEngine, remoteTwinUrlOverrides, apiRouting, cloudTwinUrls, apiBearerToken, adminAuth) {
4308
5267
  async function probeHealth(url, timeoutMs) {
4309
5268
  const controller = new AbortController();
4310
5269
  const timer = setTimeout(() => controller.abort(), timeoutMs);
@@ -4335,8 +5294,13 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4335
5294
  try {
4336
5295
  let beforeState;
4337
5296
  if (useCloud) {
5297
+ const hasDynamicSeeds = seedSelections.some((s) => s.seedData);
5298
+ if (hasDynamicSeeds) {
5299
+ progress("Pushing dynamic seeds to cloud twins...");
5300
+ await pushStateToCloud(cloudTwinUrls, seedSelections, apiBearerToken, adminAuth);
5301
+ }
4338
5302
  progress("Fetching seed state from cloud twins...");
4339
- beforeState = await collectStateFromHttp(cloudTwinUrls);
5303
+ beforeState = await collectStateFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
4340
5304
  } else {
4341
5305
  progress("Capturing seed state...");
4342
5306
  const seedResult = await captureSeedState(twinConfigs);
@@ -4363,7 +5327,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4363
5327
  const twinNames = twinConfigs.map((c) => c.twinName);
4364
5328
  const localTwinUrls = twinUrls;
4365
5329
  let effectiveRemoteTwinUrls;
4366
- if (openclawRemote) {
5330
+ if (apiEngine) {
4367
5331
  effectiveRemoteTwinUrls = {};
4368
5332
  for (const twinName of twinNames) {
4369
5333
  const fromOverride = remoteTwinUrlOverrides?.[twinName];
@@ -4375,7 +5339,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4375
5339
  effectiveRemoteTwinUrls[twinName] = resolved;
4376
5340
  }
4377
5341
  }
4378
- if (openclawRemote) {
5342
+ if (apiEngine && !useCloud) {
4379
5343
  for (const [name, url] of Object.entries(localTwinUrls)) {
4380
5344
  const ok = await probeHealth(url, 1500);
4381
5345
  if (!ok) {
@@ -4383,24 +5347,25 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4383
5347
  }
4384
5348
  }
4385
5349
  }
4386
- if (useCloud) {
4387
- for (const [name, url] of Object.entries(cloudTwinUrls)) {
4388
- const ok = await probeHealth(url, 3e3);
4389
- if (!ok) {
4390
- throw new Error(`Cloud twin "${name}" failed health check at ${url}/health`);
4391
- }
4392
- }
4393
- }
4394
- const taskMessage = generateTaskFromScenario(scenario, apiRouting);
5350
+ const baseTaskMessage = generateTaskFromScenario(scenario, apiRouting);
5351
+ const taskMessage = localEngine?.promptContext ? `${localEngine.promptContext}
5352
+
5353
+ ---
5354
+
5355
+ ${baseTaskMessage}` : baseTaskMessage;
5356
+ const engineModel = localEngine?.model ?? apiEngine?.model;
4395
5357
  const effectiveAgentConfig = {
4396
5358
  ...agentConfig,
4397
5359
  env: {
4398
5360
  ...agentConfig.env,
4399
- ...buildApiRoutingEnv(apiRouting)
5361
+ ...buildApiRoutingEnv(apiRouting),
5362
+ ARCHAL_ENGINE_MODE: apiEngine ? "api" : "local",
5363
+ ...engineModel ? { ARCHAL_ENGINE_MODEL: engineModel } : {},
5364
+ ARCHAL_ENGINE_TASK: taskMessage
4400
5365
  }
4401
5366
  };
4402
- let agentResult = openclawRemote ? await executeOpenClawRemote(
4403
- openclawRemote,
5367
+ let agentResult = apiEngine ? await executeOpenClawRemote(
5368
+ apiEngine,
4404
5369
  scenario,
4405
5370
  runId,
4406
5371
  taskMessage,
@@ -4414,7 +5379,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4414
5379
  timeoutSeconds * 1e3,
4415
5380
  { restConfigPath, twinUrls }
4416
5381
  );
4417
- if (!openclawRemote && shouldRetryWithModernOpenClaw(agentResult)) {
5382
+ if (!apiEngine && !localEngine && shouldRetryWithModernOpenClaw(agentResult)) {
4418
5383
  warn(
4419
5384
  "OpenClaw legacy local invocation failed with CLI drift signal; retrying with modern local args"
4420
5385
  );
@@ -4431,8 +5396,8 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4431
5396
  let stateAfter;
4432
5397
  let trace;
4433
5398
  if (useCloud) {
4434
- stateAfter = await collectStateFromHttp(cloudTwinUrls);
4435
- trace = await collectTraceFromHttp(cloudTwinUrls);
5399
+ stateAfter = await collectStateFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
5400
+ trace = await collectTraceFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
4436
5401
  } else {
4437
5402
  if (!twinPaths) {
4438
5403
  throw new Error("Twin paths not initialized");
@@ -4443,7 +5408,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4443
5408
  const diff = computeStateDiff(beforeState, stateAfter);
4444
5409
  cleanupTempFiles(mcpConfigPath, twinPaths ?? {}, seedPaths, runId, twinNames);
4445
5410
  if (agentResult.timedOut) {
4446
- const timeoutDisplay = openclawRemote ? `${(openclawRemote.timeoutMs / 1e3).toFixed(0)}s` : `${timeoutSeconds}s`;
5411
+ const timeoutDisplay = apiEngine ? `${(apiEngine.timeoutMs / 1e3).toFixed(0)}s` : `${timeoutSeconds}s`;
4447
5412
  const durationMs2 = Date.now() - startTime;
4448
5413
  return {
4449
5414
  runIndex,
@@ -4461,6 +5426,9 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4461
5426
  }
4462
5427
  if (agentResult.exitCode !== 0 && agentResult.exitCode !== null) {
4463
5428
  warn(`Agent exited with non-zero code ${agentResult.exitCode} on run ${runIndex + 1}`);
5429
+ if (agentResult.stderr) {
5430
+ debug(`Agent stderr: ${agentResult.stderr.slice(0, 500)}`);
5431
+ }
4464
5432
  }
4465
5433
  progress(`Evaluating run ${runIndex + 1}...`);
4466
5434
  const evaluationResult = await evaluateRun(
@@ -4511,7 +5479,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4511
5479
  for (const paths of Object.values(seedPaths)) {
4512
5480
  for (const file of [paths.stateFile, `${paths.stateFile}.tmp`]) {
4513
5481
  try {
4514
- if (existsSync10(file)) unlinkSync6(file);
5482
+ if (existsSync11(file)) unlinkSync6(file);
4515
5483
  } catch {
4516
5484
  }
4517
5485
  }
@@ -4520,14 +5488,14 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
4520
5488
  if (restConfigPath) {
4521
5489
  for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
4522
5490
  try {
4523
- if (existsSync10(file)) unlinkSync6(file);
5491
+ if (existsSync11(file)) unlinkSync6(file);
4524
5492
  } catch {
4525
5493
  }
4526
5494
  }
4527
5495
  }
4528
5496
  }
4529
5497
  }
4530
- function preflightCheck(scenario, apiKey) {
5498
+ function preflightCheck(scenario, apiKey, model, baseUrl) {
4531
5499
  const errors = [];
4532
5500
  for (const twin of scenario.config.twins) {
4533
5501
  const result = checkTwinAvailability(twin);
@@ -4540,17 +5508,30 @@ function preflightCheck(scenario, apiKey) {
4540
5508
  }
4541
5509
  }
4542
5510
  const hasProbabilistic = scenario.successCriteria.some((c) => c.type === "probabilistic");
4543
- if (hasProbabilistic && !apiKey) {
4544
- const pCount = scenario.successCriteria.filter((c) => c.type === "probabilistic").length;
4545
- errors.push({
4546
- check: "ANTHROPIC_API_KEY",
4547
- message: `Scenario has ${pCount} probabilistic criteria but no API key is configured`,
4548
- detail: "Set via: export ANTHROPIC_API_KEY=sk-ant-... or archal config set evaluator.apiKey <key>"
4549
- });
5511
+ if (hasProbabilistic) {
5512
+ const provider = detectProvider(model);
5513
+ const resolvedKey = resolveProviderApiKey(apiKey, provider);
5514
+ if (provider === "openai-compatible" && !baseUrl) {
5515
+ errors.push({
5516
+ check: "evaluator.baseUrl",
5517
+ message: `Model "${model}" requires a base URL for the OpenAI-compatible endpoint`,
5518
+ detail: "Set via: export ARCHAL_EVALUATOR_BASE_URL=<url> or archal config set evaluator.baseUrl <url>"
5519
+ });
5520
+ }
5521
+ if (!resolvedKey) {
5522
+ const envVar = getProviderEnvVar(provider);
5523
+ const pCount = scenario.successCriteria.filter((c) => c.type === "probabilistic").length;
5524
+ errors.push({
5525
+ check: envVar,
5526
+ message: `Scenario has ${pCount} probabilistic criteria that will be skipped (no API key for ${provider})`,
5527
+ detail: `Set via: export ${envVar}=<your-key> or archal config set evaluator.apiKey <key>`,
5528
+ warning: true
5529
+ });
5530
+ }
4550
5531
  }
4551
5532
  return errors;
4552
5533
  }
4553
- async function runRemoteOpenClawPreflight(scenario, seedSelections, rateLimit, remoteConfig, remoteTwinUrlOverrides) {
5534
+ async function runRemoteApiEnginePreflight(scenario, seedSelections, rateLimit, remoteConfig, remoteTwinUrlOverrides) {
4554
5535
  const runId = `archal-preflight-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
4555
5536
  const twinConfigs = seedSelections.map((sel) => ({
4556
5537
  twinName: sel.twinName,
@@ -4592,14 +5573,14 @@ async function runRemoteOpenClawPreflight(scenario, seedSelections, rateLimit, r
4592
5573
  for (const paths of Object.values(restResult.twinPaths)) {
4593
5574
  for (const file of [paths.stateFile, `${paths.stateFile}.tmp`, paths.traceFile, `${paths.traceFile}.tmp`]) {
4594
5575
  try {
4595
- if (existsSync10(file)) unlinkSync6(file);
5576
+ if (existsSync11(file)) unlinkSync6(file);
4596
5577
  } catch {
4597
5578
  }
4598
5579
  }
4599
5580
  }
4600
5581
  for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
4601
5582
  try {
4602
- if (existsSync10(file)) unlinkSync6(file);
5583
+ if (existsSync11(file)) unlinkSync6(file);
4603
5584
  } catch {
4604
5585
  }
4605
5586
  }
@@ -4622,9 +5603,14 @@ async function runScenario(options) {
4622
5603
  );
4623
5604
  }
4624
5605
  }
4625
- const preflightErrors = preflightCheck(scenario, config.apiKey);
4626
- if (preflightErrors.length > 0) {
4627
- const lines = preflightErrors.map((e) => {
5606
+ const preflightErrors = preflightCheck(scenario, config.apiKey, model, config.baseUrl);
5607
+ const hardErrors = preflightErrors.filter((e) => !e.warning);
5608
+ const warnings = preflightErrors.filter((e) => e.warning);
5609
+ for (const w of warnings) {
5610
+ warn(`${w.check}: ${w.message}${w.detail ? ` (${w.detail})` : ""}`);
5611
+ }
5612
+ if (hardErrors.length > 0) {
5613
+ const lines = hardErrors.map((e) => {
4628
5614
  let line = ` - ${e.check}: ${e.message}`;
4629
5615
  if (e.detail) line += `
4630
5616
  ${e.detail}`;
@@ -4651,7 +5637,7 @@ Run 'archal doctor' for a full system check.`
4651
5637
  }
4652
5638
  seedSelections = overrideSeedSelection(seedSelections, overrides);
4653
5639
  }
4654
- if (config.apiKey && !options.noDynamicSeed) {
5640
+ if (config.geminiApiKey && !options.noDynamicSeed) {
4655
5641
  progress("Generating dynamic seeds from setup description...");
4656
5642
  const baseTwinConfigs = seedSelections.map((sel) => ({
4657
5643
  twinName: sel.twinName,
@@ -4659,8 +5645,8 @@ Run 'archal doctor' for a full system check.`
4659
5645
  }));
4660
5646
  const { beforeState: baseSeedStates } = await captureSeedState(baseTwinConfigs);
4661
5647
  const dynamicConfig = {
4662
- apiKey: config.apiKey,
4663
- model,
5648
+ geminiApiKey: config.geminiApiKey,
5649
+ model: config.seedModel,
4664
5650
  noCache: options.noSeedCache
4665
5651
  };
4666
5652
  for (const sel of seedSelections) {
@@ -4683,24 +5669,28 @@ Run 'archal doctor' for a full system check.`
4683
5669
  sel.seedData = result.seed;
4684
5670
  }
4685
5671
  }
4686
- const scenarioDir = dirname2(resolve5(options.scenarioPath));
5672
+ const scenarioDir = dirname2(resolve7(options.scenarioPath));
4687
5673
  let projectConfigPath;
4688
5674
  for (const dir of [scenarioDir, process.cwd()]) {
4689
- const candidate = resolve5(dir, ".archal.json");
4690
- if (existsSync10(candidate)) {
5675
+ const candidate = resolve7(dir, ".archal.json");
5676
+ if (existsSync11(candidate)) {
4691
5677
  projectConfigPath = candidate;
4692
5678
  break;
4693
5679
  }
4694
5680
  }
4695
- function resolveOpenClawModel(raw) {
4696
- if (!raw || !raw.trim()) return "openclaw:main";
5681
+ function resolveOpenClawModel2(raw) {
5682
+ if (!raw || !raw.trim()) return void 0;
4697
5683
  const value = raw.trim();
4698
5684
  return value.includes(":") ? value : `openclaw:${value}`;
4699
5685
  }
4700
- function resolveOpenClawGatewayToken2(explicitToken) {
5686
+ function resolveEngineToken2(explicitToken) {
4701
5687
  if (explicitToken && explicitToken.trim()) {
4702
5688
  return explicitToken.trim();
4703
5689
  }
5690
+ const engineToken = process.env["ARCHAL_ENGINE_TOKEN"]?.trim();
5691
+ if (engineToken) {
5692
+ return engineToken;
5693
+ }
4704
5694
  const gatewayToken = process.env["OPENCLAW_GATEWAY_TOKEN"]?.trim();
4705
5695
  if (gatewayToken) {
4706
5696
  return gatewayToken;
@@ -4711,42 +5701,124 @@ Run 'archal doctor' for a full system check.`
4711
5701
  }
4712
5702
  return void 0;
4713
5703
  }
4714
- let openclawRemote;
4715
- if (options.openclawUrl) {
4716
- openclawRemote = {
4717
- url: options.openclawUrl,
4718
- token: resolveOpenClawGatewayToken2(options.openclawToken),
4719
- model: resolveOpenClawModel(options.openclawAgent ?? process.env["OPENCLAW_AGENT_ID"]),
4720
- timeoutMs: (options.openclawTimeout ?? timeoutSeconds) * 1e3
5704
+ const openclawEndpointAlias = options.openclawUrl ?? process.env["OPENCLAW_URL"];
5705
+ const engineMode = (() => {
5706
+ if (options.engine) {
5707
+ return options.engine;
5708
+ }
5709
+ if (options.engineEndpoint || openclawEndpointAlias || process.env["ARCHAL_ENGINE_ENDPOINT"]) {
5710
+ return "api";
5711
+ }
5712
+ if (options.harnessDir || process.env["ARCHAL_HARNESS_DIR"]) {
5713
+ return "local";
5714
+ }
5715
+ return "legacy";
5716
+ })();
5717
+ const apiEndpoint = options.engineEndpoint ?? openclawEndpointAlias ?? process.env["ARCHAL_ENGINE_ENDPOINT"];
5718
+ const rawOpenClawAgent = options.openclawAgent ?? process.env["OPENCLAW_AGENT_ID"];
5719
+ const rawEngineModel = options.engineModel ?? process.env["ARCHAL_ENGINE_MODEL"];
5720
+ const resolvedEngineToken = resolveEngineToken2(options.engineToken ?? options.openclawToken);
5721
+ const harnessDir = options.harnessDir ?? process.env["ARCHAL_HARNESS_DIR"];
5722
+ let apiEngine;
5723
+ if (engineMode === "api") {
5724
+ const apiTimeoutSeconds = options.engineTimeout ?? options.openclawTimeout ?? parsePositiveIntFromEnv("ARCHAL_ENGINE_TIMEOUT") ?? timeoutSeconds;
5725
+ if (!apiEndpoint || !apiEndpoint.trim()) {
5726
+ throw new Error(
5727
+ "API engine mode requires --engine-endpoint (or --openclaw-url for legacy compatibility)."
5728
+ );
5729
+ }
5730
+ if (!Number.isFinite(apiTimeoutSeconds) || apiTimeoutSeconds <= 0) {
5731
+ throw new Error("Engine timeout must be a positive integer number of seconds.");
5732
+ }
5733
+ const resolvedApiModel = rawEngineModel?.trim() || resolveOpenClawModel2(rawOpenClawAgent) || (openclawEndpointAlias ? "openclaw:main" : void 0);
5734
+ if (!resolvedApiModel) {
5735
+ throw new Error(
5736
+ "API engine mode requires --engine-model/ARCHAL_ENGINE_MODEL (or --openclaw-agent/OPENCLAW_AGENT_ID)."
5737
+ );
5738
+ }
5739
+ apiEngine = {
5740
+ url: apiEndpoint.trim(),
5741
+ token: resolvedEngineToken,
5742
+ model: resolvedApiModel,
5743
+ timeoutMs: apiTimeoutSeconds * 1e3,
5744
+ agentId: rawOpenClawAgent?.trim() || void 0
4721
5745
  };
4722
- if (!openclawRemote.token) {
5746
+ if (openclawEndpointAlias && !apiEngine.token) {
4723
5747
  throw new Error(
4724
5748
  "OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD."
4725
5749
  );
4726
5750
  }
4727
5751
  }
4728
- const remoteTwinUrlOverrides = parseRemoteTwinUrlOverrides(options.openclawTwinUrls);
5752
+ let localEngine;
5753
+ if (engineMode === "local") {
5754
+ if (!harnessDir) {
5755
+ throw new Error(
5756
+ "Local engine mode requires --harness-dir (or ARCHAL_HARNESS_DIR)."
5757
+ );
5758
+ }
5759
+ const resolvedHarness = resolveLocalHarness(harnessDir, rawEngineModel);
5760
+ const resolvedFallbackLocalAgentConfig = options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath);
5761
+ const fallbackLocalAgentConfig = resolvedFallbackLocalAgentConfig ?? { command: "openclaw", args: [] };
5762
+ if (!resolvedHarness.manifest) {
5763
+ debug(
5764
+ "Harness manifest not found for local mode; using agent command defaults.",
5765
+ { manifestPath: resolvedHarness.manifestPath }
5766
+ );
5767
+ } else if (!resolvedHarness.localCommand) {
5768
+ warn(
5769
+ `Harness manifest at ${resolvedHarness.manifestPath} does not define local.command; falling back to agent command defaults.`
5770
+ );
5771
+ }
5772
+ if (!resolvedHarness.localCommand && !resolvedFallbackLocalAgentConfig) {
5773
+ warn(
5774
+ 'No local command configured via harness manifest/.archal.json/ARCHAL_AGENT_COMMAND; defaulting to "openclaw".'
5775
+ );
5776
+ }
5777
+ const commandConfig = resolvedHarness.localCommand ?? fallbackLocalAgentConfig;
5778
+ localEngine = {
5779
+ model: resolvedHarness.model,
5780
+ command: commandConfig.command,
5781
+ args: commandConfig.args,
5782
+ env: commandConfig.env,
5783
+ cwd: resolvedHarness.harnessDir,
5784
+ promptContext: resolvedHarness.promptContext
5785
+ };
5786
+ }
5787
+ const remoteTwinUrlOverrides = apiEngine ? parseRemoteTwinUrlOverrides(
5788
+ options.engineTwinUrls ?? options.openclawTwinUrls ?? process.env["ARCHAL_ENGINE_TWIN_URLS"]
5789
+ ) : void 0;
4729
5790
  const apiBaseUrlOverrides = parseApiBaseUrlOverrides(options.apiBaseUrls);
4730
5791
  const apiProxyUrl = parseProxyUrl(options.apiProxyUrl ?? process.env["ARCHAL_API_PROXY_URL"]);
4731
5792
  const apiRouting = apiBaseUrlOverrides && Object.keys(apiBaseUrlOverrides).length > 0 || apiProxyUrl ? {
4732
5793
  baseUrls: apiBaseUrlOverrides,
4733
- proxyUrl: apiProxyUrl
5794
+ proxyUrl: apiProxyUrl,
5795
+ bearerToken: options.apiBearerToken,
5796
+ adminToken: options.apiAdminToken,
5797
+ adminUserId: options.apiAdminUserId
4734
5798
  } : void 0;
4735
- const agentConfig = options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath) ?? (openclawRemote ? { command: "openclaw", args: [] } : {
5799
+ const agentConfig = localEngine ? {
5800
+ command: localEngine.command,
5801
+ args: localEngine.args,
5802
+ env: localEngine.env,
5803
+ cwd: localEngine.cwd
5804
+ } : options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath) ?? (apiEngine ? { command: "openclaw", args: [] } : {
4736
5805
  command: process.env["ARCHAL_AGENT_COMMAND"] ?? "echo",
4737
5806
  args: process.env["ARCHAL_AGENT_COMMAND"] ? [] : ["No agent command configured"]
4738
5807
  });
4739
- if (!openclawRemote && agentConfig.command === "echo") {
5808
+ if (!apiEngine && !localEngine && agentConfig.command === "echo") {
4740
5809
  process.stderr.write(
4741
- "Warning: No agent command configured. Set ARCHAL_AGENT_COMMAND/.archal.json or provide --openclaw-url.\n"
5810
+ "Warning: No agent command configured. Set ARCHAL_AGENT_COMMAND/.archal.json, use --engine-endpoint, or run --engine local with --harness-dir.\n"
4742
5811
  );
4743
5812
  }
4744
- if (openclawRemote) {
4745
- info("Remote OpenClaw mode enabled", { url: openclawRemote.url });
5813
+ if (apiEngine) {
5814
+ info("Remote API engine mode enabled", { url: apiEngine.url });
4746
5815
  warn(
4747
- "Remote OpenClaw requires network reachability from the endpoint to each run's twin MCP URLs. If runs fail to connect, co-locate OpenClaw with Archal or expose twins via a reachable network path."
5816
+ "Remote engine mode requires network reachability from the endpoint to each run's twin MCP URLs. If runs fail to connect, co-locate the engine with Archal or expose twins via a reachable network path."
4748
5817
  );
4749
- validateRemoteOpenClawTopology(openclawRemote.url, scenario.config.twins, remoteTwinUrlOverrides);
5818
+ validateRemoteApiEngineTopology(apiEngine.url, scenario.config.twins, remoteTwinUrlOverrides);
5819
+ }
5820
+ if (localEngine) {
5821
+ info("Local harness engine mode enabled", { harnessDir: localEngine.cwd });
4750
5822
  }
4751
5823
  if (apiRouting) {
4752
5824
  info("API routing context enabled", {
@@ -4755,18 +5827,18 @@ Run 'archal doctor' for a full system check.`
4755
5827
  });
4756
5828
  }
4757
5829
  if (options.preflightOnly) {
4758
- if (openclawRemote) {
4759
- await runRemoteOpenClawPreflight(
5830
+ if (apiEngine) {
5831
+ await runRemoteApiEnginePreflight(
4760
5832
  scenario,
4761
5833
  seedSelections,
4762
5834
  options.rateLimit,
4763
- openclawRemote,
5835
+ apiEngine,
4764
5836
  remoteTwinUrlOverrides
4765
5837
  );
4766
5838
  }
4767
5839
  info("Preflight checks passed", {
4768
5840
  scenario: scenario.title,
4769
- remoteOpenClaw: openclawRemote ? "enabled" : "disabled"
5841
+ engineMode: apiEngine ? "api" : localEngine ? "local" : "legacy-local"
4770
5842
  });
4771
5843
  return {
4772
5844
  scenarioTitle: scenario.title,
@@ -4786,6 +5858,7 @@ Run 'archal doctor' for a full system check.`
4786
5858
  };
4787
5859
  const runs = [];
4788
5860
  for (let i = 0; i < numRuns; i++) {
5861
+ const adminAuth = options.apiAdminToken ? { token: options.apiAdminToken, userId: options.apiAdminUserId } : void 0;
4789
5862
  const result = await executeSingleRun(
4790
5863
  i,
4791
5864
  scenario,
@@ -4794,10 +5867,13 @@ Run 'archal doctor' for a full system check.`
4794
5867
  evaluatorConfig,
4795
5868
  timeoutSeconds,
4796
5869
  options.rateLimit,
4797
- openclawRemote,
5870
+ apiEngine,
5871
+ localEngine,
4798
5872
  remoteTwinUrlOverrides,
4799
5873
  apiRouting,
4800
- options.cloudTwinUrls
5874
+ options.cloudTwinUrls,
5875
+ options.apiBearerToken,
5876
+ adminAuth
4801
5877
  );
4802
5878
  runs.push(result);
4803
5879
  printRunProgress(i, numRuns, result.overallScore, result.error);
@@ -4836,10 +5912,10 @@ function normalizeBaseUrl(value, fallback) {
4836
5912
  const normalized = trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
4837
5913
  return normalized.length > 0 ? normalized : fallback;
4838
5914
  }
4839
- var DEFAULT_BASE_URL = "https://archal.ai";
5915
+ var DEFAULT_BASE_URL = "https://www.archal.ai";
4840
5916
  var AUTH_BASE_URL2 = normalizeBaseUrl(process.env["ARCHAL_AUTH_URL"] ?? DEFAULT_BASE_URL, DEFAULT_BASE_URL);
4841
5917
  var API_BASE_URL = normalizeBaseUrl(process.env["ARCHAL_API_URL"] ?? AUTH_BASE_URL2, AUTH_BASE_URL2);
4842
- var REQUEST_TIMEOUT_MS3 = 8e3;
5918
+ var REQUEST_TIMEOUT_MS4 = 8e3;
4843
5919
  var RETRYABLE_STATUS_CODES2 = /* @__PURE__ */ new Set([408, 425, 429, 500, 502, 503, 504]);
4844
5920
  var RETRYABLE_NETWORK_CODES = /* @__PURE__ */ new Set([
4845
5921
  "ECONNABORTED",
@@ -4864,7 +5940,7 @@ var MAX_RETRIES2 = parseBoundedInt(process.env["ARCHAL_API_MAX_RETRIES"], 3, 0,
4864
5940
  var RETRY_BASE_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_BASE_MS"], 250, 25, 1e4);
4865
5941
  var RETRY_MAX_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_MAX_MS"], 3e3, RETRY_BASE_DELAY_MS, 2e4);
4866
5942
  function sleep2(ms) {
4867
- return new Promise((resolve11) => setTimeout(resolve11, ms));
5943
+ return new Promise((resolve13) => setTimeout(resolve13, ms));
4868
5944
  }
4869
5945
  function retryDelayMs(attempt, retryAfter) {
4870
5946
  if (retryAfter) {
@@ -4924,13 +6000,30 @@ function isFinalizeEvidencePath(path) {
4924
6000
  }
4925
6001
  return /^\/api\/sessions\/[^/]+\/evidence\/finalize$/.test(pathname);
4926
6002
  }
6003
/**
 * Best-effort refresh of the stored CLI session.
 *
 * Loads the stored credentials, asks `refreshCliSession` for a renewed
 * session, saves the renewed credentials and hands back the new token.
 *
 * @returns {Promise<string|null>} The renewed token, or `null` when no
 *   credentials / refresh token are stored, the refresh yields nothing, or
 *   any step throws.
 */
async function tryRefreshToken() {
  try {
    const stored = getStoredCredentials();
    if (!stored?.refreshToken) {
      return null;
    }
    const renewed = await refreshCliSession(stored);
    if (renewed) {
      saveCredentials(renewed);
      return renewed.token;
    }
    return null;
  } catch {
    // Refresh is opportunistic: any failure is reported as "no new token".
    return null;
  }
}
4927
6015
  async function request(method, path, token, body) {
4928
6016
  const url = `${resolveBaseUrl(path)}${path}`;
4929
6017
  const headers = {
4930
6018
  "content-type": "application/json",
4931
- "user-agent": "archal-cli/0.1.0"
6019
+ "user-agent": CLI_USER_AGENT
4932
6020
  };
4933
- if (token) {
6021
+ const runtimeAdminToken = process.env["ARCHAL_RUNTIME_ADMIN_TOKEN"]?.trim();
6022
+ if (runtimeAdminToken) {
6023
+ headers["x-archal-admin-token"] = runtimeAdminToken;
6024
+ headers["x-archal-user-id"] = process.env["ARCHAL_RUNTIME_USER_ID"]?.trim() || "cli-user";
6025
+ headers["x-archal-plan"] = process.env["ARCHAL_RUNTIME_PLAN"]?.trim() || "free";
6026
+ } else if (token) {
4934
6027
  headers["authorization"] = `Bearer ${token}`;
4935
6028
  }
4936
6029
  const isIdempotentFinalize = method === "POST" && isFinalizeEvidencePath(path);
@@ -4938,16 +6031,28 @@ async function request(method, path, token, body) {
4938
6031
  const attempts = retriesAllowed ? MAX_RETRIES2 + 1 : 1;
4939
6032
  let lastError = "request failed";
4940
6033
  let lastOffline = false;
6034
+ let refreshAttempted = false;
4941
6035
  for (let attempt = 1; attempt <= attempts; attempt += 1) {
4942
6036
  try {
4943
6037
  const response = await fetch(url, {
4944
6038
  method,
4945
6039
  headers,
4946
6040
  body: body ? JSON.stringify(body) : void 0,
4947
- signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
6041
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS4)
4948
6042
  });
4949
6043
  if (!response.ok) {
4950
- const text = await response.text().catch(() => "");
6044
+ if (response.status === 401 && token && !refreshAttempted) {
6045
+ refreshAttempted = true;
6046
+ const refreshed = await tryRefreshToken();
6047
+ if (refreshed) {
6048
+ token = refreshed;
6049
+ headers["authorization"] = `Bearer ${token}`;
6050
+ attempt -= 1;
6051
+ continue;
6052
+ }
6053
+ }
6054
+ const rawText = await response.text().catch(() => "");
6055
+ const text = rawText.length > 200 ? rawText.slice(0, 200) + "..." : rawText;
4951
6056
  const retryable = retriesAllowed && attempt < attempts && RETRYABLE_STATUS_CODES2.has(response.status);
4952
6057
  if (retryable) {
4953
6058
  await sleep2(retryDelayMs(attempt, response.headers.get("retry-after")));
@@ -5018,7 +6123,7 @@ function fetchScenarioCatalog(token) {
5018
6123
  return request("GET", "/api/scenarios", token);
5019
6124
  }
5020
6125
 
5021
- // src/commands/twin.ts
6126
+ // src/commands/twins.ts
5022
6127
  import { Command as Command2 } from "commander";
5023
6128
 
5024
6129
  // src/constants.ts
@@ -5045,10 +6150,10 @@ var PLAN_LIMITS = {
5045
6150
  import { createInterface as createInterface2 } from "readline";
5046
6151
  function askLine(question) {
5047
6152
  const rl = createInterface2({ input: process.stdin, output: process.stderr });
5048
- return new Promise((resolve11) => {
6153
+ return new Promise((resolve13) => {
5049
6154
  rl.question(question, (answer) => {
5050
6155
  rl.close();
5051
- resolve11(answer.trim());
6156
+ resolve13(answer.trim());
5052
6157
  });
5053
6158
  });
5054
6159
  }
@@ -5057,8 +6162,7 @@ async function askConfirm(question) {
5057
6162
  return answer.toLowerCase().startsWith("y");
5058
6163
  }
5059
6164
 
5060
- // src/commands/twin.ts
5061
- var runningTwins = /* @__PURE__ */ new Map();
6165
+ // src/commands/twins.ts
5062
6166
  var KNOWN_TWINS2 = [
5063
6167
  { name: "github", package: "@archal/twin-github", description: "GitHub digital twin" },
5064
6168
  { name: "slack", package: "@archal/twin-slack", description: "Slack digital twin" },
@@ -5083,7 +6187,7 @@ async function runInteractiveTwinSelect(token) {
5083
6187
  const marker = currentlySelected.has(twin.id) ? "\x1B[32m\u2713\x1B[0m" : " ";
5084
6188
  const num = String(i + 1).padStart(2);
5085
6189
  process.stderr.write(
5086
- ` ${marker} [${num}] ${twin.name.padEnd(18)} (${twin.toolCount} tools) \u2014 ${twin.description}
6190
+ ` ${marker} [${num}] ${twin.name.padEnd(18)}${twin.toolCount != null ? ` (${twin.toolCount} tools)` : ""} \u2014 ${twin.description}
5087
6191
  `
5088
6192
  );
5089
6193
  }
@@ -5169,7 +6273,7 @@ async function listTwinCatalog() {
5169
6273
  } else {
5170
6274
  status = "\x1B[90m\u2717 not selected\x1B[0m";
5171
6275
  }
5172
- return [twin.name, String(twin.toolCount), twin.description, status];
6276
+ return [twin.name, twin.toolCount != null ? String(twin.toolCount) : "\u2014", twin.description, status];
5173
6277
  });
5174
6278
  table(headers, rows);
5175
6279
  if (isUnlimited) {
@@ -5194,85 +6298,12 @@ async function selectTwinsForPlan() {
5194
6298
  const refreshed = await refreshAuthFromServer(creds);
5195
6299
  saveCredentials(refreshed);
5196
6300
  }
5197
- function createTwinCommand() {
5198
- const cmd = new Command2("twin").description("Manage local digital twin processes (debug/local only)");
5199
- cmd.command("start").description("Start a digital twin process").argument("<name>", "Twin name (e.g., github, slack)").option("--seed <seed>", "Seed name to load", "small-project").option("--port <port>", "Port for REST transport").action((name, opts) => {
5200
- requireAuth({
5201
- action: `start the "${name}" twin`,
5202
- nextCommand: `archal twin start ${name}`
5203
- });
5204
- const knownTwin = KNOWN_TWINS2.find((t) => t.name === name);
5205
- if (!knownTwin) {
5206
- const available = KNOWN_TWINS2.map((t) => t.name).join(", ");
5207
- error(`Unknown twin: "${name}". Available twins: ${available}`);
5208
- process.exit(1);
5209
- }
5210
- if (runningTwins.has(name)) {
5211
- warn(`Twin "${name}" is already running (PID: ${runningTwins.get(name)?.pid ?? "unknown"})`);
5212
- return;
5213
- }
5214
- info("`archal run` uses hosted cloud twins. `archal twin start` is for local debugging only.");
5215
- const args = [knownTwin.package, "--seed", opts.seed, "--transport", "rest"];
5216
- if (opts.port) {
5217
- args.push("--port", opts.port);
5218
- }
5219
- info(`Starting twin: ${name}`, { seed: opts.seed, transport: "rest" });
5220
- const child = spawnMcpStdioProcess({
5221
- command: "npx",
5222
- args
5223
- });
5224
- const pid = child.pid ?? 0;
5225
- runningTwins.set(name, {
5226
- name,
5227
- pid,
5228
- startedAt: (/* @__PURE__ */ new Date()).toISOString(),
5229
- process: child
5230
- });
5231
- child.on("exit", (code) => {
5232
- info(`Twin "${name}" exited`, { code: String(code ?? "unknown") });
5233
- runningTwins.delete(name);
5234
- });
5235
- success(`Twin "${name}" started (PID: ${pid})`);
5236
- });
5237
- cmd.command("stop").description("Stop a running digital twin").argument("<name>", "Twin name to stop").action(async (name) => {
5238
- const twin = runningTwins.get(name);
5239
- if (!twin) {
5240
- error(`Twin "${name}" is not running`);
5241
- const running = Array.from(runningTwins.keys());
5242
- if (running.length > 0) {
5243
- info(`Running twins: ${running.join(", ")}`);
5244
- }
5245
- process.exit(1);
5246
- }
5247
- info(`Stopping twin: ${name}`, { pid: String(twin.pid) });
5248
- await killProcess(twin.process);
5249
- runningTwins.delete(name);
5250
- success(`Twin "${name}" stopped`);
5251
- });
5252
- cmd.command("status").description("Show status of running digital twins").action(() => {
5253
- if (runningTwins.size === 0) {
5254
- info("No twins currently running");
5255
- return;
5256
- }
5257
- const headers = ["Name", "PID", "Started", "Status"];
5258
- const rows = [];
5259
- for (const twin of runningTwins.values()) {
5260
- const isAlive = twin.process.exitCode === null;
5261
- rows.push([
5262
- twin.name,
5263
- String(twin.pid),
5264
- twin.startedAt,
5265
- isAlive ? "running" : `exited (${twin.process.exitCode})`
5266
- ]);
5267
- }
5268
- table(headers, rows);
5269
- });
5270
- cmd.command("list").description("List available digital twins and entitlement status").action(async () => {
5271
- warn("`archal twin list` is deprecated. Use `archal twins list`.");
6301
+ function createTwinsCommand() {
6302
+ const cmd = new Command2("twins").description("Manage twin catalog entitlements");
6303
+ cmd.command("list").description("List available twins and entitlement status").action(async () => {
5272
6304
  await listTwinCatalog();
5273
6305
  });
5274
6306
  cmd.command("select").description("Choose which twins to use on your free plan").action(async () => {
5275
- warn("`archal twin select` is deprecated. Use `archal twins select`.");
5276
6307
  await selectTwinsForPlan();
5277
6308
  });
5278
6309
  return cmd;
@@ -5280,7 +6311,13 @@ function createTwinCommand() {
5280
6311
 
5281
6312
  // src/commands/run.ts
5282
6313
  function createRunCommand() {
5283
- const cmd = new Command3("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path to scenario markdown file").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "100").option("--openclaw-url <url>", "OpenClaw Gateway URL or /v1/responses endpoint (enables remote OpenClaw mode)").option("--openclaw-token <token>", "Bearer token for OpenClaw Gateway auth").option("--openclaw-agent <id>", "OpenClaw agent/model id for remote mode (e.g. main or openclaw:my-agent)").option("--openclaw-twin-urls <path>", "Path to JSON mapping twin names to remotely reachable MCP base URLs").option("--openclaw-timeout <seconds>", "Timeout for remote OpenClaw HTTP call per run (defaults to run timeout)").option("--api-base-urls <path>", "Path to JSON mapping service names to clone API base URLs for raw API code routing").option("--api-proxy-url <url>", "Proxy URL for raw API code routing metadata").option("--preflight-only", "Run environment/config preflight checks only and exit").option("--no-dynamic-seed", "Disable dynamic seed generation (use keyword-matched seed only)").option("--no-seed-cache", "Skip seed cache for dynamic generation").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (scenarioArg, opts) => {
6314
+ const cmd = new Command3("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path to scenario markdown file").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "100").option("--engine-endpoint <url>", "API engine endpoint URL (base URL or /v1/responses)").option("--engine-token <token>", "Bearer token for API engine auth").option(
6315
+ "--engine-model <model>",
6316
+ "Model id for API mode; in local mode this is exported as ARCHAL_ENGINE_MODEL"
6317
+ ).option("--engine-twin-urls <path>", "Path to JSON mapping twin names to remote-reachable MCP base URLs").option("--engine-timeout <seconds>", "Timeout for API engine HTTP call per run (defaults to run timeout)").option(
6318
+ "--harness-dir <path>",
6319
+ "Local agent execution directory (archal-harness.json is optional)"
6320
+ ).option("--openclaw-url <url>", "Deprecated alias for --engine-endpoint").option("--openclaw-token <token>", "Deprecated alias for --engine-token").option("--openclaw-agent <id>", "Deprecated alias for --engine-model").option("--openclaw-twin-urls <path>", "Deprecated alias for --engine-twin-urls").option("--openclaw-timeout <seconds>", "Deprecated alias for --engine-timeout").option("--api-base-urls <path>", "Path to JSON mapping service names to clone API base URLs for raw API code routing").option("--api-proxy-url <url>", "Proxy URL for raw API code routing metadata").option("--preflight-only", "Run environment/config preflight checks only and exit").option("--no-dynamic-seed", "Disable dynamic seed generation (use keyword-matched seed only)").option("--no-seed-cache", "Skip seed cache for dynamic generation").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (scenarioArg, opts) => {
5284
6321
  const required = requireAuth({
5285
6322
  action: "run a scenario",
5286
6323
  nextCommand: `archal run ${scenarioArg}`
@@ -5296,8 +6333,8 @@ function createRunCommand() {
5296
6333
  if (opts.verbose) {
5297
6334
  configureLogger({ verbose: true, level: "debug" });
5298
6335
  }
5299
- const scenarioPath = resolve6(scenarioArg);
5300
- if (!existsSync11(scenarioPath)) {
6336
+ const scenarioPath = resolve8(scenarioArg);
6337
+ if (!existsSync12(scenarioPath)) {
5301
6338
  process.stderr.write(`Error: Scenario file not found: ${scenarioPath}
5302
6339
  `);
5303
6340
  process.exit(1);
@@ -5387,26 +6424,20 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5387
6424
  process.stderr.write("Error: --pass-threshold must be a number between 0 and 100\n");
5388
6425
  process.exit(1);
5389
6426
  }
5390
- if (!opts.openclawUrl) {
5391
- process.stderr.write(
5392
- "Error: --openclaw-url is required. `archal run` now uses cloud transport only.\n"
5393
- );
6427
+ let engine;
6428
+ try {
6429
+ engine = resolveEngineConfig(opts, timeout);
6430
+ } catch (err) {
6431
+ const message = err instanceof Error ? err.message : String(err);
6432
+ process.stderr.write(`Error: ${message}
6433
+ `);
5394
6434
  process.exit(1);
5395
6435
  }
5396
- let openclawTimeout;
5397
- if (opts.openclawTimeout) {
5398
- openclawTimeout = parseInt(opts.openclawTimeout, 10);
5399
- if (Number.isNaN(openclawTimeout) || openclawTimeout <= 0) {
5400
- process.stderr.write("Error: --openclaw-timeout must be a positive integer\n");
5401
- process.exit(1);
5402
- }
5403
- }
5404
- const resolvedOpenClawToken = resolveOpenClawGatewayToken(opts.openclawToken);
5405
- if (opts.openclawUrl && !resolvedOpenClawToken) {
6436
+ if (engine.deprecatedAliasesUsed.length > 0) {
5406
6437
  process.stderr.write(
5407
- "Error: OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD.\n"
6438
+ `Warning: OpenClaw flags are deprecated (${engine.deprecatedAliasesUsed.join(", ")}). Use --engine-* equivalents.
6439
+ `
5408
6440
  );
5409
- process.exit(1);
5410
6441
  }
5411
6442
  {
5412
6443
  const sessionResult = await startSession(credentials.token, {
@@ -5433,9 +6464,9 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5433
6464
  if (!runFailureMessage && Object.keys(endpointRoots).length > 0) {
5434
6465
  cloudTwinUrls = endpointRoots;
5435
6466
  }
5436
- if (!runFailureMessage && opts.openclawUrl && !opts.openclawTwinUrls) {
5437
- generatedTwinUrlMapPath = resolve6(
5438
- `.archal-session-${backendSessionId}-openclaw-twin-urls.json`
6467
+ if (!runFailureMessage && engine.mode === "api" && !engine.twinUrlsPath) {
6468
+ generatedTwinUrlMapPath = resolve8(
6469
+ `.archal-session-${backendSessionId}-engine-twin-urls.json`
5439
6470
  );
5440
6471
  writeFileSync9(
5441
6472
  generatedTwinUrlMapPath,
@@ -5444,7 +6475,7 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5444
6475
  );
5445
6476
  }
5446
6477
  if (!runFailureMessage && !opts.apiBaseUrls && apiBaseUrls && Object.keys(apiBaseUrls).length > 0) {
5447
- generatedApiBaseUrlMapPath = resolve6(
6478
+ generatedApiBaseUrlMapPath = resolve8(
5448
6479
  `.archal-session-${backendSessionId}-api-base-urls.json`
5449
6480
  );
5450
6481
  writeFileSync9(
@@ -5454,15 +6485,34 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5454
6485
  );
5455
6486
  }
5456
6487
  if (!runFailureMessage) {
5457
- const [statusResult, healthResult] = await Promise.all([
5458
- getSessionStatus(credentials.token, backendSessionId),
5459
- getSessionHealth(credentials.token, backendSessionId)
5460
- ]);
5461
- if (!statusResult.ok || !statusResult.data.alive) {
5462
- runFailureMessage = `session not ready (${statusResult.ok ? statusResult.data.status : statusResult.error})`;
6488
+ const SESSION_READY_TIMEOUT_MS = 12e4;
6489
+ const SESSION_POLL_INTERVAL_MS = 3e3;
6490
+ const readyDeadline = Date.now() + SESSION_READY_TIMEOUT_MS;
6491
+ let sessionReady = false;
6492
+ while (Date.now() < readyDeadline) {
6493
+ const [statusResult, healthResult] = await Promise.all([
6494
+ getSessionStatus(credentials.token, backendSessionId),
6495
+ getSessionHealth(credentials.token, backendSessionId)
6496
+ ]);
6497
+ if (!statusResult.ok) {
6498
+ runFailureMessage = `session status check failed (${statusResult.error})`;
6499
+ break;
6500
+ }
6501
+ const status = statusResult.data.status;
6502
+ if (status === "failed" || status === "expired" || status === "ended") {
6503
+ runFailureMessage = `session ${status}`;
6504
+ break;
6505
+ }
6506
+ const healthAlive = healthResult.ok && healthResult.data.alive;
6507
+ const statusAlive = statusResult.data.alive || statusResult.data.status === "ready";
6508
+ if (statusAlive && healthAlive) {
6509
+ sessionReady = true;
6510
+ break;
6511
+ }
6512
+ await new Promise((resolve13) => setTimeout(resolve13, SESSION_POLL_INTERVAL_MS));
5463
6513
  }
5464
- if (!runFailureMessage && (!healthResult.ok || !healthResult.data.alive)) {
5465
- runFailureMessage = `session health check failed (${healthResult.ok ? "dead" : healthResult.error})`;
6514
+ if (!sessionReady && !runFailureMessage) {
6515
+ runFailureMessage = "session timed out waiting for twins to become ready";
5466
6516
  }
5467
6517
  }
5468
6518
  } else if (!sessionResult.offline) {
@@ -5482,17 +6532,26 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5482
6532
  output: outputFormat,
5483
6533
  seed: opts.seed,
5484
6534
  rateLimit,
6535
+ engineEndpoint: engine.endpoint,
6536
+ engineToken: engine.token,
6537
+ engineModel: engine.model,
6538
+ engineTwinUrls: generatedTwinUrlMapPath ?? engine.twinUrlsPath,
6539
+ engineTimeout: engine.timeoutSeconds,
6540
+ harnessDir: engine.harnessDir,
5485
6541
  openclawUrl: opts.openclawUrl,
5486
- openclawToken: resolvedOpenClawToken,
6542
+ openclawToken: engine.token,
5487
6543
  openclawAgent: opts.openclawAgent,
5488
6544
  openclawTwinUrls: generatedTwinUrlMapPath ?? opts.openclawTwinUrls,
5489
- openclawTimeout,
6545
+ openclawTimeout: engine.timeoutSeconds,
5490
6546
  apiBaseUrls: generatedApiBaseUrlMapPath ?? opts.apiBaseUrls,
5491
6547
  apiProxyUrl: opts.apiProxyUrl,
5492
6548
  preflightOnly: opts.preflightOnly,
5493
6549
  cloudTwinUrls,
5494
6550
  noDynamicSeed: !opts.dynamicSeed,
5495
- noSeedCache: !opts.seedCache
6551
+ noSeedCache: !opts.seedCache,
6552
+ apiBearerToken: credentials.token,
6553
+ apiAdminToken: process.env["ARCHAL_RUNTIME_ADMIN_TOKEN"],
6554
+ apiAdminUserId: process.env["ARCHAL_RUNTIME_USER_ID"]
5496
6555
  });
5497
6556
  if (!opts.preflightOnly && report.satisfactionScore < passThreshold) {
5498
6557
  runFailureMessage = `Satisfaction score ${report.satisfactionScore.toFixed(1)} is below pass threshold ${passThreshold}`;
@@ -5502,10 +6561,10 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5502
6561
  const message = err instanceof Error ? err.message : String(err);
5503
6562
  runFailureMessage = message;
5504
6563
  } finally {
5505
- if (generatedTwinUrlMapPath && existsSync11(generatedTwinUrlMapPath)) {
6564
+ if (generatedTwinUrlMapPath && existsSync12(generatedTwinUrlMapPath)) {
5506
6565
  unlinkSync7(generatedTwinUrlMapPath);
5507
6566
  }
5508
- if (generatedApiBaseUrlMapPath && existsSync11(generatedApiBaseUrlMapPath)) {
6567
+ if (generatedApiBaseUrlMapPath && existsSync12(generatedApiBaseUrlMapPath)) {
5509
6568
  unlinkSync7(generatedApiBaseUrlMapPath);
5510
6569
  }
5511
6570
  if (backendSessionId) {
@@ -5566,10 +6625,90 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
5566
6625
  });
5567
6626
  return cmd;
5568
6627
  }
5569
- function resolveOpenClawGatewayToken(rawToken) {
6628
/**
 * Resolve the agent engine configuration for `archal run`.
 *
 * Each setting is taken from the first non-empty source, in the order listed
 * in the corresponding `firstNonEmpty(...)` call below (CLI flags, deprecated
 * OpenClaw aliases and environment variables).
 *
 * @param {object} opts - Parsed command options.
 * @param {number} runTimeoutSeconds - Per-run timeout, used as the engine
 *   timeout unless API mode supplies its own value.
 * @returns {{mode: string, endpoint: (string|undefined), token: (string|undefined),
 *   model: (string|undefined), twinUrlsPath: (string|undefined), timeoutSeconds: number,
 *   harnessDir: (string|undefined), deprecatedAliasesUsed: string[]}}
 * @throws {Error} When no mode can be resolved, when the engine timeout is
 *   invalid in API mode, when API mode has no model, or when the OpenClaw
 *   endpoint alias is used without a token.
 */
function resolveEngineConfig(opts, runTimeoutSeconds) {
  const env = process.env;
  const deprecatedAliasesUsed = collectDeprecatedAliases(opts);
  const mode = resolveEngineMode(opts);
  const isApiMode = mode === "api";

  const openclawEndpointAlias = firstNonEmpty(opts.openclawUrl, env["OPENCLAW_URL"]);
  const endpoint = firstNonEmpty(opts.engineEndpoint, openclawEndpointAlias, env["ARCHAL_ENGINE_ENDPOINT"]);

  const explicitToken = firstNonEmpty(opts.engineToken, opts.openclawToken);
  const token = resolveEngineToken(explicitToken);

  const legacyAgentId = firstNonEmpty(opts.openclawAgent, env["OPENCLAW_AGENT_ID"]);
  const openclawModel = resolveOpenClawModel(legacyAgentId);
  // Legacy OpenClaw alias path keeps the historical default model for compatibility.
  const legacyDefaultModel = openclawEndpointAlias ? "openclaw:main" : undefined;
  const model = firstNonEmpty(opts.engineModel, env["ARCHAL_ENGINE_MODEL"], openclawModel, legacyDefaultModel);

  const timeoutInput = firstNonEmpty(opts.engineTimeout, opts.openclawTimeout, env["ARCHAL_ENGINE_TIMEOUT"]);
  // The engine timeout is only parsed (and validated) in API mode.
  let timeoutSeconds = runTimeoutSeconds;
  if (isApiMode) {
    timeoutSeconds = parsePositiveInteger(timeoutInput, "--engine-timeout") ?? runTimeoutSeconds;
  }

  const twinUrlsPath = firstNonEmpty(opts.engineTwinUrls, opts.openclawTwinUrls, env["ARCHAL_ENGINE_TWIN_URLS"]);
  const harnessDir = firstNonEmpty(opts.harnessDir, env["ARCHAL_HARNESS_DIR"]);

  if (isApiMode && !model) {
    throw new Error(
      "--engine-model is required for API mode (or use --openclaw-agent/OPENCLAW_AGENT_ID)."
    );
  }
  if (isApiMode && openclawEndpointAlias && !token) {
    throw new Error(
      "OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD."
    );
  }

  return {
    mode,
    endpoint,
    token,
    model,
    twinUrlsPath,
    timeoutSeconds,
    harnessDir,
    deprecatedAliasesUsed
  };
}
6681
/**
 * Decide how the agent is executed.
 *
 * @param {object} opts - Parsed command options.
 * @returns {"api"|"local"} `"api"` when any engine endpoint (flag, deprecated
 *   alias or environment variable) is set; otherwise `"local"` when a harness
 *   directory is set.
 * @throws {Error} When neither an endpoint nor a harness directory is configured.
 */
function resolveEngineMode(opts) {
  const remoteEndpoint = firstNonEmpty(
    opts.engineEndpoint,
    opts.openclawUrl,
    process.env["ARCHAL_ENGINE_ENDPOINT"],
    process.env["OPENCLAW_URL"]
  );
  // A configured endpoint wins over a harness directory.
  if (remoteEndpoint) {
    return "api";
  }

  const localHarness = firstNonEmpty(opts.harnessDir, process.env["ARCHAL_HARNESS_DIR"]);
  if (!localHarness) {
    throw new Error(
      "No agent execution mode configured. Provide --engine-endpoint for remote agent execution, or --harness-dir for local agent execution."
    );
  }
  return "local";
}
6697
/**
 * Turn a legacy OpenClaw agent id into a model id.
 *
 * @param {string|undefined|null} raw - Agent id as supplied by the user.
 * @returns {string|undefined} `undefined` for missing/blank input; the trimmed
 *   value unchanged when it already contains a `:`; otherwise the trimmed
 *   value prefixed with `openclaw:`.
 */
function resolveOpenClawModel(raw) {
  const agentId = raw ? raw.trim() : "";
  if (agentId.length === 0) {
    return undefined;
  }
  if (agentId.includes(":")) {
    return agentId;
  }
  return `openclaw:${agentId}`;
}
6704
+ function resolveEngineToken(rawToken) {
5570
6705
  if (rawToken && rawToken.trim()) {
5571
6706
  return rawToken.trim();
5572
6707
  }
6708
+ const engineToken = process.env["ARCHAL_ENGINE_TOKEN"]?.trim();
6709
+ if (engineToken) {
6710
+ return engineToken;
6711
+ }
5573
6712
  const token = process.env["OPENCLAW_GATEWAY_TOKEN"]?.trim();
5574
6713
  if (token) {
5575
6714
  return token;
@@ -5580,11 +6719,36 @@ function resolveOpenClawGatewayToken(rawToken) {
5580
6719
  }
5581
6720
  return void 0;
5582
6721
  }
6722
/**
 * Pick the first argument that is a non-blank string.
 *
 * @param {...(string|undefined|null)} values - Candidates in priority order.
 * @returns {string|undefined} The first candidate that is non-empty after
 *   trimming, returned trimmed; `undefined` when there is none.
 */
function firstNonEmpty(...values) {
  const match = values.find((candidate) => Boolean(candidate && candidate.trim()));
  return match ? match.trim() : undefined;
}
6730
/**
 * Parse a CLI/environment value that must be a positive integer.
 *
 * Unlike a bare `parseInt`, the whole value has to be made of decimal digits
 * (an optional leading `+` and surrounding whitespace are tolerated), so
 * inputs such as "12abc", "1.5" or "1e3" are rejected instead of being
 * silently truncated to a different number.
 *
 * @param {string|undefined} raw - Raw value; missing/empty means "not set".
 * @param {string} flagName - Flag name used in the error message.
 * @returns {number|undefined} The parsed integer, or `undefined` when `raw`
 *   is missing or empty.
 * @throws {Error} When the value is not a positive (safe) integer.
 */
function parsePositiveInteger(raw, flagName) {
  if (!raw) return undefined;
  const text = String(raw).trim();
  // Validate the full string first: parseInt stops at the first non-digit
  // and would otherwise accept trailing garbage.
  if (!/^\+?\d+$/.test(text)) {
    throw new Error(`${flagName} must be a positive integer`);
  }
  const parsed = Number.parseInt(text, 10);
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    throw new Error(`${flagName} must be a positive integer`);
  }
  return parsed;
}
6738
/**
 * List the deprecated `--openclaw-*` flags present in the parsed options.
 *
 * @param {object} opts - Parsed command options.
 * @returns {string[]} Flag names, in declaration order, for every legacy
 *   option whose value is truthy.
 */
function collectDeprecatedAliases(opts) {
  // Option property -> flag spelling shown to the user.
  const legacyFlags = [
    ["openclawUrl", "--openclaw-url"],
    ["openclawToken", "--openclaw-token"],
    ["openclawAgent", "--openclaw-agent"],
    ["openclawTwinUrls", "--openclaw-twin-urls"],
    ["openclawTimeout", "--openclaw-timeout"]
  ];
  return legacyFlags
    .filter(([optionKey]) => Boolean(opts[optionKey]))
    .map(([, flag]) => flag);
}
5583
6747
 
5584
6748
  // src/commands/init.ts
5585
6749
  import { Command as Command4 } from "commander";
5586
- import { existsSync as existsSync12, mkdirSync as mkdirSync6, writeFileSync as writeFileSync10 } from "fs";
5587
- import { join as join9, resolve as resolve7 } from "path";
6750
+ import { existsSync as existsSync13, mkdirSync as mkdirSync6, writeFileSync as writeFileSync10 } from "fs";
6751
+ import { join as join9, resolve as resolve9 } from "path";
5588
6752
  var SAMPLE_SCENARIO = `# Close Stale Issues
5589
6753
 
5590
6754
  ## Setup
@@ -5759,7 +6923,7 @@ var SAMPLE_PACKAGE_JSON = `{
5759
6923
  }
5760
6924
  `;
5761
6925
  function writeIfMissing(filePath, content) {
5762
- if (!existsSync12(filePath)) {
6926
+ if (!existsSync13(filePath)) {
5763
6927
  writeFileSync10(filePath, content);
5764
6928
  info(`Created ${filePath}`);
5765
6929
  } else {
@@ -5768,8 +6932,8 @@ function writeIfMissing(filePath, content) {
5768
6932
  }
5769
6933
  function createInitCommand() {
5770
6934
  const cmd = new Command4("init").description("Initialize an Archal test directory with sample scenario and agent").argument("[directory]", "Directory to initialize", "archal").action((directory) => {
5771
- const targetDir = resolve7(directory);
5772
- if (existsSync12(targetDir)) {
6935
+ const targetDir = resolve9(directory);
6936
+ if (existsSync13(targetDir)) {
5773
6937
  warn(`Directory already exists: ${targetDir}`);
5774
6938
  warn("Skipping files that already exist.");
5775
6939
  } else {
@@ -5792,23 +6956,10 @@ function createInitCommand() {
5792
6956
  return cmd;
5793
6957
  }
5794
6958
 
5795
- // src/commands/twins.ts
5796
- import { Command as Command5 } from "commander";
5797
- function createTwinsCommand() {
5798
- const cmd = new Command5("twins").description("Manage twin catalog entitlements");
5799
- cmd.command("list").description("List available twins and entitlement status").action(async () => {
5800
- await listTwinCatalog();
5801
- });
5802
- cmd.command("select").description("Choose which twins to use on your free plan").action(async () => {
5803
- await selectTwinsForPlan();
5804
- });
5805
- return cmd;
5806
- }
5807
-
5808
6959
  // src/commands/scenario.ts
5809
- import { Command as Command6 } from "commander";
5810
- import { existsSync as existsSync13, readdirSync as readdirSync3, writeFileSync as writeFileSync11, mkdirSync as mkdirSync7 } from "fs";
5811
- import { resolve as resolve8, join as join10, extname } from "path";
6960
+ import { Command as Command5 } from "commander";
6961
+ import { existsSync as existsSync14, readdirSync as readdirSync3, writeFileSync as writeFileSync11, mkdirSync as mkdirSync7 } from "fs";
6962
+ import { resolve as resolve10, join as join10, extname, relative } from "path";
5812
6963
  var SCENARIO_TEMPLATE = `# {{NAME}}
5813
6964
 
5814
6965
  ## Setup
@@ -5834,15 +6985,15 @@ timeout: 120
5834
6985
  runs: 5
5835
6986
  `;
5836
6987
  var SCENARIO_DIR_CANDIDATES = [
5837
- resolve8("scenarios"),
5838
- resolve8("scenario"),
5839
- resolve8("test", "scenarios"),
5840
- resolve8("tests", "scenarios"),
5841
- resolve8(".archal", "scenarios")
6988
+ resolve10("scenarios"),
6989
+ resolve10("scenario"),
6990
+ resolve10("test", "scenarios"),
6991
+ resolve10("tests", "scenarios"),
6992
+ resolve10(".archal", "scenarios")
5842
6993
  ];
5843
6994
  function findScenarioFiles(dir) {
5844
6995
  const files = [];
5845
- if (!existsSync13(dir)) return files;
6996
+ if (!existsSync14(dir)) return files;
5846
6997
  const entries = readdirSync3(dir, { withFileTypes: true });
5847
6998
  for (const entry of entries) {
5848
6999
  const fullPath = join10(dir, entry.name);
@@ -5856,22 +7007,19 @@ function findScenarioFiles(dir) {
5856
7007
  }
5857
7008
  function findLocalScenariosDir() {
5858
7009
  for (const candidate of SCENARIO_DIR_CANDIDATES) {
5859
- if (existsSync13(candidate)) {
7010
+ if (existsSync14(candidate)) {
5860
7011
  return { dir: candidate, candidates: SCENARIO_DIR_CANDIDATES };
5861
7012
  }
5862
7013
  }
5863
7014
  return {
5864
- dir: resolve8("scenarios"),
7015
+ dir: resolve10("scenarios"),
5865
7016
  candidates: SCENARIO_DIR_CANDIDATES
5866
7017
  };
5867
7018
  }
5868
7019
  function toDisplayPath(path) {
5869
- const cwd = resolve8(".");
5870
- if (path === cwd) return ".";
5871
- if (path.startsWith(`${cwd}/`)) {
5872
- return `.${path.slice(cwd.length)}`;
5873
- }
5874
- return path;
7020
+ const rel = relative(resolve10("."), path);
7021
+ if (!rel) return ".";
7022
+ return rel.startsWith("..") ? path : rel;
5875
7023
  }
5876
7024
  function getCachedScenariosDir() {
5877
7025
  return join10(ensureArchalDir(), "scenarios");
@@ -5897,14 +7045,14 @@ async function syncRemoteScenarios(token) {
5897
7045
  return scenarios;
5898
7046
  }
5899
7047
  function createScenarioCommand() {
5900
- const cmd = new Command6("scenario").description("Manage test scenarios");
7048
+ const cmd = new Command5("scenario").description("Manage test scenarios");
5901
7049
  cmd.command("list").description("List available scenarios").option("-d, --dir <directory>", "Scenario directory to search").option("--local", "Only show local scenarios (skip remote fetch)").action(async (opts) => {
5902
7050
  const creds = getCredentials();
5903
7051
  const headers = ["Scenario", "Source", "Criteria", "Twins"];
5904
7052
  const rows = [];
5905
- const localResolution = opts.dir ? { dir: resolve8(opts.dir), candidates: [resolve8(opts.dir)] } : findLocalScenariosDir();
7053
+ const localResolution = opts.dir ? { dir: resolve10(opts.dir), candidates: [resolve10(opts.dir)] } : findLocalScenariosDir();
5906
7054
  const localDir = localResolution.dir;
5907
- if (existsSync13(localDir)) {
7055
+ if (existsSync14(localDir)) {
5908
7056
  const localFiles = findScenarioFiles(localDir);
5909
7057
  let hiddenCount = 0;
5910
7058
  for (const file of localFiles) {
@@ -5917,7 +7065,7 @@ function createScenarioCommand() {
5917
7065
  continue;
5918
7066
  }
5919
7067
  }
5920
- const relativePath = file.replace(resolve8(".") + "\\", "").replace(resolve8(".") + "/", "");
7068
+ const relativePath = relative(resolve10("."), file);
5921
7069
  rows.push([
5922
7070
  scenario.title,
5923
7071
  relativePath,
@@ -5926,7 +7074,7 @@ function createScenarioCommand() {
5926
7074
  ]);
5927
7075
  } catch (err) {
5928
7076
  const message = err instanceof Error ? err.message : String(err);
5929
- const relativePath = file.replace(resolve8(".") + "\\", "").replace(resolve8(".") + "/", "");
7077
+ const relativePath = relative(resolve10("."), file);
5930
7078
  rows.push([`(parse error)`, relativePath, "-", message]);
5931
7079
  }
5932
7080
  }
@@ -5971,8 +7119,8 @@ function createScenarioCommand() {
5971
7119
  Found ${rows.length} scenario(s)`);
5972
7120
  });
5973
7121
  cmd.command("validate").description("Parse and validate a scenario file").argument("<file>", "Path to scenario markdown file").action((file) => {
5974
- const filePath = resolve8(file);
5975
- if (!existsSync13(filePath)) {
7122
+ const filePath = resolve10(file);
7123
+ if (!existsSync14(filePath)) {
5976
7124
  error(`File not found: ${filePath}`);
5977
7125
  process.exit(1);
5978
7126
  }
@@ -6014,14 +7162,14 @@ Found ${rows.length} scenario(s)`);
6014
7162
  info("Run `archal twins select` to change your selection or `archal upgrade` to unlock all twins.");
6015
7163
  process.exit(1);
6016
7164
  }
6017
- const scenariosDir = opts.dir ? resolve8(opts.dir) : findLocalScenariosDir().dir;
6018
- if (!existsSync13(scenariosDir)) {
7165
+ const scenariosDir = opts.dir ? resolve10(opts.dir) : findLocalScenariosDir().dir;
7166
+ if (!existsSync14(scenariosDir)) {
6019
7167
  mkdirSync7(scenariosDir, { recursive: true });
6020
7168
  info(`Created scenarios directory: ${scenariosDir}`);
6021
7169
  }
6022
7170
  const fileName = name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "") + ".md";
6023
7171
  const filePath = join10(scenariosDir, fileName);
6024
- if (existsSync13(filePath)) {
7172
+ if (existsSync14(filePath)) {
6025
7173
  error(`Scenario file already exists: ${filePath}`);
6026
7174
  process.exit(1);
6027
7175
  }
@@ -6038,9 +7186,9 @@ Found ${rows.length} scenario(s)`);
6038
7186
 
6039
7187
  // src/commands/trace.ts
6040
7188
  import { writeFileSync as writeFileSync12 } from "fs";
6041
- import { resolve as resolve9 } from "path";
7189
+ import { resolve as resolve11 } from "path";
6042
7190
  import { createInterface as createInterface3 } from "readline";
6043
- import { Command as Command7 } from "commander";
7191
+ import { Command as Command6 } from "commander";
6044
7192
  function formatTimestamp2(iso) {
6045
7193
  try {
6046
7194
  return new Date(iso).toLocaleString();
@@ -6063,10 +7211,10 @@ var TRACE_HEADERS = ["ID", "Scenario", "Score", "Runs", "Entries", "Timestamp"];
6063
7211
  function confirmPrompt(message) {
6064
7212
  if (!process.stdin.isTTY) return Promise.resolve(false);
6065
7213
  const rl = createInterface3({ input: process.stdin, output: process.stderr });
6066
- return new Promise((resolve11) => {
7214
+ return new Promise((resolve13) => {
6067
7215
  rl.question(`${message} [y/N] `, (answer) => {
6068
7216
  rl.close();
6069
- resolve11(answer.trim().toLowerCase() === "y");
7217
+ resolve13(answer.trim().toLowerCase() === "y");
6070
7218
  });
6071
7219
  });
6072
7220
  }
@@ -6079,7 +7227,7 @@ function parsePositiveInt(val, flag) {
6079
7227
  return n;
6080
7228
  }
6081
7229
  function createTraceCommand() {
6082
- const cmd = new Command7("trace").description("Inspect, search, and manage run traces");
7230
+ const cmd = new Command6("trace").description("Inspect, search, and manage run traces");
6083
7231
  cmd.command("list").description("List recent traces").option("-n, --limit <count>", "Number of traces to show", "20").action((opts) => {
6084
7232
  const traces = listTraces(parsePositiveInt(opts.limit, "--limit"));
6085
7233
  if (traces.length === 0) {
@@ -6183,7 +7331,7 @@ ${traces.length} trace(s) found`);
6183
7331
  process.exit(1);
6184
7332
  }
6185
7333
  if (opts.output) {
6186
- const outPath = resolve9(opts.output);
7334
+ const outPath = resolve11(opts.output);
6187
7335
  writeFileSync12(outPath, json, "utf-8");
6188
7336
  info(`Trace exported to: ${outPath}`);
6189
7337
  } else {
@@ -6260,10 +7408,10 @@ ${traces.length} trace(s) found`);
6260
7408
  }
6261
7409
 
6262
7410
  // src/commands/config.ts
6263
- import { existsSync as existsSync14, unlinkSync as unlinkSync8 } from "fs";
6264
- import { Command as Command8 } from "commander";
7411
+ import { existsSync as existsSync15, unlinkSync as unlinkSync8 } from "fs";
7412
+ import { Command as Command7 } from "commander";
6265
7413
  function createConfigCommand() {
6266
- const cmd = new Command8("config").description("Manage Archal configuration");
7414
+ const cmd = new Command7("config").description("Manage Archal configuration");
6267
7415
  cmd.command("show").description("Print current configuration").option("--json", "Output as JSON").action((opts) => {
6268
7416
  const display = getConfigDisplay();
6269
7417
  if (opts.json) {
@@ -6279,6 +7427,11 @@ function createConfigCommand() {
6279
7427
  model: evaluator["model"] ?? "(not set)",
6280
7428
  apiKey: evaluator["apiKey"] ?? "(not set)"
6281
7429
  });
7430
+ const seedGen = display["seedGeneration"];
7431
+ printConfigSection("Seed Generation", {
7432
+ model: seedGen["model"] ?? "(not set)",
7433
+ geminiApiKey: seedGen["geminiApiKey"] ?? "(not set)"
7434
+ });
6282
7435
  const defaults = display["defaults"];
6283
7436
  printConfigSection("Defaults", {
6284
7437
  runs: String(defaults["runs"]),
@@ -6291,12 +7444,16 @@ function createConfigCommand() {
6291
7444
  });
6292
7445
  process.stdout.write("\n");
6293
7446
  info("Set values with: archal config set <key> <value>");
6294
- info("Valid keys: telemetry, evaluator.model, evaluator.apiKey, defaults.runs, defaults.timeout");
7447
+ info("Valid keys: telemetry, evaluator.model, evaluator.apiKey, seedGeneration.model, seedGeneration.geminiApiKey, defaults.runs, defaults.timeout");
6295
7448
  });
6296
7449
  cmd.command("set").description("Set a configuration value").argument("<key>", "Configuration key (e.g., evaluator.model, defaults.runs)").argument("<value>", "Value to set").action((key, value) => {
6297
7450
  try {
6298
7451
  setConfigValue(key, value);
6299
7452
  success(`Set ${key} = ${key.includes("apiKey") ? "***" : value}`);
7453
+ if (key.includes("apiKey") && !value.startsWith("env:")) {
7454
+ warn("API key stored in plaintext in config file. Consider using env: prefix instead:");
7455
+ info(` archal config set ${key} env:YOUR_ENV_VAR_NAME`);
7456
+ }
6300
7457
  } catch (err) {
6301
7458
  const message = err instanceof Error ? err.message : String(err);
6302
7459
  error(message);
@@ -6306,7 +7463,7 @@ function createConfigCommand() {
6306
7463
  cmd.command("init").description("Create default configuration file").option("--force", "Overwrite existing config").action((opts) => {
6307
7464
  const configPath = getConfigPath();
6308
7465
  if (opts.force) {
6309
- if (existsSync14(configPath)) {
7466
+ if (existsSync15(configPath)) {
6310
7467
  unlinkSync8(configPath);
6311
7468
  }
6312
7469
  }
@@ -6316,7 +7473,7 @@ function createConfigCommand() {
6316
7473
  info("\nNext steps:");
6317
7474
  info(" 1. Set your API key:");
6318
7475
  info(" archal config set evaluator.apiKey your-key-here");
6319
- info(" or set ANTHROPIC_API_KEY environment variable");
7476
+ info(" or set GEMINI_API_KEY environment variable (default provider)");
6320
7477
  info("");
6321
7478
  info(" 2. Create a scenario:");
6322
7479
  info(" archal scenario create my-first-test");
@@ -6345,31 +7502,33 @@ function printConfigSection(name, values) {
6345
7502
  }
6346
7503
 
6347
7504
  // src/commands/demo.ts
6348
- import { Command as Command9 } from "commander";
6349
- import { existsSync as existsSync15 } from "fs";
6350
- import { resolve as resolve10, dirname as dirname4 } from "path";
6351
- import { fileURLToPath as fileURLToPath4 } from "url";
7505
+ import { Command as Command8 } from "commander";
7506
+ import { existsSync as existsSync16 } from "fs";
7507
+ import { resolve as resolve12, dirname as dirname4 } from "path";
7508
+ import { fileURLToPath as fileURLToPath5 } from "url";
6352
7509
  import { createRequire as createRequire4 } from "module";
6353
- var __dirname4 = fileURLToPath4(new URL(".", import.meta.url));
7510
+ var __dirname5 = fileURLToPath5(new URL(".", import.meta.url));
6354
7511
  function resolveDemoDir() {
6355
- const monorepoDemoDir = resolve10(__dirname4, "..", "demo");
6356
- if (existsSync15(resolve10(monorepoDemoDir, "scenario.md"))) {
6357
- return monorepoDemoDir;
7512
+ const demoDir = resolve12(__dirname5, "..", "demo");
7513
+ if (existsSync16(resolve12(demoDir, "scenario.md"))) {
7514
+ return demoDir;
6358
7515
  }
6359
7516
  try {
6360
7517
  const require2 = createRequire4(import.meta.url);
6361
7518
  const cliMain = require2.resolve("@archal/cli");
6362
7519
  const pkgDir = dirname4(dirname4(cliMain));
6363
- const npmDemoDir = resolve10(pkgDir, "demo");
6364
- if (existsSync15(resolve10(npmDemoDir, "scenario.md"))) {
7520
+ const npmDemoDir = resolve12(pkgDir, "demo");
7521
+ if (existsSync16(resolve12(npmDemoDir, "scenario.md"))) {
6365
7522
  return npmDemoDir;
6366
7523
  }
6367
7524
  } catch {
6368
7525
  }
6369
- throw new Error("Demo files not found. Ensure @archal/cli is installed correctly.");
7526
+ throw new Error(
7527
+ "Demo files not found. Ensure @archal/cli is installed correctly.\nIf installed globally, try reinstalling: npm install -g @archal/cli"
7528
+ );
6370
7529
  }
6371
7530
  function createDemoCommand() {
6372
- const cmd = new Command9("demo").description("Run a built-in demo: good agent vs bad agent on the same scenario").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (opts) => {
7531
+ const cmd = new Command8("demo").description("Run a built-in demo: good agent vs bad agent on the same scenario").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (opts) => {
6373
7532
  if (opts.quiet) {
6374
7533
  configureLogger({ quiet: true });
6375
7534
  }
@@ -6377,9 +7536,9 @@ function createDemoCommand() {
6377
7536
  configureLogger({ verbose: true, level: "debug" });
6378
7537
  }
6379
7538
  const demoDir = resolveDemoDir();
6380
- const scenarioPath = resolve10(demoDir, "scenario.md");
6381
- const goodAgentPath = resolve10(demoDir, "good-agent.mjs");
6382
- const badAgentPath = resolve10(demoDir, "bad-agent.mjs");
7539
+ const scenarioPath = resolve12(demoDir, "scenario.md");
7540
+ const goodAgentPath = resolve12(demoDir, "good-agent.mjs");
7541
+ const badAgentPath = resolve12(demoDir, "bad-agent.mjs");
6383
7542
  process.stderr.write("\n\x1B[36m\x1B[1marchal demo\x1B[0m \x1B[2m\u2014 same scenario, two agents\x1B[0m\n\n");
6384
7543
  process.stderr.write("\x1B[1m\x1B[32m\u25B8 Good agent\x1B[0m \x1B[2m(checks labels, skips keep-open)\x1B[0m\n");
6385
7544
  const goodReport = await runScenario({
@@ -6412,100 +7571,194 @@ function createDemoCommand() {
6412
7571
  }
6413
7572
 
6414
7573
  // src/commands/login.ts
6415
- import { Command as Command10 } from "commander";
7574
+ import { Command as Command9 } from "commander";
6416
7575
  import { exec } from "child_process";
6417
- import { randomBytes } from "crypto";
7576
+ import { createHash as createHash3, randomBytes } from "crypto";
6418
7577
  import { createServer } from "http";
6419
- var AUTH_BASE_URL3 = process.env["ARCHAL_AUTH_URL"] ?? "https://archal.ai";
7578
+ function normalizeAuthUrl2(value) {
7579
+ const trimmed = value.trim().replace(/\/+$/, "");
7580
+ return trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
7581
+ }
7582
+ var AUTH_BASE_URL3 = normalizeAuthUrl2(process.env["ARCHAL_AUTH_URL"] ?? "https://www.archal.ai");
6420
7583
  var START_PORT = 51423;
6421
7584
  var LOGIN_TIMEOUT_MS = 5 * 60 * 1e3;
7585
+ var TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
7586
+ function escapeHtml(value) {
7587
+ return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
7588
+ }
6422
7589
  function openBrowser(url) {
6423
7590
  const platform = process.platform;
6424
7591
  const command = platform === "darwin" ? `open "${url}"` : platform === "win32" ? `start "" "${url}"` : `xdg-open "${url}"`;
6425
- exec(command, () => {
7592
+ exec(command, (err) => {
7593
+ if (err) {
7594
+ info("Could not open browser automatically.");
7595
+ info(`Please visit the URL above manually to complete login.`);
7596
+ }
6426
7597
  });
6427
7598
  }
7599
+ function createPkcePair() {
7600
+ const codeVerifier = randomBytes(32).toString("base64url");
7601
+ const codeChallenge = createHash3("sha256").update(codeVerifier).digest("base64url");
7602
+ return { codeVerifier, codeChallenge };
7603
+ }
7604
+ function isPlan2(value) {
7605
+ return value === "free" || value === "pro" || value === "enterprise";
7606
+ }
7607
+ function credentialsFromApiToken(token) {
7608
+ const nowSeconds = Math.floor(Date.now() / 1e3);
7609
+ return {
7610
+ token,
7611
+ refreshToken: "",
7612
+ email: "(from token)",
7613
+ plan: "free",
7614
+ selectedTwins: [],
7615
+ expiresAt: getJwtExpiry(token) ?? nowSeconds + TOKEN_FALLBACK_TTL_SECONDS
7616
+ };
7617
+ }
7618
+ function credentialsFromLegacyCallback(requestUrl) {
7619
+ const token = requestUrl.searchParams.get("token") ?? requestUrl.searchParams.get("access_token");
7620
+ const refreshToken = requestUrl.searchParams.get("refresh_token") ?? requestUrl.searchParams.get("refreshToken") ?? "";
7621
+ const email = requestUrl.searchParams.get("email");
7622
+ const planParam = requestUrl.searchParams.get("plan");
7623
+ const twins = requestUrl.searchParams.get("twins");
7624
+ if (!token || !email || !isPlan2(planParam)) {
7625
+ return null;
7626
+ }
7627
+ const nowSeconds = Math.floor(Date.now() / 1e3);
7628
+ return {
7629
+ token,
7630
+ refreshToken,
7631
+ email,
7632
+ plan: planParam,
7633
+ selectedTwins: twins ? twins.split(",").filter(Boolean) : [],
7634
+ expiresAt: getJwtExpiry(token) ?? nowSeconds + TOKEN_FALLBACK_TTL_SECONDS
7635
+ };
7636
+ }
6428
7637
  function findFreePort(startPort) {
6429
- return new Promise((resolve11, reject) => {
7638
+ return new Promise((resolve13, reject) => {
6430
7639
  const server = createServer();
6431
7640
  server.listen(startPort, "127.0.0.1", () => {
6432
7641
  const address = server.address();
6433
7642
  const port = typeof address === "object" && address ? address.port : startPort;
6434
- server.close(() => resolve11(port));
7643
+ server.close(() => resolve13(port));
6435
7644
  });
6436
7645
  server.on("error", () => {
6437
7646
  if (startPort < START_PORT + 100) {
6438
- findFreePort(startPort + 1).then(resolve11).catch(reject);
7647
+ findFreePort(startPort + 1).then(resolve13).catch(reject);
6439
7648
  } else {
6440
- reject(new Error("Could not find a free localhost callback port"));
7649
+ reject(new Error(
7650
+ "Could not find a free localhost callback port (tried ports 51423-51523).\nTry closing other services, or use token login: archal login --token <your-token>"
7651
+ ));
6441
7652
  }
6442
7653
  });
6443
7654
  });
6444
7655
  }
6445
7656
  function createLoginCommand() {
6446
- return new Command10("login").description("Log in via archal.ai browser auth").action(async () => {
7657
+ return new Command9("login").description("Log in via archal.ai browser auth").option("--no-browser", "Do not automatically open the login URL in a browser").option("--token <token>", "Use an API key/token directly (CI/service fallback)").action(async (opts) => {
7658
+ const directToken = opts.token?.trim();
7659
+ if (directToken) {
7660
+ let credentials = credentialsFromApiToken(directToken);
7661
+ credentials = await refreshAuthFromServer(credentials);
7662
+ saveCredentials(credentials);
7663
+ success(`Logged in as ${credentials.email} (${credentials.plan} plan)`);
7664
+ return;
7665
+ }
6447
7666
  const port = await findFreePort(START_PORT);
6448
7667
  const state = randomBytes(16).toString("hex");
6449
7668
  const redirectUrl = `http://localhost:${port}/callback`;
6450
- const authUrl = `${AUTH_BASE_URL3}/cli-auth?redirect=${encodeURIComponent(redirectUrl)}&state=${encodeURIComponent(state)}`;
7669
+ const { codeVerifier, codeChallenge } = createPkcePair();
7670
+ const authUrl = `${AUTH_BASE_URL3}/cli-auth?redirect=${encodeURIComponent(redirectUrl)}&state=${encodeURIComponent(state)}&code_challenge=${encodeURIComponent(codeChallenge)}&code_challenge_method=S256`;
6451
7671
  info("Opening browser for authentication...");
6452
7672
  info(`If your browser does not open, visit:
6453
7673
  ${authUrl}`);
6454
- openBrowser(authUrl);
6455
- await new Promise((resolve11, reject) => {
6456
- const server = createServer((req, res) => {
6457
- const requestUrl = new URL(req.url ?? "/", `http://localhost:${port}`);
6458
- if (requestUrl.pathname !== "/callback") {
6459
- res.writeHead(404);
6460
- res.end("Not found");
6461
- return;
6462
- }
6463
- const returnedState = requestUrl.searchParams.get("state");
6464
- if (returnedState !== state) {
6465
- res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
6466
- res.end("<h1>Login failed</h1><p>State mismatch.</p>");
6467
- server.close();
6468
- reject(new Error("State mismatch in callback"));
7674
+ if (opts.browser !== false) {
7675
+ openBrowser(authUrl);
7676
+ }
7677
+ await new Promise((resolve13, reject) => {
7678
+ let settled = false;
7679
+ const settleResolve = () => {
7680
+ if (settled) return;
7681
+ settled = true;
7682
+ resolve13();
7683
+ };
7684
+ const settleReject = (error2) => {
7685
+ if (settled) return;
7686
+ settled = true;
7687
+ reject(error2);
7688
+ };
7689
+ function closeAndResolve() {
7690
+ if (!server.listening) {
7691
+ settleResolve();
6469
7692
  return;
6470
7693
  }
6471
- const token = requestUrl.searchParams.get("token");
6472
- const refreshToken = requestUrl.searchParams.get("refresh_token") ?? requestUrl.searchParams.get("refreshToken") ?? "";
6473
- const email = requestUrl.searchParams.get("email");
6474
- const plan = requestUrl.searchParams.get("plan");
6475
- const twins = requestUrl.searchParams.get("twins");
6476
- if (!token || !email || !plan) {
6477
- res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
6478
- res.end("<h1>Login failed</h1><p>Missing callback parameters.</p>");
6479
- server.close();
6480
- reject(new Error("Missing token/email/plan in callback"));
7694
+ server.close(() => settleResolve());
7695
+ }
7696
+ function closeAndReject(error2) {
7697
+ if (!server.listening) {
7698
+ settleReject(error2);
6481
7699
  return;
6482
7700
  }
6483
- const expiresAt = getJwtExpiry(token) ?? Math.floor(Date.now() / 1e3) + 30 * 24 * 60 * 60;
6484
- const credentials = {
6485
- token,
6486
- refreshToken,
6487
- email,
6488
- plan,
6489
- selectedTwins: twins ? twins.split(",").filter(Boolean) : [],
6490
- expiresAt
6491
- };
6492
- saveCredentials(credentials);
6493
- res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
6494
- res.end("<h1>Login successful</h1><p>You can close this tab.</p>");
6495
- success(`Logged in as ${email} (${plan})`);
6496
- if (plan === "free" && credentials.selectedTwins.length === 0) {
6497
- info(
6498
- "You haven't selected any twins yet.\n Run `archal twins select` to choose up to 5 twins for your free plan."
6499
- );
6500
- }
6501
- server.close(() => resolve11());
7701
+ server.close(() => settleReject(error2));
7702
+ }
7703
+ const server = createServer((req, res) => {
7704
+ void (async () => {
7705
+ try {
7706
+ const requestUrl = new URL(req.url ?? "/", `http://localhost:${port}`);
7707
+ if (requestUrl.pathname !== "/callback") {
7708
+ res.writeHead(404);
7709
+ res.end("Not found");
7710
+ return;
7711
+ }
7712
+ const returnedState = requestUrl.searchParams.get("state");
7713
+ if (returnedState !== state) {
7714
+ res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
7715
+ res.end("<h1>Login failed</h1><p>State mismatch.</p>");
7716
+ closeAndReject(new Error("State mismatch in callback"));
7717
+ return;
7718
+ }
7719
+ const code = requestUrl.searchParams.get("code");
7720
+ const credentials = code ? await exchangeCliAuthCode({
7721
+ code,
7722
+ codeVerifier,
7723
+ redirectUri: redirectUrl
7724
+ }) : credentialsFromLegacyCallback(requestUrl);
7725
+ if (!credentials) {
7726
+ res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
7727
+ res.end("<h1>Login failed</h1><p>Missing auth code.</p>");
7728
+ closeAndReject(new Error("Missing code in callback"));
7729
+ return;
7730
+ }
7731
+ saveCredentials(credentials);
7732
+ res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
7733
+ res.end("<h1>Login successful</h1><p>You can close this tab.</p>");
7734
+ success(`Logged in as ${credentials.email} (${credentials.plan} plan)`);
7735
+ if (credentials.plan === "free" && credentials.selectedTwins.length === 0) {
7736
+ info(
7737
+ "You haven't selected any twins yet.\n Run `archal twins select` to choose up to 5 twins for your free plan."
7738
+ );
7739
+ }
7740
+ closeAndResolve();
7741
+ } catch (error2) {
7742
+ const message = error2 instanceof Error ? error2.message : String(error2);
7743
+ if (!res.headersSent) {
7744
+ res.writeHead(500, { "content-type": "text/html; charset=utf-8" });
7745
+ res.end(`<h1>Login failed</h1><p>${escapeHtml(message)}</p>`);
7746
+ }
7747
+ closeAndReject(error2);
7748
+ }
7749
+ })().catch((error2) => {
7750
+ closeAndReject(error2);
7751
+ });
6502
7752
  });
6503
- server.listen(port, "127.0.0.1");
6504
7753
  const timeout = setTimeout(() => {
6505
- server.close();
6506
- reject(new Error("Login timed out. Run archal login again."));
7754
+ closeAndReject(new Error("Login timed out. Run archal login again."));
6507
7755
  }, LOGIN_TIMEOUT_MS);
6508
7756
  server.on("close", () => clearTimeout(timeout));
7757
+ server.once("error", (error2) => {
7758
+ clearTimeout(timeout);
7759
+ closeAndReject(error2);
7760
+ });
7761
+ server.listen(port, "127.0.0.1");
6509
7762
  }).catch((error2) => {
6510
7763
  const message = error2 instanceof Error ? error2.message : String(error2);
6511
7764
  error(message);
@@ -6515,9 +7768,9 @@ function createLoginCommand() {
6515
7768
  }
6516
7769
 
6517
7770
  // src/commands/logout.ts
6518
- import { Command as Command11 } from "commander";
7771
+ import { Command as Command10 } from "commander";
6519
7772
  function createLogoutCommand() {
6520
- return new Command11("logout").description("Log out and remove stored credentials").action(() => {
7773
+ return new Command10("logout").description("Log out and remove stored credentials").action(() => {
6521
7774
  const creds = getCredentials();
6522
7775
  if (!creds) {
6523
7776
  info("Not currently logged in.");
@@ -6535,7 +7788,7 @@ function createLogoutCommand() {
6535
7788
  }
6536
7789
 
6537
7790
  // src/commands/whoami.ts
6538
- import { Command as Command12 } from "commander";
7791
+ import { Command as Command11 } from "commander";
6539
7792
  var RESET2 = "\x1B[0m";
6540
7793
  var BOLD2 = "\x1B[1m";
6541
7794
  var DIM2 = "\x1B[2m";
@@ -6543,11 +7796,12 @@ var CYAN2 = "\x1B[36m";
6543
7796
  var GREEN2 = "\x1B[32m";
6544
7797
  var YELLOW2 = "\x1B[33m";
6545
7798
  function createWhoamiCommand() {
6546
- return new Command12("whoami").description("Show current login status, plan, and entitlements").option("--refresh", "Force refresh from server").action(async (opts) => {
6547
- let current = requireAuth({
6548
- action: "show account status",
6549
- nextCommand: "archal whoami"
6550
- });
7799
+ return new Command11("whoami").description("Show current login status, plan, and entitlements").option("--refresh", "Force refresh from server").action(async (opts) => {
7800
+ let current = getCredentials();
7801
+ if (!current) {
7802
+ info("Not logged in. Run: archal login");
7803
+ return;
7804
+ }
6551
7805
  if (opts.refresh) {
6552
7806
  current = await refreshAuthFromServer(current);
6553
7807
  saveCredentials(current);
@@ -6611,7 +7865,7 @@ function planBadge(plan) {
6611
7865
  }
6612
7866
 
6613
7867
  // src/commands/upgrade.ts
6614
- import { Command as Command13 } from "commander";
7868
+ import { Command as Command12 } from "commander";
6615
7869
  import { exec as exec2 } from "child_process";
6616
7870
  var BILLING_URL = "https://archal.ai/dashboard/billing";
6617
7871
  function openBrowser2(url) {
@@ -6621,7 +7875,7 @@ function openBrowser2(url) {
6621
7875
  });
6622
7876
  }
6623
7877
  function createUpgradeCommand() {
6624
- return new Command13("upgrade").description("Open the Archal billing page to upgrade your plan").action(() => {
7878
+ return new Command12("upgrade").description("Open the Archal billing page to upgrade your plan").action(() => {
6625
7879
  const creds = getCredentials();
6626
7880
  if (creds?.plan === "enterprise") {
6627
7881
  info("You are already on the enterprise plan.");
@@ -6640,7 +7894,7 @@ function createUpgradeCommand() {
6640
7894
  }
6641
7895
 
6642
7896
  // src/commands/help.ts
6643
- import { Command as Command14 } from "commander";
7897
+ import { Command as Command13 } from "commander";
6644
7898
  var RESET3 = "\x1B[0m";
6645
7899
  var BOLD3 = "\x1B[1m";
6646
7900
  var DIM3 = "\x1B[2m";
@@ -6668,15 +7922,7 @@ var COMMAND_GROUPS = [
6668
7922
  ]
6669
7923
  },
6670
7924
  {
6671
- heading: "Twin Processes",
6672
- commands: [
6673
- { name: "twin start <name>", description: "Start a local twin process (debug/local only)" },
6674
- { name: "twin stop <name>", description: "Stop a running local twin process" },
6675
- { name: "twin status", description: "Show running local twin processes" }
6676
- ]
6677
- },
6678
- {
6679
- heading: "Twin Catalog",
7925
+ heading: "Twins",
6680
7926
  commands: [
6681
7927
  { name: "twins list", description: "List available twins and entitlement status" },
6682
7928
  { name: "twins select", description: "Choose which twins to use on your free plan" }
@@ -6700,7 +7946,7 @@ var COMMAND_GROUPS = [
6700
7946
  ];
6701
7947
  function showHelp() {
6702
7948
  process.stderr.write(`
6703
- ${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v0.1.0${RESET3}
7949
+ ${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v${CLI_VERSION}${RESET3}
6704
7950
  `);
6705
7951
  process.stderr.write(`${DIM3}The QA layer for the software factory era${RESET3}
6706
7952
 
@@ -6722,21 +7968,21 @@ ${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v0.1.0${RESET3}
6722
7968
  `);
6723
7969
  }
6724
7970
  function createHelpCommand() {
6725
- return new Command14("help").description("Show all available commands").action(() => {
7971
+ return new Command13("help").description("Show all available commands").action(() => {
6726
7972
  showHelp();
6727
7973
  });
6728
7974
  }
6729
7975
 
6730
7976
  // src/commands/setup.ts
6731
- import { Command as Command15 } from "commander";
6732
- import { existsSync as existsSync16 } from "fs";
7977
+ import { Command as Command14 } from "commander";
7978
+ import { existsSync as existsSync17 } from "fs";
6733
7979
  var RESET4 = "\x1B[0m";
6734
7980
  var BOLD4 = "\x1B[1m";
6735
7981
  var DIM4 = "\x1B[2m";
6736
7982
  var CYAN4 = "\x1B[36m";
6737
7983
  var GREEN3 = "\x1B[32m";
6738
7984
  function createSetupCommand() {
6739
- return new Command15("setup").description("Guided onboarding wizard for first-time setup").action(async () => {
7985
+ return new Command14("setup").description("Guided onboarding wizard for first-time setup").action(async () => {
6740
7986
  process.stderr.write(`
6741
7987
  ${CYAN4}${BOLD4}Archal Setup${RESET4}
6742
7988
  `);
@@ -6758,7 +8004,7 @@ ${CYAN4}${BOLD4}Archal Setup${RESET4}
6758
8004
  ${BOLD4}Step 2: Configuration${RESET4}
6759
8005
  `);
6760
8006
  const configPath = getConfigPath();
6761
- if (existsSync16(configPath)) {
8007
+ if (existsSync17(configPath)) {
6762
8008
  success(`Config file exists: ${configPath}`);
6763
8009
  } else {
6764
8010
  const create = await askConfirm("Create a default config file?");
@@ -6823,7 +8069,7 @@ ${DIM4}${"\u2500".repeat(45)}${RESET4}
6823
8069
  `);
6824
8070
  process.stderr.write(` ${CYAN4}archal scenario create my-first-test${RESET4} ${DIM4}Create a scenario${RESET4}
6825
8071
  `);
6826
- process.stderr.write(` ${CYAN4}archal run scenario.md --openclaw-url "..."${RESET4} ${DIM4}Run a scenario${RESET4}
8072
+ process.stderr.write(` ${CYAN4}archal run scenario.md --engine-endpoint "..." --engine-model "..."${RESET4} ${DIM4}Run a scenario${RESET4}
6827
8073
  `);
6828
8074
  process.stderr.write(` ${CYAN4}archal help${RESET4} ${DIM4}See all commands${RESET4}
6829
8075
 
@@ -6832,8 +8078,8 @@ ${DIM4}${"\u2500".repeat(45)}${RESET4}
6832
8078
  }
6833
8079
 
6834
8080
  // src/index.ts
6835
- var program = new Command16();
6836
- program.name("archal").description("The QA layer for the software factory era \u2014 test AI agents against digital twins").version("0.1.0").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").hook("preAction", (_thisCommand) => {
8081
+ var program = new Command15();
8082
+ program.name("archal").description("The QA layer for the software factory era \u2014 test AI agents against digital twins").version(CLI_VERSION).option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").hook("preAction", (_thisCommand) => {
6837
8083
  const opts = program.opts();
6838
8084
  if (opts.quiet) {
6839
8085
  configureLogger({ quiet: true });
@@ -6848,7 +8094,6 @@ program.addCommand(createWhoamiCommand());
6848
8094
  program.addCommand(createSetupCommand());
6849
8095
  program.addCommand(createRunCommand());
6850
8096
  program.addCommand(createInitCommand());
6851
- program.addCommand(createTwinCommand());
6852
8097
  program.addCommand(createTwinsCommand());
6853
8098
  program.addCommand(createScenarioCommand());
6854
8099
  program.addCommand(createTraceCommand());
@@ -6864,6 +8109,14 @@ program.action(() => {
6864
8109
  process.stderr.write("\x1B[33mNot logged in.\x1B[0m Get started with: \x1B[36marchal login\x1B[0m\n\n");
6865
8110
  }
6866
8111
  });
8112
+ function handleShutdown(signal) {
8113
+ process.stderr.write(`
8114
+ Received ${signal}, shutting down...
8115
+ `);
8116
+ process.exit(128 + (signal === "SIGINT" ? 2 : 15));
8117
+ }
8118
+ process.on("SIGINT", () => handleShutdown("SIGINT"));
8119
+ process.on("SIGTERM", () => handleShutdown("SIGTERM"));
6867
8120
  program.parseAsync(process.argv).catch((err) => {
6868
8121
  const message = err instanceof Error ? err.message : String(err);
6869
8122
  process.stderr.write(`Error: ${message}