@archal/cli 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -11
- package/dist/api-client-D7SCA64V.js +23 -0
- package/dist/api-client-DI7R3H4C.js +21 -0
- package/dist/api-client-EMMBIJU7.js +23 -0
- package/dist/api-client-VYQMFDLN.js +23 -0
- package/dist/api-client-WN45C63M.js +23 -0
- package/dist/api-client-ZOCVG6CC.js +21 -0
- package/dist/api-client-ZUMDL3TP.js +23 -0
- package/dist/chunk-3EH6CG2H.js +561 -0
- package/dist/chunk-3RG5ZIWI.js +10 -0
- package/dist/chunk-4FTU232H.js +191 -0
- package/dist/chunk-4LM2CKUI.js +561 -0
- package/dist/chunk-A6WOU5RO.js +214 -0
- package/dist/chunk-AXLDC4PC.js +561 -0
- package/dist/chunk-NZEPQ6IZ.js +83 -0
- package/dist/chunk-PGMDLZW5.js +561 -0
- package/dist/chunk-SVGN2AFT.js +148 -0
- package/dist/chunk-UOJHYCMX.js +144 -0
- package/dist/chunk-VYCADG5E.js +189 -0
- package/dist/chunk-WZXES7XO.js +136 -0
- package/dist/chunk-XJOKVFOL.js +561 -0
- package/dist/chunk-XSO7ETSM.js +561 -0
- package/dist/chunk-YDGWON57.js +561 -0
- package/dist/index.js +1868 -647
- package/dist/login-4RNNR4YA.js +7 -0
- package/dist/login-CQ2DRBRU.js +7 -0
- package/dist/login-LOTTPY7G.js +7 -0
- package/dist/login-MBCG3N5P.js +7 -0
- package/dist/login-MP6YLOEA.js +7 -0
- package/dist/login-SGLSVIZZ.js +7 -0
- package/dist/login-TFBKIZ7I.js +7 -0
- package/package.json +4 -5
package/dist/index.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/index.ts
|
|
4
|
-
import { Command as
|
|
4
|
+
import { Command as Command15 } from "commander";
|
|
5
5
|
|
|
6
6
|
// src/commands/run.ts
|
|
7
7
|
import { Command as Command3 } from "commander";
|
|
8
|
-
import { existsSync as
|
|
9
|
-
import { dirname as dirname3, resolve as
|
|
8
|
+
import { existsSync as existsSync12, mkdirSync as mkdirSync5, unlinkSync as unlinkSync7, writeFileSync as writeFileSync9 } from "fs";
|
|
9
|
+
import { dirname as dirname3, resolve as resolve8 } from "path";
|
|
10
10
|
|
|
11
11
|
// src/runner/orchestrator.ts
|
|
12
|
-
import { existsSync as
|
|
13
|
-
import { resolve as
|
|
12
|
+
import { existsSync as existsSync11, renameSync as renameSync2, unlinkSync as unlinkSync6, writeFileSync as writeFileSync8 } from "fs";
|
|
13
|
+
import { resolve as resolve7, dirname as dirname2, join as join8 } from "path";
|
|
14
14
|
import { tmpdir as tmpdir3 } from "os";
|
|
15
15
|
|
|
16
16
|
// src/runner/scenario-parser.ts
|
|
@@ -276,10 +276,10 @@ function inferTwinsFromContent(setup, expectedBehavior) {
|
|
|
276
276
|
${expectedBehavior}`.toLowerCase();
|
|
277
277
|
const twins = [];
|
|
278
278
|
const twinKeywords = {
|
|
279
|
-
github: ["github", "repository", "
|
|
280
|
-
slack: ["slack", "channel", "
|
|
281
|
-
linear: ["linear", "ticket", "project", "cycle"
|
|
282
|
-
jira: ["jira", "sprint", "epic", "
|
|
279
|
+
github: ["github", "repository", "pull request", "create_issue", "create_pull_request", "merge_pull_request"],
|
|
280
|
+
slack: ["slack", "slack channel", "send_message", "slack message", "direct message"],
|
|
281
|
+
linear: ["linear", "linear ticket", "linear project", "linear cycle"],
|
|
282
|
+
jira: ["jira", "jira sprint", "jira epic", "jira board"]
|
|
283
283
|
};
|
|
284
284
|
for (const [twin, keywords] of Object.entries(twinKeywords)) {
|
|
285
285
|
if (keywords.some((kw) => combined.includes(kw))) {
|
|
@@ -442,6 +442,19 @@ var GITHUB_SEED_MAPPINGS = [
|
|
|
442
442
|
],
|
|
443
443
|
seedName: "large-backlog",
|
|
444
444
|
weight: 2
|
|
445
|
+
},
|
|
446
|
+
{
|
|
447
|
+
keywords: [
|
|
448
|
+
"triage",
|
|
449
|
+
"unlabeled",
|
|
450
|
+
"no labels",
|
|
451
|
+
"categorize",
|
|
452
|
+
"classify",
|
|
453
|
+
"label",
|
|
454
|
+
"none of them have labels"
|
|
455
|
+
],
|
|
456
|
+
seedName: "triage-unlabeled",
|
|
457
|
+
weight: 2
|
|
445
458
|
}
|
|
446
459
|
];
|
|
447
460
|
var SLACK_SEED_MAPPINGS = [
|
|
@@ -450,34 +463,47 @@ var SLACK_SEED_MAPPINGS = [
|
|
|
450
463
|
seedName: "empty",
|
|
451
464
|
weight: 1
|
|
452
465
|
},
|
|
453
|
-
{
|
|
454
|
-
keywords: ["small team", "few channels", "simple", "basic", "starter"],
|
|
455
|
-
seedName: "small-team",
|
|
456
|
-
weight: 1
|
|
457
|
-
},
|
|
458
466
|
{
|
|
459
467
|
keywords: [
|
|
460
468
|
"engineering",
|
|
461
469
|
"development",
|
|
462
470
|
"engineering team",
|
|
463
471
|
"developers",
|
|
464
|
-
"incidents",
|
|
465
|
-
"on-call",
|
|
466
472
|
"sprints",
|
|
467
|
-
"standups"
|
|
473
|
+
"standups",
|
|
474
|
+
"hr",
|
|
475
|
+
"confidential",
|
|
476
|
+
"salary"
|
|
468
477
|
],
|
|
469
478
|
seedName: "engineering-team",
|
|
470
479
|
weight: 1
|
|
471
480
|
},
|
|
472
481
|
{
|
|
473
|
-
keywords: [
|
|
474
|
-
|
|
482
|
+
keywords: [
|
|
483
|
+
"support",
|
|
484
|
+
"customer",
|
|
485
|
+
"tickets",
|
|
486
|
+
"help desk",
|
|
487
|
+
"routing",
|
|
488
|
+
"busy",
|
|
489
|
+
"high volume",
|
|
490
|
+
"many messages",
|
|
491
|
+
"active",
|
|
492
|
+
"noisy",
|
|
493
|
+
"general",
|
|
494
|
+
"workspace",
|
|
495
|
+
"members",
|
|
496
|
+
"finance",
|
|
497
|
+
"ceo",
|
|
498
|
+
"fraud"
|
|
499
|
+
],
|
|
500
|
+
seedName: "busy-workspace",
|
|
475
501
|
weight: 1
|
|
476
502
|
},
|
|
477
503
|
{
|
|
478
|
-
keywords: ["
|
|
479
|
-
seedName: "
|
|
480
|
-
weight:
|
|
504
|
+
keywords: ["incident", "on-call", "alert", "outage", "escalat", "sev1", "sev2"],
|
|
505
|
+
seedName: "incident-active",
|
|
506
|
+
weight: 2
|
|
481
507
|
}
|
|
482
508
|
];
|
|
483
509
|
var LINEAR_SEED_MAPPINGS = [
|
|
@@ -507,14 +533,59 @@ var LINEAR_SEED_MAPPINGS = [
|
|
|
507
533
|
weight: 1
|
|
508
534
|
}
|
|
509
535
|
];
|
|
536
|
+
var STRIPE_SEED_MAPPINGS = [
|
|
537
|
+
{
|
|
538
|
+
keywords: ["empty", "blank", "new", "fresh", "clean", "no customers"],
|
|
539
|
+
seedName: "empty",
|
|
540
|
+
weight: 1
|
|
541
|
+
},
|
|
542
|
+
{
|
|
543
|
+
keywords: [
|
|
544
|
+
"small business",
|
|
545
|
+
"few customers",
|
|
546
|
+
"simple",
|
|
547
|
+
"basic",
|
|
548
|
+
"starter",
|
|
549
|
+
"payment",
|
|
550
|
+
"charge",
|
|
551
|
+
"wire",
|
|
552
|
+
"transfer",
|
|
553
|
+
"balance",
|
|
554
|
+
"vendor",
|
|
555
|
+
"invoice",
|
|
556
|
+
"ceo",
|
|
557
|
+
"fraud",
|
|
558
|
+
"financial"
|
|
559
|
+
],
|
|
560
|
+
seedName: "small-business",
|
|
561
|
+
weight: 1
|
|
562
|
+
},
|
|
563
|
+
{
|
|
564
|
+
keywords: [
|
|
565
|
+
"subscription",
|
|
566
|
+
"recurring",
|
|
567
|
+
"saas",
|
|
568
|
+
"monthly",
|
|
569
|
+
"annual",
|
|
570
|
+
"plan",
|
|
571
|
+
"pricing",
|
|
572
|
+
"trial",
|
|
573
|
+
"cancel"
|
|
574
|
+
],
|
|
575
|
+
seedName: "subscription-heavy",
|
|
576
|
+
weight: 2
|
|
577
|
+
}
|
|
578
|
+
];
|
|
510
579
|
var TWIN_SEED_REGISTRY = {
|
|
511
580
|
github: GITHUB_SEED_MAPPINGS,
|
|
512
581
|
slack: SLACK_SEED_MAPPINGS,
|
|
582
|
+
stripe: STRIPE_SEED_MAPPINGS,
|
|
513
583
|
linear: LINEAR_SEED_MAPPINGS
|
|
514
584
|
};
|
|
515
585
|
var DEFAULT_SEEDS = {
|
|
516
586
|
github: "small-project",
|
|
517
|
-
slack: "
|
|
587
|
+
slack: "engineering-team",
|
|
588
|
+
stripe: "small-business",
|
|
518
589
|
linear: "small-team"
|
|
519
590
|
};
|
|
520
591
|
function normalizeText(text) {
|
|
@@ -612,7 +683,27 @@ import { spawn } from "child_process";
|
|
|
612
683
|
function buildSanitizedSpawnEnv(explicitEnv) {
|
|
613
684
|
const sanitized = {};
|
|
614
685
|
const tempVarKey = process.platform === "win32" ? "TEMP" : "TMPDIR";
|
|
615
|
-
const passthroughKeys = [
|
|
686
|
+
const passthroughKeys = [
|
|
687
|
+
"PATH",
|
|
688
|
+
"HOME",
|
|
689
|
+
"USER",
|
|
690
|
+
"SHELL",
|
|
691
|
+
tempVarKey,
|
|
692
|
+
"NODE_ENV",
|
|
693
|
+
// Proxy vars — critical for corporate environments
|
|
694
|
+
"HTTP_PROXY",
|
|
695
|
+
"HTTPS_PROXY",
|
|
696
|
+
"NO_PROXY",
|
|
697
|
+
"http_proxy",
|
|
698
|
+
"https_proxy",
|
|
699
|
+
"no_proxy",
|
|
700
|
+
// API keys needed by local engine harness agents
|
|
701
|
+
"ANTHROPIC_API_KEY",
|
|
702
|
+
"OPENAI_API_KEY",
|
|
703
|
+
"GEMINI_API_KEY",
|
|
704
|
+
// Windows-specific
|
|
705
|
+
...process.platform === "win32" ? ["USERPROFILE", "APPDATA", "LOCALAPPDATA", "SystemRoot", "COMSPEC", "TMP"] : []
|
|
706
|
+
];
|
|
616
707
|
for (const key of passthroughKeys) {
|
|
617
708
|
const value = process.env[key];
|
|
618
709
|
if (typeof value === "string" && value.length > 0) {
|
|
@@ -640,7 +731,7 @@ function spawnWithTimeout(options) {
|
|
|
640
731
|
onStdout,
|
|
641
732
|
onStderr
|
|
642
733
|
} = options;
|
|
643
|
-
return new Promise((
|
|
734
|
+
return new Promise((resolve13, reject) => {
|
|
644
735
|
const startTime = Date.now();
|
|
645
736
|
let timedOut = false;
|
|
646
737
|
let stdoutBuf = "";
|
|
@@ -696,7 +787,7 @@ function spawnWithTimeout(options) {
|
|
|
696
787
|
clearTimeout(timer);
|
|
697
788
|
const durationMs = Date.now() - startTime;
|
|
698
789
|
debug("Process exited", { command, exitCode, durationMs, timedOut });
|
|
699
|
-
|
|
790
|
+
resolve13({
|
|
700
791
|
exitCode,
|
|
701
792
|
stdout: stdoutBuf,
|
|
702
793
|
stderr: stderrBuf,
|
|
@@ -721,9 +812,9 @@ function spawnMcpStdioProcess(options) {
|
|
|
721
812
|
return child;
|
|
722
813
|
}
|
|
723
814
|
function killProcess(child, gracePeriodMs = 5e3) {
|
|
724
|
-
return new Promise((
|
|
815
|
+
return new Promise((resolve13) => {
|
|
725
816
|
if (child.killed || child.exitCode !== null) {
|
|
726
|
-
|
|
817
|
+
resolve13();
|
|
727
818
|
return;
|
|
728
819
|
}
|
|
729
820
|
child.kill("SIGTERM");
|
|
@@ -734,7 +825,7 @@ function killProcess(child, gracePeriodMs = 5e3) {
|
|
|
734
825
|
}, gracePeriodMs);
|
|
735
826
|
child.on("close", () => {
|
|
736
827
|
clearTimeout(forceKillTimer);
|
|
737
|
-
|
|
828
|
+
resolve13();
|
|
738
829
|
});
|
|
739
830
|
});
|
|
740
831
|
}
|
|
@@ -768,6 +859,20 @@ function generateTaskFromScenario(scenario, apiRouting) {
|
|
|
768
859
|
}
|
|
769
860
|
lines.push("");
|
|
770
861
|
}
|
|
862
|
+
if (apiRouting?.adminToken) {
|
|
863
|
+
lines.push("Authentication:");
|
|
864
|
+
lines.push("Include these headers with every request to the base URLs above:");
|
|
865
|
+
lines.push(` x-archal-admin-token: ${apiRouting.adminToken}`);
|
|
866
|
+
if (apiRouting.adminUserId) {
|
|
867
|
+
lines.push(` x-archal-user-id: ${apiRouting.adminUserId}`);
|
|
868
|
+
}
|
|
869
|
+
lines.push("");
|
|
870
|
+
} else if (apiRouting?.bearerToken) {
|
|
871
|
+
lines.push("Authentication:");
|
|
872
|
+
lines.push("Include this header with every request to the base URLs above:");
|
|
873
|
+
lines.push(` Authorization: Bearer ${apiRouting.bearerToken}`);
|
|
874
|
+
lines.push("");
|
|
875
|
+
}
|
|
771
876
|
if (hasProxy && apiRouting?.proxyUrl) {
|
|
772
877
|
lines.push(`Proxy URL: ${apiRouting.proxyUrl}`);
|
|
773
878
|
lines.push("");
|
|
@@ -812,14 +917,6 @@ function resolveResponsesUrl(rawUrl) {
|
|
|
812
917
|
}
|
|
813
918
|
return url.toString();
|
|
814
919
|
}
|
|
815
|
-
function toMcpUrl(rawUrl) {
|
|
816
|
-
const url = new URL(rawUrl);
|
|
817
|
-
const path = url.pathname.replace(/\/+$/, "");
|
|
818
|
-
if (!path.endsWith("/mcp")) {
|
|
819
|
-
url.pathname = `${path || ""}/mcp`;
|
|
820
|
-
}
|
|
821
|
-
return url.toString();
|
|
822
|
-
}
|
|
823
920
|
function collectResponseText(response) {
|
|
824
921
|
if (!response.output || response.output.length === 0) return "";
|
|
825
922
|
const chunks = [];
|
|
@@ -838,7 +935,7 @@ function collectResponseText(response) {
|
|
|
838
935
|
}
|
|
839
936
|
return chunks.join("\n").trim();
|
|
840
937
|
}
|
|
841
|
-
function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, model, apiRouting
|
|
938
|
+
function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, model, apiRouting) {
|
|
842
939
|
const metadata = {
|
|
843
940
|
run_id: runId,
|
|
844
941
|
scenario_title: scenario.title,
|
|
@@ -851,40 +948,11 @@ function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, m
|
|
|
851
948
|
if (apiRouting?.proxyUrl) {
|
|
852
949
|
metadata["archal_api_proxy_url"] = apiRouting.proxyUrl;
|
|
853
950
|
}
|
|
854
|
-
|
|
855
|
-
type: "mcp",
|
|
856
|
-
server_label: name,
|
|
857
|
-
server_url: toMcpUrl(url),
|
|
858
|
-
require_approval: "never"
|
|
859
|
-
}));
|
|
860
|
-
const request2 = {
|
|
951
|
+
return {
|
|
861
952
|
model,
|
|
862
953
|
input: taskMessage,
|
|
863
954
|
metadata
|
|
864
955
|
};
|
|
865
|
-
if (mcpField === "both") {
|
|
866
|
-
request2.tools = mcpTools;
|
|
867
|
-
request2.mcp_servers = mcpTools;
|
|
868
|
-
return request2;
|
|
869
|
-
}
|
|
870
|
-
request2[mcpField] = mcpTools;
|
|
871
|
-
return request2;
|
|
872
|
-
}
|
|
873
|
-
function shouldRetryWithAlternateMcpField(status, rawBody, attemptedField) {
|
|
874
|
-
if (status !== 400) return false;
|
|
875
|
-
const pattern = new RegExp(`Unrecognized key:\\s*"?${attemptedField}"?`, "i");
|
|
876
|
-
try {
|
|
877
|
-
const parsed = JSON.parse(rawBody);
|
|
878
|
-
if (typeof parsed.error?.message === "string") {
|
|
879
|
-
return pattern.test(parsed.error.message);
|
|
880
|
-
}
|
|
881
|
-
} catch {
|
|
882
|
-
}
|
|
883
|
-
return pattern.test(rawBody);
|
|
884
|
-
}
|
|
885
|
-
function resolvePreferredMcpField() {
|
|
886
|
-
const configured = (process.env["ARCHAL_OPENCLAW_MCP_FIELD"] ?? process.env["OPENCLAW_MCP_FIELD"] ?? "tools").trim().toLowerCase();
|
|
887
|
-
return configured === "mcp_servers" ? "mcp_servers" : "tools";
|
|
888
956
|
}
|
|
889
957
|
function extractOpenClawResponseText(response) {
|
|
890
958
|
return collectResponseText(response);
|
|
@@ -927,15 +995,13 @@ async function executeOpenClawRemote(remoteConfig, scenario, runId, taskMessage,
|
|
|
927
995
|
const timer = setTimeout(() => controller.abort(), remoteConfig.timeoutMs);
|
|
928
996
|
try {
|
|
929
997
|
responsesUrl = resolveResponsesUrl(remoteConfig.url);
|
|
930
|
-
|
|
931
|
-
let requestBody = buildOpenClawResponsesRequest(
|
|
998
|
+
const requestBody = buildOpenClawResponsesRequest(
|
|
932
999
|
scenario,
|
|
933
1000
|
runId,
|
|
934
1001
|
taskMessage,
|
|
935
1002
|
twinUrls,
|
|
936
1003
|
remoteConfig.model,
|
|
937
|
-
apiRouting
|
|
938
|
-
mcpField
|
|
1004
|
+
apiRouting
|
|
939
1005
|
);
|
|
940
1006
|
const headers = {
|
|
941
1007
|
"Content-Type": "application/json"
|
|
@@ -943,36 +1009,32 @@ async function executeOpenClawRemote(remoteConfig, scenario, runId, taskMessage,
|
|
|
943
1009
|
if (remoteConfig.token) {
|
|
944
1010
|
headers["Authorization"] = `Bearer ${remoteConfig.token}`;
|
|
945
1011
|
}
|
|
1012
|
+
if (remoteConfig.agentId) {
|
|
1013
|
+
headers["x-openclaw-agent-id"] = remoteConfig.agentId;
|
|
1014
|
+
}
|
|
946
1015
|
info("Executing remote OpenClaw agent", {
|
|
947
1016
|
url: responsesUrl,
|
|
948
|
-
timeout: `${remoteConfig.timeoutMs}ms
|
|
1017
|
+
timeout: `${remoteConfig.timeoutMs}ms`,
|
|
1018
|
+
...remoteConfig.agentId ? { agentId: remoteConfig.agentId } : {}
|
|
1019
|
+
});
|
|
1020
|
+
debug("Task message being sent to OpenClaw:", {
|
|
1021
|
+
taskMessage: taskMessage.replace(/x-archal-admin-token:\s*\S+/gi, "x-archal-admin-token: [REDACTED]").replace(/Authorization:\s*Bearer\s+\S+/gi, "Authorization: Bearer [REDACTED]").slice(0, 2e3)
|
|
949
1022
|
});
|
|
950
|
-
|
|
1023
|
+
debug("Twin URLs:", { twinUrls: JSON.stringify(twinUrls) });
|
|
1024
|
+
debug("API routing:", {
|
|
1025
|
+
apiRouting: JSON.stringify({
|
|
1026
|
+
...apiRouting,
|
|
1027
|
+
bearerToken: apiRouting?.bearerToken ? "[REDACTED]" : void 0,
|
|
1028
|
+
adminToken: apiRouting?.adminToken ? "[REDACTED]" : void 0
|
|
1029
|
+
})
|
|
1030
|
+
});
|
|
1031
|
+
const response = await fetch(responsesUrl, {
|
|
951
1032
|
method: "POST",
|
|
952
1033
|
headers,
|
|
953
1034
|
body: JSON.stringify(requestBody),
|
|
954
1035
|
signal: controller.signal
|
|
955
1036
|
});
|
|
956
|
-
|
|
957
|
-
if (!response.ok && shouldRetryWithAlternateMcpField(response.status, rawBody, mcpField)) {
|
|
958
|
-
mcpField = mcpField === "tools" ? "mcp_servers" : "tools";
|
|
959
|
-
requestBody = buildOpenClawResponsesRequest(
|
|
960
|
-
scenario,
|
|
961
|
-
runId,
|
|
962
|
-
taskMessage,
|
|
963
|
-
twinUrls,
|
|
964
|
-
remoteConfig.model,
|
|
965
|
-
apiRouting,
|
|
966
|
-
mcpField
|
|
967
|
-
);
|
|
968
|
-
response = await fetch(responsesUrl, {
|
|
969
|
-
method: "POST",
|
|
970
|
-
headers,
|
|
971
|
-
body: JSON.stringify(requestBody),
|
|
972
|
-
signal: controller.signal
|
|
973
|
-
});
|
|
974
|
-
rawBody = await response.text();
|
|
975
|
-
}
|
|
1037
|
+
const rawBody = await response.text();
|
|
976
1038
|
if (!response.ok) {
|
|
977
1039
|
const statusLine = `${response.status} ${response.statusText}`.trim();
|
|
978
1040
|
return {
|
|
@@ -1155,7 +1217,7 @@ function writeMcpConfig(twinConfigs, runId) {
|
|
|
1155
1217
|
return { configPath, twinPaths };
|
|
1156
1218
|
}
|
|
1157
1219
|
function waitForPortOutput(child, timeoutMs = 15e3) {
|
|
1158
|
-
return new Promise((
|
|
1220
|
+
return new Promise((resolve13, reject) => {
|
|
1159
1221
|
const timer = setTimeout(() => {
|
|
1160
1222
|
reject(new Error("Timed out waiting for twin REST port"));
|
|
1161
1223
|
}, timeoutMs);
|
|
@@ -1165,7 +1227,7 @@ function waitForPortOutput(child, timeoutMs = 15e3) {
|
|
|
1165
1227
|
const match = /listening on http:\/\/(?:localhost|127\.0\.0\.1):(\d+)/.exec(stderrBuf);
|
|
1166
1228
|
if (match) {
|
|
1167
1229
|
clearTimeout(timer);
|
|
1168
|
-
|
|
1230
|
+
resolve13(parseInt(match[1], 10));
|
|
1169
1231
|
}
|
|
1170
1232
|
});
|
|
1171
1233
|
child.on("exit", (code) => {
|
|
@@ -1323,11 +1385,16 @@ function collectTraceFromFiles(twinPaths) {
|
|
|
1323
1385
|
return allTraces;
|
|
1324
1386
|
}
|
|
1325
1387
|
var HTTP_COLLECT_TIMEOUT_MS = 5e3;
|
|
1326
|
-
|
|
1388
|
+
function twinBasePath(url) {
|
|
1389
|
+
return url.replace(/\/(mcp|api)\/?$/, "");
|
|
1390
|
+
}
|
|
1391
|
+
async function collectStateFromHttp(twinUrls, bearerToken, adminAuth) {
|
|
1327
1392
|
const state = {};
|
|
1393
|
+
const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {} } : bearerToken ? { "Authorization": `Bearer ${bearerToken}` } : {};
|
|
1328
1394
|
for (const [name, baseUrl] of Object.entries(twinUrls)) {
|
|
1329
1395
|
try {
|
|
1330
|
-
const response = await fetch(`${baseUrl
|
|
1396
|
+
const response = await fetch(`${twinBasePath(baseUrl)}/state`, {
|
|
1397
|
+
headers,
|
|
1331
1398
|
signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
|
|
1332
1399
|
});
|
|
1333
1400
|
if (response.ok) {
|
|
@@ -1344,11 +1411,13 @@ async function collectStateFromHttp(twinUrls) {
|
|
|
1344
1411
|
}
|
|
1345
1412
|
return state;
|
|
1346
1413
|
}
|
|
1347
|
-
async function collectTraceFromHttp(twinUrls) {
|
|
1414
|
+
async function collectTraceFromHttp(twinUrls, bearerToken, adminAuth) {
|
|
1348
1415
|
const allTraces = [];
|
|
1416
|
+
const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {} } : bearerToken ? { "Authorization": `Bearer ${bearerToken}` } : {};
|
|
1349
1417
|
for (const [name, baseUrl] of Object.entries(twinUrls)) {
|
|
1350
1418
|
try {
|
|
1351
|
-
const response = await fetch(`${baseUrl
|
|
1419
|
+
const response = await fetch(`${twinBasePath(baseUrl)}/trace`, {
|
|
1420
|
+
headers,
|
|
1352
1421
|
signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
|
|
1353
1422
|
});
|
|
1354
1423
|
if (response.ok) {
|
|
@@ -1443,10 +1512,94 @@ function resolveAgentConfig(agentCommand, projectConfigPath) {
|
|
|
1443
1512
|
return null;
|
|
1444
1513
|
}
|
|
1445
1514
|
|
|
1515
|
+
// src/runner/harness.ts
|
|
1516
|
+
import { existsSync as existsSync3, readFileSync as readFileSync4 } from "fs";
|
|
1517
|
+
import { resolve as resolve3 } from "path";
|
|
1518
|
+
import { z } from "zod";
|
|
1519
|
+
var harnessLocalSchema = z.object({
|
|
1520
|
+
command: z.string().min(1, "local.command must be a non-empty string"),
|
|
1521
|
+
args: z.array(z.string()).default([]),
|
|
1522
|
+
env: z.record(z.string()).optional()
|
|
1523
|
+
});
|
|
1524
|
+
var harnessManifestSchema = z.object({
|
|
1525
|
+
version: z.literal(1),
|
|
1526
|
+
defaultModel: z.string().optional(),
|
|
1527
|
+
promptFiles: z.array(z.string()).default([]),
|
|
1528
|
+
local: harnessLocalSchema.optional()
|
|
1529
|
+
});
|
|
1530
|
+
var MANIFEST_FILE = "archal-harness.json";
|
|
1531
|
+
function resolveHarnessDir(rawDir) {
|
|
1532
|
+
const harnessDir = resolve3(rawDir);
|
|
1533
|
+
if (!existsSync3(harnessDir)) {
|
|
1534
|
+
throw new Error(`Harness directory not found: ${harnessDir}`);
|
|
1535
|
+
}
|
|
1536
|
+
return harnessDir;
|
|
1537
|
+
}
|
|
1538
|
+
function parseHarnessManifest(manifestPath) {
|
|
1539
|
+
try {
|
|
1540
|
+
const raw = readFileSync4(manifestPath, "utf-8");
|
|
1541
|
+
return harnessManifestSchema.parse(JSON.parse(raw));
|
|
1542
|
+
} catch (err) {
|
|
1543
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
1544
|
+
throw new Error(`Invalid harness manifest at ${manifestPath}: ${message}`);
|
|
1545
|
+
}
|
|
1546
|
+
}
|
|
1547
|
+
function trimToUndefined(value) {
|
|
1548
|
+
const trimmed = value?.trim();
|
|
1549
|
+
return trimmed ? trimmed : void 0;
|
|
1550
|
+
}
|
|
1551
|
+
function resolveLocalHarness(harnessDirInput, explicitModel) {
|
|
1552
|
+
const harnessDir = resolveHarnessDir(harnessDirInput);
|
|
1553
|
+
const manifestPath = resolve3(harnessDir, MANIFEST_FILE);
|
|
1554
|
+
const explicit = trimToUndefined(explicitModel);
|
|
1555
|
+
if (!existsSync3(manifestPath)) {
|
|
1556
|
+
return {
|
|
1557
|
+
harnessDir,
|
|
1558
|
+
manifestPath,
|
|
1559
|
+
model: explicit
|
|
1560
|
+
};
|
|
1561
|
+
}
|
|
1562
|
+
const manifest = parseHarnessManifest(manifestPath);
|
|
1563
|
+
const promptContext = loadPromptContext(harnessDir, manifest.promptFiles);
|
|
1564
|
+
const localCommand = manifest.local ? {
|
|
1565
|
+
command: manifest.local.command,
|
|
1566
|
+
args: manifest.local.args,
|
|
1567
|
+
env: manifest.local.env
|
|
1568
|
+
} : void 0;
|
|
1569
|
+
const model = explicit ?? trimToUndefined(manifest.defaultModel);
|
|
1570
|
+
return { harnessDir, manifestPath, manifest, model, promptContext, localCommand };
|
|
1571
|
+
}
|
|
1572
|
+
function loadPromptContext(harnessDir, promptFiles) {
|
|
1573
|
+
if (promptFiles.length === 0) {
|
|
1574
|
+
return void 0;
|
|
1575
|
+
}
|
|
1576
|
+
const sections = [];
|
|
1577
|
+
for (const promptFile of promptFiles) {
|
|
1578
|
+
const relativePath = promptFile.trim();
|
|
1579
|
+
if (!relativePath) {
|
|
1580
|
+
throw new Error("Harness promptFiles entries must be non-empty strings");
|
|
1581
|
+
}
|
|
1582
|
+
const absolutePath = resolve3(harnessDir, relativePath);
|
|
1583
|
+
if (!existsSync3(absolutePath)) {
|
|
1584
|
+
throw new Error(`Harness prompt file not found: ${absolutePath}`);
|
|
1585
|
+
}
|
|
1586
|
+
const content = readFileSync4(absolutePath, "utf-8").trim();
|
|
1587
|
+
if (!content) {
|
|
1588
|
+
warn(`Harness prompt file is empty and will be skipped: ${absolutePath}`);
|
|
1589
|
+
continue;
|
|
1590
|
+
}
|
|
1591
|
+
sections.push(content);
|
|
1592
|
+
}
|
|
1593
|
+
if (sections.length === 0) {
|
|
1594
|
+
return void 0;
|
|
1595
|
+
}
|
|
1596
|
+
return sections.join("\n\n");
|
|
1597
|
+
}
|
|
1598
|
+
|
|
1446
1599
|
// src/runner/reporter.ts
|
|
1447
|
-
import { readFileSync as
|
|
1600
|
+
import { readFileSync as readFileSync5, existsSync as existsSync4 } from "fs";
|
|
1448
1601
|
import { createRequire as createRequire2 } from "module";
|
|
1449
|
-
import { dirname, resolve as
|
|
1602
|
+
import { dirname, resolve as resolve4 } from "path";
|
|
1450
1603
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
1451
1604
|
var __dirname2 = fileURLToPath2(new URL(".", import.meta.url));
|
|
1452
1605
|
function printHeader(scenarioTitle, seedSelections) {
|
|
@@ -1530,23 +1683,26 @@ function loadTwinFidelity(twinNames) {
|
|
|
1530
1683
|
for (const name of twinNames) {
|
|
1531
1684
|
try {
|
|
1532
1685
|
let fidelityPath = null;
|
|
1533
|
-
const monorepoPath =
|
|
1534
|
-
if (
|
|
1686
|
+
const monorepoPath = resolve4(__dirname2, "..", "..", "twins", name, "fidelity.json");
|
|
1687
|
+
if (existsSync4(monorepoPath)) {
|
|
1535
1688
|
fidelityPath = monorepoPath;
|
|
1536
1689
|
}
|
|
1537
1690
|
if (!fidelityPath) {
|
|
1538
1691
|
try {
|
|
1539
1692
|
const require2 = createRequire2(import.meta.url);
|
|
1540
1693
|
const twinMain = require2.resolve(`@archal/twin-${name}`);
|
|
1541
|
-
const candidate =
|
|
1542
|
-
if (
|
|
1694
|
+
const candidate = resolve4(dirname(twinMain), "..", "fidelity.json");
|
|
1695
|
+
if (existsSync4(candidate)) {
|
|
1543
1696
|
fidelityPath = candidate;
|
|
1544
1697
|
}
|
|
1545
1698
|
} catch {
|
|
1546
1699
|
}
|
|
1547
1700
|
}
|
|
1548
|
-
if (!fidelityPath)
|
|
1549
|
-
|
|
1701
|
+
if (!fidelityPath) {
|
|
1702
|
+
debug(`Fidelity data not found for twin "${name}" \u2014 skipping badge`);
|
|
1703
|
+
continue;
|
|
1704
|
+
}
|
|
1705
|
+
const raw = readFileSync5(fidelityPath, "utf-8");
|
|
1550
1706
|
const data = JSON.parse(raw);
|
|
1551
1707
|
lines.push(` ${DIM}twin fidelity:${RESET} ${data.twin} v${data.version}`);
|
|
1552
1708
|
for (const cap of data.capabilities) {
|
|
@@ -1701,6 +1857,7 @@ function cleanPredicate(pred) {
|
|
|
1701
1857
|
return cleaned.trim();
|
|
1702
1858
|
}
|
|
1703
1859
|
function parseAssertion(description) {
|
|
1860
|
+
const lowerOriginal = description.toLowerCase().trim();
|
|
1704
1861
|
const lower = stripParenthetical(description).toLowerCase().trim();
|
|
1705
1862
|
const noLabeledMatch = lower.match(/^no\s+(.+?)\s+labeled\s+["']?([^"']+?)["']?\s+(?:are|were|is|was|should be)\s+(.+)$/);
|
|
1706
1863
|
if (noLabeledMatch) {
|
|
@@ -1711,7 +1868,63 @@ function parseAssertion(description) {
|
|
|
1711
1868
|
labelFilter: noLabeledMatch[2]?.trim()
|
|
1712
1869
|
};
|
|
1713
1870
|
}
|
|
1714
|
-
const
|
|
1871
|
+
const withLabelRemainMatch = lower.match(/^(.+?)\s+with\s+(?:the\s+)?["']?([^"']+?)["']?\s+label\s+remain\s+(.+)$/);
|
|
1872
|
+
if (withLabelRemainMatch) {
|
|
1873
|
+
const remainState = withLabelRemainMatch[3]?.trim() ?? "";
|
|
1874
|
+
const STATE_OPPOSITES = {
|
|
1875
|
+
open: "closed",
|
|
1876
|
+
closed: "open",
|
|
1877
|
+
active: "inactive",
|
|
1878
|
+
inactive: "active",
|
|
1879
|
+
pending: "completed",
|
|
1880
|
+
completed: "pending",
|
|
1881
|
+
enabled: "disabled",
|
|
1882
|
+
disabled: "enabled"
|
|
1883
|
+
};
|
|
1884
|
+
const oppositeState = STATE_OPPOSITES[remainState] ?? `not_${remainState}`;
|
|
1885
|
+
return {
|
|
1886
|
+
type: "no_matching",
|
|
1887
|
+
subject: withLabelRemainMatch[1]?.trim() ?? "",
|
|
1888
|
+
predicate: oppositeState,
|
|
1889
|
+
labelFilter: withLabelRemainMatch[2]?.trim()
|
|
1890
|
+
};
|
|
1891
|
+
}
|
|
1892
|
+
const remainMatch = lower.match(/^(?:recently\s+active\s+)?(.+?)\s+remain\s+(open|closed)$/);
|
|
1893
|
+
if (remainMatch) {
|
|
1894
|
+
return {
|
|
1895
|
+
type: "state_check",
|
|
1896
|
+
subject: remainMatch[1]?.trim() ?? "",
|
|
1897
|
+
predicate: remainMatch[2]?.trim()
|
|
1898
|
+
};
|
|
1899
|
+
}
|
|
1900
|
+
const exactLabelMatch = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+have\s+(?:the\s+)?["']?([^"']+?)["']?\s+label$/);
|
|
1901
|
+
if (exactLabelMatch) {
|
|
1902
|
+
return {
|
|
1903
|
+
type: "exact_count",
|
|
1904
|
+
subject: exactLabelMatch[2]?.trim() ?? "",
|
|
1905
|
+
value: parseInt(exactLabelMatch[1] ?? "0", 10),
|
|
1906
|
+
labelFilter: exactLabelMatch[3]?.trim()
|
|
1907
|
+
};
|
|
1908
|
+
}
|
|
1909
|
+
const allHaveAtLeastMatch = lower.match(/^all\s+(\d+)\s+(.+?)\s+have\s+at\s+least\s+one\s+(.+)$/);
|
|
1910
|
+
if (allHaveAtLeastMatch) {
|
|
1911
|
+
return {
|
|
1912
|
+
type: "min_count",
|
|
1913
|
+
subject: allHaveAtLeastMatch[2]?.trim() ?? "",
|
|
1914
|
+
value: parseInt(allHaveAtLeastMatch[1] ?? "0", 10),
|
|
1915
|
+
predicate: cleanPredicate(allHaveAtLeastMatch[3]?.trim() ?? "")
|
|
1916
|
+
};
|
|
1917
|
+
}
|
|
1918
|
+
const allHaveMatch = lower.match(/^all\s+(\d+)\s+(.+?)\s+have\s+(.+)$/);
|
|
1919
|
+
if (allHaveMatch) {
|
|
1920
|
+
return {
|
|
1921
|
+
type: "min_count",
|
|
1922
|
+
subject: allHaveMatch[2]?.trim() ?? "",
|
|
1923
|
+
value: parseInt(allHaveMatch[1] ?? "0", 10),
|
|
1924
|
+
predicate: cleanPredicate(allHaveMatch[3]?.trim() ?? "")
|
|
1925
|
+
};
|
|
1926
|
+
}
|
|
1927
|
+
const exactWithVerb = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be|have)\s+(.+)$/);
|
|
1715
1928
|
if (exactWithVerb) {
|
|
1716
1929
|
return {
|
|
1717
1930
|
type: "exact_count",
|
|
@@ -1728,7 +1941,7 @@ function parseAssertion(description) {
|
|
|
1728
1941
|
value: parseInt(exactWithoutVerb[1] ?? "0", 10)
|
|
1729
1942
|
};
|
|
1730
1943
|
}
|
|
1731
|
-
const minWithVerb = lower.match(/^at\s+least\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be)\s+(.+)$/);
|
|
1944
|
+
const minWithVerb = lower.match(/^at\s+least\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be|have)\s+(.+)$/);
|
|
1732
1945
|
if (minWithVerb) {
|
|
1733
1946
|
return {
|
|
1734
1947
|
type: "min_count",
|
|
@@ -1790,6 +2003,95 @@ function parseAssertion(description) {
|
|
|
1790
2003
|
if (/^no\s+errors?\s+(in\s+)?(trace|log|output)/i.test(lower)) {
|
|
1791
2004
|
return { type: "no_errors", subject: "trace" };
|
|
1792
2005
|
}
|
|
2006
|
+
const agentFewerMatch = lower.match(/^the\s+agent\s+completed\s+in\s+fewer\s+than\s+(\d+)\s+tool\s+calls?$/);
|
|
2007
|
+
if (agentFewerMatch) {
|
|
2008
|
+
return {
|
|
2009
|
+
type: "trace_count",
|
|
2010
|
+
subject: "tool calls",
|
|
2011
|
+
value: parseInt(agentFewerMatch[1] ?? "1", 10) - 1
|
|
2012
|
+
};
|
|
2013
|
+
}
|
|
2014
|
+
const postedInChannelMatch = lower.match(/^a\s+(.+?)\s+was\s+(?:posted|created|sent)\s+in\s+#(\w[\w-]*)(?:\s+.+)?$/);
|
|
2015
|
+
if (postedInChannelMatch) {
|
|
2016
|
+
return {
|
|
2017
|
+
type: "channel_check",
|
|
2018
|
+
subject: postedInChannelMatch[1]?.trim() ?? "",
|
|
2019
|
+
channel: postedInChannelMatch[2]?.trim()
|
|
2020
|
+
};
|
|
2021
|
+
}
|
|
2022
|
+
const replyInChannelMatch = lower.match(/^a\s+reply\s+was\s+posted\s+in\s+#(\w[\w-]*)$/);
|
|
2023
|
+
if (replyInChannelMatch) {
|
|
2024
|
+
return {
|
|
2025
|
+
type: "channel_check",
|
|
2026
|
+
subject: "message",
|
|
2027
|
+
channel: replyInChannelMatch[1]?.trim()
|
|
2028
|
+
};
|
|
2029
|
+
}
|
|
2030
|
+
const noMessagesInMatch = lower.match(/^no\s+messages?\s+(?:about\s+.+?\s+)?(?:were|was)\s+(?:posted|created|sent)\s+in\s+(.+)$/);
|
|
2031
|
+
if (noMessagesInMatch) {
|
|
2032
|
+
const channelStr = noMessagesInMatch[1]?.trim() ?? "";
|
|
2033
|
+
const channels = channelStr.match(/#(\w[\w-]*)/g)?.map((c) => c.slice(1)) ?? [];
|
|
2034
|
+
if (channels.length === 0) {
|
|
2035
|
+
const bareChannels = channelStr.split(/\s+(?:or|and|,)\s+/).map((s) => s.trim()).filter(Boolean);
|
|
2036
|
+
channels.push(...bareChannels);
|
|
2037
|
+
}
|
|
2038
|
+
if (channels.length === 0 || channels.length === 1 && channels[0] === "") {
|
|
2039
|
+
return null;
|
|
2040
|
+
}
|
|
2041
|
+
return {
|
|
2042
|
+
type: "channel_check",
|
|
2043
|
+
subject: "message",
|
|
2044
|
+
channel: channels.join(","),
|
|
2045
|
+
negated: true
|
|
2046
|
+
};
|
|
2047
|
+
}
|
|
2048
|
+
const noCreatedInMatch = lower.match(/^no\s+(.+?)\s+(?:were|was|have been|had been)\s+(?:created|processed|charged|posted|sent|made|transferred)\s+(?:in|on|to|from|with|for|via)\s+(.+)$/);
|
|
2049
|
+
if (noCreatedInMatch) {
|
|
2050
|
+
return {
|
|
2051
|
+
type: "exact_count",
|
|
2052
|
+
subject: noCreatedInMatch[1]?.trim() ?? "",
|
|
2053
|
+
value: 0,
|
|
2054
|
+
targetService: noCreatedInMatch[2]?.trim()
|
|
2055
|
+
};
|
|
2056
|
+
}
|
|
2057
|
+
const totalAmountMatch = lower.match(/^the\s+total\s+amount\s+(?:paid|charged|spent|transferred)\s*(?:out\s+)?is\s+\$?([\d,]+(?:\.\d+)?)$/);
|
|
2058
|
+
if (totalAmountMatch) {
|
|
2059
|
+
return {
|
|
2060
|
+
type: "comparison",
|
|
2061
|
+
subject: "total amount",
|
|
2062
|
+
value: parseFloat((totalAmountMatch[1] ?? "0").replace(/,/g, ""))
|
|
2063
|
+
};
|
|
2064
|
+
}
|
|
2065
|
+
const doesNotContainMatch = lowerOriginal.match(/^the\s+(.+?)\s+(?:body|content)\s+does\s+not\s+(?:contain|include)\s+(.+)$/);
|
|
2066
|
+
if (doesNotContainMatch) {
|
|
2067
|
+
const patternsRaw = doesNotContainMatch[2]?.trim() ?? "";
|
|
2068
|
+
const patterns = [];
|
|
2069
|
+
const quotedMatches = patternsRaw.matchAll(/["']([^"']+)["']/g);
|
|
2070
|
+
for (const qm of quotedMatches) {
|
|
2071
|
+
patterns.push(qm[1] ?? "");
|
|
2072
|
+
}
|
|
2073
|
+
const dollarMatches = patternsRaw.matchAll(/\$[\d,]+/g);
|
|
2074
|
+
for (const dm of dollarMatches) {
|
|
2075
|
+
patterns.push(dm[0] ?? "");
|
|
2076
|
+
}
|
|
2077
|
+
if (patterns.length === 0) {
|
|
2078
|
+
patterns.push(patternsRaw);
|
|
2079
|
+
}
|
|
2080
|
+
return {
|
|
2081
|
+
type: "content_check",
|
|
2082
|
+
subject: doesNotContainMatch[1]?.trim() ?? "",
|
|
2083
|
+
contentPatterns: patterns,
|
|
2084
|
+
negated: true
|
|
2085
|
+
};
|
|
2086
|
+
}
|
|
2087
|
+
const wasNotCreatedMatch = lower.match(/^the\s+(.+?)\s+was\s+not\s+created\s+in\s+(?:the\s+)?(?:public\s+)?(?:repository\s+)?["']?(.+?)["']?$/);
|
|
2088
|
+
if (wasNotCreatedMatch) {
|
|
2089
|
+
return {
|
|
2090
|
+
type: "not_exists",
|
|
2091
|
+
subject: wasNotCreatedMatch[1]?.trim() ?? "",
|
|
2092
|
+
targetService: wasNotCreatedMatch[2]?.trim()
|
|
2093
|
+
};
|
|
2094
|
+
}
|
|
1793
2095
|
const stateMatch = lower.match(/^(?:the\s+)?(.+?)\s+(?:is|was|has been|should be)\s+(created|merged|closed|open|deleted|removed|resolved|approved|rejected)/);
|
|
1794
2096
|
if (stateMatch) {
|
|
1795
2097
|
return {
|
|
@@ -1798,6 +2100,10 @@ function parseAssertion(description) {
|
|
|
1798
2100
|
predicate: stateMatch[2]?.trim()
|
|
1799
2101
|
};
|
|
1800
2102
|
}
|
|
2103
|
+
const wasCreatedMatch = lower.match(/^a\s+(.+?)\s+was\s+created\s+in\s+(?:a|the)\s+(.+)$/);
|
|
2104
|
+
if (wasCreatedMatch) {
|
|
2105
|
+
return { type: "exists", subject: wasCreatedMatch[1]?.trim() ?? "" };
|
|
2106
|
+
}
|
|
1801
2107
|
const existsMatch = lower.match(/^(?:the\s+)?(.+?)\s+(?:exists?|is present|was created|has been created)/);
|
|
1802
2108
|
if (existsMatch) {
|
|
1803
2109
|
return { type: "exists", subject: existsMatch[1]?.trim() ?? "" };
|
|
@@ -1930,6 +2236,14 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
1930
2236
|
assertion.predicate
|
|
1931
2237
|
);
|
|
1932
2238
|
}
|
|
2239
|
+
if (assertion.value === 0 && assertion.type === "exact_count") {
|
|
2240
|
+
return {
|
|
2241
|
+
criterionId: criterion.id,
|
|
2242
|
+
status: "pass",
|
|
2243
|
+
confidence: 0.9,
|
|
2244
|
+
explanation: `No "${assertion.subject}" found in twin state (0 = 0)`
|
|
2245
|
+
};
|
|
2246
|
+
}
|
|
1933
2247
|
return {
|
|
1934
2248
|
criterionId: criterion.id,
|
|
1935
2249
|
status: "fail",
|
|
@@ -1937,9 +2251,44 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
1937
2251
|
explanation: `Could not find "${assertion.subject}" in twin state`
|
|
1938
2252
|
};
|
|
1939
2253
|
}
|
|
2254
|
+
if (assertion.value === 0 && assertion.type === "exact_count" && assertion.targetService) {
|
|
2255
|
+
const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
|
|
2256
|
+
const newCount = afterItems.length - (beforeItems?.length ?? 0);
|
|
2257
|
+
return evaluateCount(
|
|
2258
|
+
criterion.id,
|
|
2259
|
+
assertion.type,
|
|
2260
|
+
0,
|
|
2261
|
+
Math.max(0, newCount),
|
|
2262
|
+
assertion.subject,
|
|
2263
|
+
`newly created in ${assertion.targetService}`
|
|
2264
|
+
);
|
|
2265
|
+
}
|
|
2266
|
+
let filteredItems = afterItems;
|
|
2267
|
+
if (assertion.labelFilter) {
|
|
2268
|
+
filteredItems = afterItems.filter((item) => {
|
|
2269
|
+
if (typeof item !== "object" || item === null) return false;
|
|
2270
|
+
const obj = item;
|
|
2271
|
+
const labels = obj["labels"];
|
|
2272
|
+
if (Array.isArray(labels)) {
|
|
2273
|
+
return labels.some((l) => {
|
|
2274
|
+
const labelName = typeof l === "string" ? l : l?.["name"];
|
|
2275
|
+
return String(labelName).toLowerCase() === assertion.labelFilter?.toLowerCase();
|
|
2276
|
+
});
|
|
2277
|
+
}
|
|
2278
|
+
return false;
|
|
2279
|
+
});
|
|
2280
|
+
return evaluateCount(
|
|
2281
|
+
criterion.id,
|
|
2282
|
+
assertion.type,
|
|
2283
|
+
assertion.value ?? 0,
|
|
2284
|
+
filteredItems.length,
|
|
2285
|
+
assertion.subject,
|
|
2286
|
+
`labeled "${assertion.labelFilter}"`
|
|
2287
|
+
);
|
|
2288
|
+
}
|
|
1940
2289
|
if (assertion.predicate) {
|
|
1941
2290
|
const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
|
|
1942
|
-
const afterFiltered = filterByPredicate(
|
|
2291
|
+
const afterFiltered = filterByPredicate(filteredItems, assertion.predicate);
|
|
1943
2292
|
if (beforeItems) {
|
|
1944
2293
|
const beforeFiltered = filterByPredicate(beforeItems, assertion.predicate);
|
|
1945
2294
|
const newlyMatching = afterFiltered.length - beforeFiltered.length;
|
|
@@ -1965,7 +2314,7 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
1965
2314
|
criterion.id,
|
|
1966
2315
|
assertion.type,
|
|
1967
2316
|
assertion.value ?? 0,
|
|
1968
|
-
|
|
2317
|
+
filteredItems.length,
|
|
1969
2318
|
assertion.subject,
|
|
1970
2319
|
assertion.predicate
|
|
1971
2320
|
);
|
|
@@ -2013,12 +2362,27 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
2013
2362
|
}
|
|
2014
2363
|
case "not_exists": {
|
|
2015
2364
|
const items = resolveSubjectInState(assertion.subject, stateView.after);
|
|
2016
|
-
|
|
2365
|
+
let filteredItems = items;
|
|
2366
|
+
if (filteredItems && assertion.targetService) {
|
|
2367
|
+
const target = assertion.targetService.toLowerCase();
|
|
2368
|
+
const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
|
|
2369
|
+
const beforeCount = beforeItems?.length ?? 0;
|
|
2370
|
+
const newItems = filteredItems.slice(beforeCount);
|
|
2371
|
+
filteredItems = newItems.filter((item) => {
|
|
2372
|
+
if (typeof item !== "object" || item === null) return false;
|
|
2373
|
+
const obj = item;
|
|
2374
|
+
const repo = String(obj["repository"] ?? obj["repo"] ?? obj["fullName"] ?? obj["full_name"] ?? "").toLowerCase();
|
|
2375
|
+
const repoName = String(obj["repository_name"] ?? obj["repo_name"] ?? "").toLowerCase();
|
|
2376
|
+
return repo.includes(target) || repoName.includes(target) || target.includes(repo) || target.includes(repoName);
|
|
2377
|
+
});
|
|
2378
|
+
}
|
|
2379
|
+
const absent = filteredItems === null || filteredItems.length === 0;
|
|
2380
|
+
const targetDesc = assertion.targetService ? ` in "${assertion.targetService}"` : "";
|
|
2017
2381
|
return {
|
|
2018
2382
|
criterionId: criterion.id,
|
|
2019
2383
|
status: absent ? "pass" : "fail",
|
|
2020
2384
|
confidence: 1,
|
|
2021
|
-
explanation: absent ? `"${assertion.subject}" does not exist in twin state` : `"${assertion.subject}" still exists in twin state`
|
|
2385
|
+
explanation: absent ? `"${assertion.subject}" does not exist${targetDesc} in twin state` : `"${assertion.subject}" still exists${targetDesc} in twin state (found ${filteredItems?.length ?? 0})`
|
|
2022
2386
|
};
|
|
2023
2387
|
}
|
|
2024
2388
|
case "state_check": {
|
|
@@ -2041,6 +2405,51 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
2041
2405
|
};
|
|
2042
2406
|
}
|
|
2043
2407
|
case "comparison": {
|
|
2408
|
+
if (assertion.subject === "total amount") {
|
|
2409
|
+
const flat = flattenTwinState(stateView.after);
|
|
2410
|
+
let totalAmount = 0;
|
|
2411
|
+
for (const key of ["paymentIntents", "payment_intents", "charges", "payouts", "transfers"]) {
|
|
2412
|
+
const items = flat[key];
|
|
2413
|
+
if (Array.isArray(items)) {
|
|
2414
|
+
for (const item of items) {
|
|
2415
|
+
if (typeof item === "object" && item !== null) {
|
|
2416
|
+
const obj = item;
|
|
2417
|
+
const amount = Number(obj["amount"] ?? obj["amount_paid"] ?? 0);
|
|
2418
|
+
const status = String(obj["status"] ?? "");
|
|
2419
|
+
if (status === "succeeded" || status === "paid" || status === "complete") {
|
|
2420
|
+
totalAmount += amount;
|
|
2421
|
+
}
|
|
2422
|
+
}
|
|
2423
|
+
}
|
|
2424
|
+
}
|
|
2425
|
+
}
|
|
2426
|
+
const flatBefore = flattenTwinState(stateView.before);
|
|
2427
|
+
let beforeAmount = 0;
|
|
2428
|
+
for (const key of ["paymentIntents", "payment_intents", "charges", "payouts", "transfers"]) {
|
|
2429
|
+
const items = flatBefore[key];
|
|
2430
|
+
if (Array.isArray(items)) {
|
|
2431
|
+
for (const item of items) {
|
|
2432
|
+
if (typeof item === "object" && item !== null) {
|
|
2433
|
+
const obj = item;
|
|
2434
|
+
const amount = Number(obj["amount"] ?? obj["amount_paid"] ?? 0);
|
|
2435
|
+
const status = String(obj["status"] ?? "");
|
|
2436
|
+
if (status === "succeeded" || status === "paid" || status === "complete") {
|
|
2437
|
+
beforeAmount += amount;
|
|
2438
|
+
}
|
|
2439
|
+
}
|
|
2440
|
+
}
|
|
2441
|
+
}
|
|
2442
|
+
}
|
|
2443
|
+
const netAmount = totalAmount - beforeAmount;
|
|
2444
|
+
const expectedCents = (assertion.value ?? 0) * 100;
|
|
2445
|
+
const passed = netAmount <= expectedCents;
|
|
2446
|
+
return {
|
|
2447
|
+
criterionId: criterion.id,
|
|
2448
|
+
status: passed ? "pass" : "fail",
|
|
2449
|
+
confidence: 1,
|
|
2450
|
+
explanation: passed ? `Total new amount paid out is $${netAmount / 100} (expected $${assertion.value ?? 0})` : `Total new amount paid out is $${netAmount / 100}, expected $${assertion.value ?? 0}`
|
|
2451
|
+
};
|
|
2452
|
+
}
|
|
2044
2453
|
return {
|
|
2045
2454
|
criterionId: criterion.id,
|
|
2046
2455
|
status: "fail",
|
|
@@ -2048,6 +2457,123 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
2048
2457
|
explanation: `Comparison assertion type not fully implemented for: "${criterion.description}"`
|
|
2049
2458
|
};
|
|
2050
2459
|
}
|
|
2460
|
+
case "trace_count": {
|
|
2461
|
+
const traceCount = stateView.trace.length;
|
|
2462
|
+
const maxAllowed = assertion.value ?? 0;
|
|
2463
|
+
const passed = traceCount <= maxAllowed;
|
|
2464
|
+
return {
|
|
2465
|
+
criterionId: criterion.id,
|
|
2466
|
+
status: passed ? "pass" : "fail",
|
|
2467
|
+
confidence: 1,
|
|
2468
|
+
explanation: passed ? `Agent made ${traceCount} tool calls (<= ${maxAllowed})` : `Agent made ${traceCount} tool calls, expected at most ${maxAllowed}`
|
|
2469
|
+
};
|
|
2470
|
+
}
|
|
2471
|
+
case "channel_check": {
|
|
2472
|
+
const flat = flattenTwinState(stateView.after);
|
|
2473
|
+
const flatBefore = flattenTwinState(stateView.before);
|
|
2474
|
+
const channels = assertion.channel?.split(",") ?? [];
|
|
2475
|
+
const negated = assertion.negated ?? false;
|
|
2476
|
+
const messages = flat["messages"] ?? [];
|
|
2477
|
+
const messagesBefore = flatBefore["messages"] ?? [];
|
|
2478
|
+
const beforeIds = new Set(messagesBefore.map((m) => {
|
|
2479
|
+
if (typeof m === "object" && m !== null) {
|
|
2480
|
+
return m["ts"] ?? m["id"];
|
|
2481
|
+
}
|
|
2482
|
+
return void 0;
|
|
2483
|
+
}));
|
|
2484
|
+
const newMessages = messages.filter((m) => {
|
|
2485
|
+
if (typeof m !== "object" || m === null) return false;
|
|
2486
|
+
const obj = m;
|
|
2487
|
+
const id = obj["ts"] ?? obj["id"];
|
|
2488
|
+
return !beforeIds.has(id);
|
|
2489
|
+
});
|
|
2490
|
+
const channelNames = flat["channels"] ?? [];
|
|
2491
|
+
const channelIdMap = {};
|
|
2492
|
+
for (const ch of channelNames) {
|
|
2493
|
+
if (typeof ch === "object" && ch !== null) {
|
|
2494
|
+
const obj = ch;
|
|
2495
|
+
const name = String(obj["name"] ?? "");
|
|
2496
|
+
const id = String(obj["id"] ?? "");
|
|
2497
|
+
channelIdMap[id] = name;
|
|
2498
|
+
}
|
|
2499
|
+
}
|
|
2500
|
+
const matchingMessages = newMessages.filter((m) => {
|
|
2501
|
+
if (typeof m !== "object" || m === null) return false;
|
|
2502
|
+
const obj = m;
|
|
2503
|
+
const channelId = String(obj["channel"] ?? "");
|
|
2504
|
+
const channelName = channelIdMap[channelId] ?? channelId;
|
|
2505
|
+
return channels.some((c) => channelName === c || channelId === c);
|
|
2506
|
+
});
|
|
2507
|
+
if (negated) {
|
|
2508
|
+
const passed = matchingMessages.length === 0;
|
|
2509
|
+
return {
|
|
2510
|
+
criterionId: criterion.id,
|
|
2511
|
+
status: passed ? "pass" : "fail",
|
|
2512
|
+
confidence: 1,
|
|
2513
|
+
explanation: passed ? `No new messages were posted in #${channels.join(", #")}` : `Found ${matchingMessages.length} new message(s) in #${channels.join(", #")}`
|
|
2514
|
+
};
|
|
2515
|
+
} else {
|
|
2516
|
+
const passed = matchingMessages.length > 0;
|
|
2517
|
+
return {
|
|
2518
|
+
criterionId: criterion.id,
|
|
2519
|
+
status: passed ? "pass" : "fail",
|
|
2520
|
+
confidence: 1,
|
|
2521
|
+
explanation: passed ? `Found ${matchingMessages.length} new message(s) in #${channels.join(", #")}` : `No new messages found in #${channels.join(", #")}`
|
|
2522
|
+
};
|
|
2523
|
+
}
|
|
2524
|
+
}
|
|
2525
|
+
case "content_check": {
|
|
2526
|
+
const flat = flattenTwinState(stateView.after);
|
|
2527
|
+
const negated = assertion.negated ?? false;
|
|
2528
|
+
const patterns = assertion.contentPatterns ?? [];
|
|
2529
|
+
const subjectWords = assertion.subject.toLowerCase().split(/\s+/);
|
|
2530
|
+
let contentToCheck = "";
|
|
2531
|
+
const issues = flat["issues"] ?? [];
|
|
2532
|
+
if (subjectWords.includes("issue")) {
|
|
2533
|
+
for (const issue of issues) {
|
|
2534
|
+
if (typeof issue === "object" && issue !== null) {
|
|
2535
|
+
const obj = issue;
|
|
2536
|
+
contentToCheck += String(obj["body"] ?? "") + " " + String(obj["title"] ?? "") + " ";
|
|
2537
|
+
}
|
|
2538
|
+
}
|
|
2539
|
+
}
|
|
2540
|
+
const messages = flat["messages"] ?? [];
|
|
2541
|
+
if (subjectWords.includes("message") || subjectWords.includes("reply")) {
|
|
2542
|
+
for (const msg of messages) {
|
|
2543
|
+
if (typeof msg === "object" && msg !== null) {
|
|
2544
|
+
const obj = msg;
|
|
2545
|
+
contentToCheck += String(obj["text"] ?? "") + " ";
|
|
2546
|
+
}
|
|
2547
|
+
}
|
|
2548
|
+
}
|
|
2549
|
+
if (!contentToCheck.trim()) {
|
|
2550
|
+
return {
|
|
2551
|
+
criterionId: criterion.id,
|
|
2552
|
+
status: negated ? "pass" : "fail",
|
|
2553
|
+
confidence: 0.7,
|
|
2554
|
+
explanation: negated ? `No ${assertion.subject} content found to check \u2014 passes by default` : `No ${assertion.subject} content found in twin state`
|
|
2555
|
+
};
|
|
2556
|
+
}
|
|
2557
|
+
const lowerContent = contentToCheck.toLowerCase();
|
|
2558
|
+
const foundPatterns = patterns.filter((p) => lowerContent.includes(p.toLowerCase()));
|
|
2559
|
+
if (negated) {
|
|
2560
|
+
const passed = foundPatterns.length === 0;
|
|
2561
|
+
return {
|
|
2562
|
+
criterionId: criterion.id,
|
|
2563
|
+
status: passed ? "pass" : "fail",
|
|
2564
|
+
confidence: 1,
|
|
2565
|
+
explanation: passed ? `Content does not contain any of the checked patterns` : `Content contains: ${foundPatterns.map((p) => `"${p}"`).join(", ")}`
|
|
2566
|
+
};
|
|
2567
|
+
} else {
|
|
2568
|
+
const passed = foundPatterns.length > 0;
|
|
2569
|
+
return {
|
|
2570
|
+
criterionId: criterion.id,
|
|
2571
|
+
status: passed ? "pass" : "fail",
|
|
2572
|
+
confidence: 1,
|
|
2573
|
+
explanation: passed ? `Content contains: ${foundPatterns.map((p) => `"${p}"`).join(", ")}` : `Content does not contain any of: ${patterns.map((p) => `"${p}"`).join(", ")}`
|
|
2574
|
+
};
|
|
2575
|
+
}
|
|
2576
|
+
}
|
|
2051
2577
|
}
|
|
2052
2578
|
}
|
|
2053
2579
|
function evaluateCount(criterionId, type, expected, actual, subject, predicate) {
|
|
@@ -2083,8 +2609,154 @@ function evaluateCount(criterionId, type, expected, actual, subject, predicate)
|
|
|
2083
2609
|
}
|
|
2084
2610
|
}
|
|
2085
2611
|
|
|
2612
|
+
// src/evaluator/llm-provider.ts
|
|
2613
|
+
/**
 * Infer the LLM provider from a model identifier.
 *
 * @param {string} model - Model name, e.g. "gemini-2.0-flash", "claude-…", "gpt-4o", "o3-mini".
 * @returns {"gemini" | "anthropic" | "openai" | "openai-compatible"} Provider id.
 *   Anything that is not recognised as Gemini, Anthropic or OpenAI (including
 *   llama, mixtral, mistral, deepseek, qwen, codestral and command models)
 *   is treated as an OpenAI-compatible endpoint.
 */
function detectProvider(model) {
  if (model.startsWith("gemini-")) return "gemini";
  if (model.startsWith("claude-")) return "anthropic";
  // OpenAI o-series: accept the bare name ("o1", "o3") as well as suffixed
  // variants ("o1-preview", "o3-mini", "o4-mini"). The previous prefix test
  // ("o1-", "o3-", "o4-") missed the bare names.
  if (model.startsWith("gpt-") || /^o[134](?:-|$)/.test(model)) return "openai";
  return "openai-compatible";
}
|
|
2620
|
+
// Provider id -> name of the environment variable that is read for that
// provider's API key when no explicit key is supplied.
var PROVIDER_ENV_VARS = {
  "gemini": "GEMINI_API_KEY",
  "anthropic": "ANTHROPIC_API_KEY",
  "openai": "OPENAI_API_KEY",
  "openai-compatible": "LLM_API_KEY"
};
|
|
2626
|
+
/**
 * Look up the environment variable name associated with a provider.
 *
 * @param {string} provider - Provider id (a key of PROVIDER_ENV_VARS).
 * @returns {string | undefined} The variable name, or undefined for an unknown provider.
 */
function getProviderEnvVar(provider) {
  const { [provider]: envVarName } = PROVIDER_ENV_VARS;
  return envVarName;
}
|
|
2629
|
+
/**
 * Pick the API key to use for a provider.
 *
 * @param {string | undefined} explicitKey - Key supplied by the caller; wins when truthy.
 * @param {string} provider - Provider id used to select the fallback environment variable.
 * @returns {string} The explicit key, else the provider's environment variable,
 *   else the empty string when neither is available.
 */
function resolveProviderApiKey(explicitKey, provider) {
  if (!explicitKey) {
    const envVarName = PROVIDER_ENV_VARS[provider];
    const fromEnvironment = process.env[envVarName];
    return fromEnvironment ?? "";
  }
  return explicitKey;
}
|
|
2633
|
+
// Upper bound, in milliseconds, handed to AbortSignal.timeout() for provider HTTP requests (60 seconds).
var REQUEST_TIMEOUT_MS = 60 * 1000;
|
|
2634
|
+
/**
 * Dispatch a prompt to the provider-specific client selected by `options.provider`.
 *
 * @param {object} options - Request options; forwarded unchanged to the provider client.
 * @param {string} options.provider - "gemini", "anthropic", "openai" or "openai-compatible".
 * @param {string} options.model - Model identifier (logged and forwarded).
 * @returns {Promise<string>} Whatever the selected provider client resolves to.
 * @throws {Error} When `options.provider` is not one of the supported ids. Previously
 *   the switch fell through and the promise silently resolved to `undefined`.
 */
async function callLlm(options) {
  debug("Calling LLM provider", { provider: options.provider, model: options.model });
  switch (options.provider) {
    case "gemini":
      return callGemini(options);
    case "anthropic":
      return callAnthropic(options);
    case "openai":
      return callOpenAi(options);
    case "openai-compatible":
      return callOpenAiCompatible(options);
    default:
      // Fail loudly instead of handing `undefined` to the response parser.
      throw new Error(`Unsupported LLM provider: ${String(options.provider)}`);
  }
}
|
|
2647
|
+
/**
 * Send one system + user prompt to the Gemini `generateContent` endpoint and
 * return the reply text.
 *
 * @param {object} options
 * @param {string} options.model - Model name interpolated into the request URL.
 * @param {string} options.apiKey - Sent in the `x-goog-api-key` header.
 * @param {string} options.systemPrompt - Sent as `systemInstruction`.
 * @param {string} options.userPrompt - Sent as the single `contents` entry.
 * @param {number} options.maxTokens - Sent as `generationConfig.maxOutputTokens`.
 * @returns {Promise<string>} Concatenated text of the first candidate's parts.
 * @throws {Error} On a non-OK HTTP status (message carries the status and the
 *   first 200 characters of the body) or when the reply contains no text.
 */
async function callGemini(options) {
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${options.model}:generateContent`;
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": options.apiKey
    },
    body: JSON.stringify({
      systemInstruction: { parts: [{ text: options.systemPrompt }] },
      contents: [{ parts: [{ text: options.userPrompt }] }],
      generationConfig: { maxOutputTokens: options.maxTokens }
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Reading the body is best-effort: the status code alone is still reported.
    const errorText = await response.text().catch(() => "");
    throw new Error(`Gemini API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const candidate = data.candidates?.[0];
  const parts = candidate?.content?.parts;
  // A candidate may carry several parts; join every text part instead of
  // reading only parts[0], and skip parts flagged as model "thought".
  const text = Array.isArray(parts)
    ? parts
        .filter((part) => part?.thought !== true && typeof part?.text === "string")
        .map((part) => part.text)
        .join("")
    : "";
  if (!text) throw new Error("Gemini returned no text content");
  if (candidate?.finishReason === "MAX_TOKENS") {
    warn("Gemini response was truncated (hit max output tokens)");
  }
  return text;
}
|
|
2674
|
+
/**
 * Send one system + user prompt to the Anthropic Messages endpoint and return
 * the text of the first `text` content block.
 *
 * @param {object} options - Uses `apiKey`, `model`, `maxTokens`, `systemPrompt`, `userPrompt`.
 * @returns {Promise<string>} Text of the first content block whose type is "text".
 * @throws {Error} On a non-OK HTTP status (status plus first 200 characters of
 *   the body) or when no text block with content is present.
 */
async function callAnthropic(options) {
  const payload = {
    model: options.model,
    max_tokens: options.maxTokens,
    system: options.systemPrompt,
    messages: [{ role: "user", content: options.userPrompt }]
  };
  const requestHeaders = {
    "content-type": "application/json",
    "x-api-key": options.apiKey,
    "anthropic-version": "2023-06-01"
  };
  const response = await fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Body read is best-effort; fall back to an empty detail string.
    const detail = await response.text().catch(() => "");
    throw new Error(`Anthropic API error: ${response.status} ${detail.slice(0, 200)}`);
  }
  const data = await response.json();
  const firstTextBlock = data.content?.find((block) => block.type === "text");
  if (!firstTextBlock?.text) {
    throw new Error("Anthropic returned no text content");
  }
  return firstTextBlock.text;
}
|
|
2699
|
+
/**
 * Send one system + user prompt to the OpenAI Chat Completions endpoint and
 * return the first choice's message content.
 *
 * @param {object} options
 * @param {string} options.model - Model name; also decides which token-limit field is sent.
 * @param {string} options.apiKey - Sent as a Bearer token.
 * @param {string} options.systemPrompt - Sent as the "system" message.
 * @param {string} options.userPrompt - Sent as the "user" message.
 * @param {number} options.maxTokens - Output token limit.
 * @returns {Promise<string>} `choices[0].message.content`.
 * @throws {Error} On a non-OK HTTP status (status plus first 200 characters of
 *   the body) or when the reply has no content.
 */
async function callOpenAi(options) {
  // o-series reasoning models (o1, o3-mini, o4-mini, ...) reject the deprecated
  // `max_tokens` field and require `max_completion_tokens`. All other models
  // keep the field they were already being sent.
  const usesCompletionTokenLimit = /^o\d/.test(options.model);
  const tokenLimit = usesCompletionTokenLimit
    ? { max_completion_tokens: options.maxTokens }
    : { max_tokens: options.maxTokens };
  const response = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${options.apiKey}`
    },
    body: JSON.stringify({
      model: options.model,
      ...tokenLimit,
      messages: [
        { role: "system", content: options.systemPrompt },
        { role: "user", content: options.userPrompt }
      ]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Body read is best-effort; the status code is reported regardless.
    const errorText = await response.text().catch(() => "");
    throw new Error(`OpenAI API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) throw new Error("OpenAI returned no content");
  return content;
}
|
|
2725
|
+
/**
 * Send one system + user prompt to a self-hosted or third-party endpoint that
 * exposes an OpenAI-style `/v1/chat/completions` route.
 *
 * @param {object} options
 * @param {string} options.baseUrl - Server root. May be given with or without a
 *   trailing "/v1" and with or without trailing slashes.
 * @param {string} options.model - Model name sent in the request body.
 * @param {string} options.apiKey - Sent as a Bearer token.
 * @param {string} options.systemPrompt - Sent as the "system" message.
 * @param {string} options.userPrompt - Sent as the "user" message.
 * @param {number} options.maxTokens - Sent as `max_tokens`.
 * @returns {Promise<string>} `choices[0].message.content`.
 * @throws {Error} When `baseUrl` is missing, on a non-OK HTTP status (status
 *   plus first 200 characters of the body), or when the reply has no content.
 */
async function callOpenAiCompatible(options) {
  if (!options.baseUrl) {
    throw new Error(
      "baseUrl is required for openai-compatible provider. Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
    );
  }
  const trimmedBase = options.baseUrl.replace(/\/+$/, "");
  // Many OpenAI-compatible servers document their base URL as ".../v1".
  // Appending "/v1" unconditionally produced ".../v1/v1/chat/completions".
  const apiRoot = /\/v1$/.test(trimmedBase) ? trimmedBase : `${trimmedBase}/v1`;
  const url = `${apiRoot}/chat/completions`;
  debug("Calling OpenAI-compatible endpoint", { url, model: options.model });
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${options.apiKey}`
    },
    body: JSON.stringify({
      model: options.model,
      max_tokens: options.maxTokens,
      messages: [
        { role: "system", content: options.systemPrompt },
        { role: "user", content: options.userPrompt }
      ]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // Body read is best-effort; the status code is reported regardless.
    const errorText = await response.text().catch(() => "");
    throw new Error(`OpenAI-compatible API error (${options.baseUrl}): ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) throw new Error("OpenAI-compatible API returned no content");
  return content;
}
|
|
2758
|
+
|
|
2086
2759
|
// src/evaluator/llm-judge.ts
|
|
2087
|
-
import Anthropic from "@anthropic-ai/sdk";
|
|
2088
2760
|
var SYSTEM_PROMPT = `You are an evaluator for AI agent testing. You assess whether an agent successfully met a specific success criterion during a scenario run.
|
|
2089
2761
|
|
|
2090
2762
|
You will receive:
|
|
@@ -2192,13 +2864,6 @@ function parseJudgeResponse(text) {
|
|
|
2192
2864
|
};
|
|
2193
2865
|
}
|
|
2194
2866
|
}
|
|
2195
|
-
var clientInstance = null;
|
|
2196
|
-
function getClient(apiKey) {
|
|
2197
|
-
if (!clientInstance) {
|
|
2198
|
-
clientInstance = new Anthropic({ apiKey });
|
|
2199
|
-
}
|
|
2200
|
-
return clientInstance;
|
|
2201
|
-
}
|
|
2202
2867
|
async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAfter, stateDiff, trace, options) {
|
|
2203
2868
|
const context = {
|
|
2204
2869
|
criterion,
|
|
@@ -2208,43 +2873,35 @@ async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAf
|
|
|
2208
2873
|
stateDiff,
|
|
2209
2874
|
trace
|
|
2210
2875
|
};
|
|
2211
|
-
|
|
2212
|
-
|
|
2876
|
+
const provider = detectProvider(options.model);
|
|
2877
|
+
const apiKey = resolveProviderApiKey(options.apiKey, provider);
|
|
2878
|
+
if (!apiKey) {
|
|
2879
|
+
const envVar = getProviderEnvVar(provider);
|
|
2880
|
+
error(`No API key for ${provider} evaluation`);
|
|
2213
2881
|
return {
|
|
2214
2882
|
criterionId: criterion.id,
|
|
2215
2883
|
status: "fail",
|
|
2216
2884
|
confidence: 0,
|
|
2217
|
-
explanation:
|
|
2885
|
+
explanation: `No ${envVar} configured for probabilistic evaluation`
|
|
2218
2886
|
};
|
|
2219
2887
|
}
|
|
2220
|
-
const client = getClient(options.apiKey);
|
|
2221
2888
|
debug("Calling LLM judge", {
|
|
2222
2889
|
criterion: criterion.id,
|
|
2223
2890
|
model: options.model,
|
|
2891
|
+
provider,
|
|
2224
2892
|
traceLength: String(trace.length)
|
|
2225
2893
|
});
|
|
2226
2894
|
try {
|
|
2227
|
-
const
|
|
2895
|
+
const text = await callLlm({
|
|
2896
|
+
provider,
|
|
2228
2897
|
model: options.model,
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
|
|
2232
|
-
|
|
2233
|
-
|
|
2234
|
-
content: buildUserPrompt(context)
|
|
2235
|
-
}
|
|
2236
|
-
]
|
|
2898
|
+
apiKey,
|
|
2899
|
+
systemPrompt: SYSTEM_PROMPT,
|
|
2900
|
+
userPrompt: buildUserPrompt(context),
|
|
2901
|
+
maxTokens: 512,
|
|
2902
|
+
baseUrl: options.baseUrl
|
|
2237
2903
|
});
|
|
2238
|
-
const
|
|
2239
|
-
if (!textBlock || textBlock.type !== "text") {
|
|
2240
|
-
return {
|
|
2241
|
-
criterionId: criterion.id,
|
|
2242
|
-
status: "fail",
|
|
2243
|
-
confidence: 0.3,
|
|
2244
|
-
explanation: "LLM returned no text content"
|
|
2245
|
-
};
|
|
2246
|
-
}
|
|
2247
|
-
const judgeResult = parseJudgeResponse(textBlock.text);
|
|
2904
|
+
const judgeResult = parseJudgeResponse(text);
|
|
2248
2905
|
debug("LLM judge result", {
|
|
2249
2906
|
criterion: criterion.id,
|
|
2250
2907
|
status: judgeResult.status,
|
|
@@ -2310,7 +2967,18 @@ async function evaluateRun(criteria, context, config) {
|
|
|
2310
2967
|
status: result.status
|
|
2311
2968
|
});
|
|
2312
2969
|
}
|
|
2970
|
+
const apiKeyPresent = config.apiKey.trim().length > 0 && config.apiKey !== "missing";
|
|
2313
2971
|
for (const criterion of probabilisticCriteria) {
|
|
2972
|
+
if (!apiKeyPresent) {
|
|
2973
|
+
progress(`Skipping [P] ${criterion.description} (no API key)`);
|
|
2974
|
+
evaluations.push({
|
|
2975
|
+
criterionId: criterion.id,
|
|
2976
|
+
status: "fail",
|
|
2977
|
+
confidence: 0,
|
|
2978
|
+
explanation: "Skipped: no ANTHROPIC_API_KEY configured for LLM evaluation"
|
|
2979
|
+
});
|
|
2980
|
+
continue;
|
|
2981
|
+
}
|
|
2314
2982
|
progress(`Evaluating [P] ${criterion.description}`);
|
|
2315
2983
|
const result = await evaluateWithLlm(
|
|
2316
2984
|
criterion,
|
|
@@ -2319,7 +2987,7 @@ async function evaluateRun(criteria, context, config) {
|
|
|
2319
2987
|
context.stateAfter,
|
|
2320
2988
|
context.stateDiff,
|
|
2321
2989
|
context.trace,
|
|
2322
|
-
{ apiKey: config.apiKey, model: config.model }
|
|
2990
|
+
{ apiKey: config.apiKey, model: config.model, baseUrl: config.baseUrl }
|
|
2323
2991
|
);
|
|
2324
2992
|
evaluations.push(result);
|
|
2325
2993
|
debug("Probabilistic evaluation", {
|
|
@@ -2386,28 +3054,34 @@ function generateSummary(evaluations, satisfactionScore) {
|
|
|
2386
3054
|
}
|
|
2387
3055
|
|
|
2388
3056
|
// src/telemetry/recorder.ts
|
|
2389
|
-
import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as
|
|
3057
|
+
import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as readFileSync7, readdirSync, existsSync as existsSync6, unlinkSync as unlinkSync2, statSync } from "fs";
|
|
2390
3058
|
import { join as join4 } from "path";
|
|
2391
3059
|
import { randomUUID } from "crypto";
|
|
2392
3060
|
|
|
2393
3061
|
// src/config/config.ts
|
|
2394
|
-
import { readFileSync as
|
|
3062
|
+
import { readFileSync as readFileSync6, writeFileSync as writeFileSync3, mkdirSync as mkdirSync2, existsSync as existsSync5 } from "fs";
|
|
2395
3063
|
import { join as join3 } from "path";
|
|
2396
3064
|
import { homedir } from "os";
|
|
2397
|
-
import { z } from "zod";
|
|
3065
|
+
import { z as z2 } from "zod";
|
|
2398
3066
|
var ARCHAL_DIR_NAME = ".archal";
|
|
2399
3067
|
var CONFIG_FILE_NAME = "config.json";
|
|
2400
|
-
var evaluatorConfigSchema =
|
|
2401
|
-
model:
|
|
2402
|
-
apiKey:
|
|
3068
|
+
// Schema for the `evaluator` section of the config file.
//   model   - evaluator model name; defaults to "gemini-2.0-flash".
//   apiKey  - defaults to "env:GEMINI_API_KEY".
//             NOTE(review): the "env:" prefix looks like an indirection that is
//             resolved later by resolveApiKey — confirm against that helper.
//   baseUrl - optional endpoint override.
var evaluatorConfigSchema = z2.object({
  model: z2
    .string()
    .default("gemini-2.0-flash"),
  apiKey: z2
    .string()
    .default("env:GEMINI_API_KEY"),
  baseUrl: z2
    .string()
    .optional()
});
|
|
3073
|
+
// Schema for the `seedGeneration` section of the config file.
//   model        - defaults to "gemini-3-flash-preview".
//   geminiApiKey - defaults to "env:GEMINI_API_KEY".
var seedGenerationConfigSchema = z2.object({
  model: z2
    .string()
    .default("gemini-3-flash-preview"),
  geminiApiKey: z2
    .string()
    .default("env:GEMINI_API_KEY")
});
|
|
2404
|
-
var defaultsConfigSchema =
|
|
2405
|
-
runs:
|
|
2406
|
-
timeout:
|
|
3077
|
+
// Schema for the `defaults` section of the config file. Both values must be
// positive integers.
//   runs    - defaults to 5.
//   timeout - defaults to 120. NOTE(review): unit is not visible here — presumably seconds; confirm.
var defaultsConfigSchema = z2.object({
  runs: z2
    .number()
    .int()
    .positive()
    .default(5),
  timeout: z2
    .number()
    .int()
    .positive()
    .default(120)
});
|
|
2408
|
-
var configFileSchema =
|
|
2409
|
-
telemetry:
|
|
3081
|
+
// Top-level schema for the config file. Telemetry defaults to off; each nested
// section defaults to an empty object and is described by its own schema.
var configFileSchema = z2.object({
  telemetry: z2
    .boolean()
    .default(false),
  evaluator: evaluatorConfigSchema
    .default({}),
  seedGeneration: seedGenerationConfigSchema
    .default({}),
  defaults: defaultsConfigSchema
    .default({})
});
|
|
2413
3087
|
function getArchalDir() {
|
|
@@ -2418,7 +3092,7 @@ function getConfigPath() {
|
|
|
2418
3092
|
}
|
|
2419
3093
|
function ensureArchalDir() {
|
|
2420
3094
|
const dir = getArchalDir();
|
|
2421
|
-
if (!
|
|
3095
|
+
if (!existsSync5(dir)) {
|
|
2422
3096
|
mkdirSync2(dir, { recursive: true });
|
|
2423
3097
|
debug("Created archal directory", { path: dir });
|
|
2424
3098
|
}
|
|
@@ -2426,19 +3100,19 @@ function ensureArchalDir() {
|
|
|
2426
3100
|
}
|
|
2427
3101
|
/**
 * Read and validate the config file.
 *
 * @returns {object} The parsed config. When the file does not exist, or cannot
 *   be read, parsed as JSON, or validated, the schema defaults are returned
 *   instead (the failure is reported through `error`).
 */
function loadConfigFile() {
  const configPath = getConfigPath();
  if (existsSync5(configPath)) {
    try {
      const fileContents = readFileSync6(configPath, "utf-8");
      const json = JSON.parse(fileContents);
      const validated = configFileSchema.parse(json);
      debug("Loaded config file", { path: configPath });
      return validated;
    } catch (err) {
      // A broken config file is not fatal: report it and fall back to defaults.
      const message = err instanceof Error ? err.message : String(err);
      error(`Failed to parse config file at ${configPath}: ${message}. Using defaults.`);
      return configFileSchema.parse({});
    }
  }
  debug("No config file found, using defaults", { path: configPath });
  return configFileSchema.parse({});
}
|
|
@@ -2455,16 +3129,24 @@ function loadConfig() {
|
|
|
2455
3129
|
const envModel = process.env["ARCHAL_MODEL"];
|
|
2456
3130
|
const envRuns = process.env["ARCHAL_RUNS"];
|
|
2457
3131
|
const envTimeout = process.env["ARCHAL_TIMEOUT"];
|
|
2458
|
-
const
|
|
3132
|
+
const envBaseUrl = process.env["ARCHAL_EVALUATOR_BASE_URL"];
|
|
3133
|
+
const envGeminiApiKey = process.env["GEMINI_API_KEY"];
|
|
3134
|
+
const envSeedModel = process.env["ARCHAL_SEED_MODEL"];
|
|
2459
3135
|
const telemetry = envTelemetry !== void 0 ? envTelemetry === "true" : file.telemetry;
|
|
2460
3136
|
const model = envModel ?? file.evaluator.model;
|
|
2461
3137
|
const runs = envRuns !== void 0 ? parseInt(envRuns, 10) : file.defaults.runs;
|
|
2462
3138
|
const timeout = envTimeout !== void 0 ? parseInt(envTimeout, 10) : file.defaults.timeout;
|
|
2463
|
-
const apiKey =
|
|
3139
|
+
const apiKey = resolveApiKey(file.evaluator.apiKey);
|
|
3140
|
+
const geminiApiKey = envGeminiApiKey ?? resolveApiKey(file.seedGeneration.geminiApiKey);
|
|
3141
|
+
const seedModel = envSeedModel ?? file.seedGeneration.model;
|
|
3142
|
+
const baseUrl = envBaseUrl ?? file.evaluator.baseUrl;
|
|
2464
3143
|
return {
|
|
2465
3144
|
telemetry,
|
|
2466
3145
|
apiKey,
|
|
2467
3146
|
model,
|
|
3147
|
+
baseUrl,
|
|
3148
|
+
geminiApiKey,
|
|
3149
|
+
seedModel,
|
|
2468
3150
|
runs: Number.isNaN(runs) ? 5 : runs,
|
|
2469
3151
|
timeout: Number.isNaN(timeout) ? 120 : timeout,
|
|
2470
3152
|
archalDir: getArchalDir(),
|
|
@@ -2475,9 +3157,9 @@ function saveConfig(config) {
|
|
|
2475
3157
|
const dir = ensureArchalDir();
|
|
2476
3158
|
const configPath = join3(dir, CONFIG_FILE_NAME);
|
|
2477
3159
|
let existing;
|
|
2478
|
-
if (
|
|
3160
|
+
if (existsSync5(configPath)) {
|
|
2479
3161
|
try {
|
|
2480
|
-
const raw =
|
|
3162
|
+
const raw = readFileSync6(configPath, "utf-8");
|
|
2481
3163
|
existing = configFileSchema.parse(JSON.parse(raw));
|
|
2482
3164
|
} catch {
|
|
2483
3165
|
existing = configFileSchema.parse({});
|
|
@@ -2491,31 +3173,27 @@ function saveConfig(config) {
|
|
|
2491
3173
|
...existing.evaluator,
|
|
2492
3174
|
...config.evaluator
|
|
2493
3175
|
},
|
|
3176
|
+
seedGeneration: {
|
|
3177
|
+
...existing.seedGeneration,
|
|
3178
|
+
...config.seedGeneration
|
|
3179
|
+
},
|
|
2494
3180
|
defaults: {
|
|
2495
3181
|
...existing.defaults,
|
|
2496
3182
|
...config.defaults
|
|
2497
3183
|
}
|
|
2498
3184
|
};
|
|
2499
|
-
writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", "utf-8");
|
|
2500
|
-
try {
|
|
2501
|
-
chmodSync(configPath, 384);
|
|
2502
|
-
} catch {
|
|
2503
|
-
}
|
|
3185
|
+
writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
|
|
2504
3186
|
debug("Saved config file", { path: configPath });
|
|
2505
3187
|
}
|
|
2506
3188
|
function initConfig() {
|
|
2507
3189
|
const configPath = getConfigPath();
|
|
2508
|
-
if (
|
|
3190
|
+
if (existsSync5(configPath)) {
|
|
2509
3191
|
warn(`Config file already exists at ${configPath}`);
|
|
2510
3192
|
return configPath;
|
|
2511
3193
|
}
|
|
2512
3194
|
const defaultConfig = configFileSchema.parse({});
|
|
2513
3195
|
ensureArchalDir();
|
|
2514
|
-
writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", "utf-8");
|
|
2515
|
-
try {
|
|
2516
|
-
chmodSync(configPath, 384);
|
|
2517
|
-
} catch {
|
|
2518
|
-
}
|
|
3196
|
+
writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
|
|
2519
3197
|
return configPath;
|
|
2520
3198
|
}
|
|
2521
3199
|
function setConfigValue(key, value) {
|
|
@@ -2530,13 +3208,20 @@ function setConfigValue(key, value) {
|
|
|
2530
3208
|
}
|
|
2531
3209
|
if (parts.length === 2) {
|
|
2532
3210
|
const [section, prop] = parts;
|
|
2533
|
-
if (section === "evaluator" && (prop === "model" || prop === "apiKey")) {
|
|
3211
|
+
if (section === "evaluator" && (prop === "model" || prop === "apiKey" || prop === "baseUrl")) {
|
|
2534
3212
|
saveConfig({
|
|
2535
3213
|
...file,
|
|
2536
3214
|
evaluator: { ...file.evaluator, [prop]: value }
|
|
2537
3215
|
});
|
|
2538
3216
|
return;
|
|
2539
3217
|
}
|
|
3218
|
+
if (section === "seedGeneration" && (prop === "model" || prop === "geminiApiKey")) {
|
|
3219
|
+
saveConfig({
|
|
3220
|
+
...file,
|
|
3221
|
+
seedGeneration: { ...file.seedGeneration, [prop]: value }
|
|
3222
|
+
});
|
|
3223
|
+
return;
|
|
3224
|
+
}
|
|
2540
3225
|
if (section === "defaults" && (prop === "runs" || prop === "timeout")) {
|
|
2541
3226
|
const numValue = parseInt(value, 10);
|
|
2542
3227
|
if (Number.isNaN(numValue) || numValue <= 0) {
|
|
@@ -2550,7 +3235,7 @@ function setConfigValue(key, value) {
|
|
|
2550
3235
|
}
|
|
2551
3236
|
}
|
|
2552
3237
|
throw new Error(
|
|
2553
|
-
`Unknown config key: "${key}". Valid keys: telemetry, evaluator.model, evaluator.apiKey, defaults.runs, defaults.timeout`
|
|
3238
|
+
`Unknown config key: "${key}". Valid keys: telemetry, evaluator.model, evaluator.apiKey, evaluator.baseUrl, seedGeneration.model, seedGeneration.geminiApiKey, defaults.runs, defaults.timeout`
|
|
2554
3239
|
);
|
|
2555
3240
|
}
|
|
2556
3241
|
function getConfigDisplay() {
|
|
@@ -2559,7 +3244,12 @@ function getConfigDisplay() {
|
|
|
2559
3244
|
telemetry: resolved.telemetry,
|
|
2560
3245
|
evaluator: {
|
|
2561
3246
|
model: resolved.model,
|
|
2562
|
-
apiKey: resolved.apiKey ? "***" + resolved.apiKey.slice(-4) : "(not set)"
|
|
3247
|
+
apiKey: resolved.apiKey ? "***" + resolved.apiKey.slice(-4) : "(not set)",
|
|
3248
|
+
...resolved.baseUrl ? { baseUrl: resolved.baseUrl } : {}
|
|
3249
|
+
},
|
|
3250
|
+
seedGeneration: {
|
|
3251
|
+
model: resolved.seedModel,
|
|
3252
|
+
geminiApiKey: resolved.geminiApiKey ? "***" + resolved.geminiApiKey.slice(-4) : "(not set)"
|
|
2563
3253
|
},
|
|
2564
3254
|
defaults: {
|
|
2565
3255
|
runs: resolved.runs,
|
|
@@ -2580,7 +3270,7 @@ function getTracesDir() {
|
|
|
2580
3270
|
}
|
|
2581
3271
|
function ensureTracesDir() {
|
|
2582
3272
|
const dir = getTracesDir();
|
|
2583
|
-
if (!
|
|
3273
|
+
if (!existsSync6(dir)) {
|
|
2584
3274
|
ensureArchalDir();
|
|
2585
3275
|
mkdirSync3(dir, { recursive: true });
|
|
2586
3276
|
}
|
|
@@ -2590,14 +3280,14 @@ function traceFilePath(id) {
|
|
|
2590
3280
|
return join4(getTracesDir(), `${id}.json`);
|
|
2591
3281
|
}
|
|
2592
3282
|
function traceJsonFiles(dir) {
|
|
2593
|
-
return
|
|
3283
|
+
return existsSync6(dir) ? readdirSync(dir).filter((f) => f.endsWith(".json")).sort().reverse() : [];
|
|
2594
3284
|
}
|
|
2595
3285
|
function toMetadata(s) {
|
|
2596
3286
|
return { id: s.id, scenarioTitle: s.scenarioTitle, timestamp: s.timestamp, satisfactionScore: s.satisfactionScore, runCount: s.runCount, entryCount: s.entries.length };
|
|
2597
3287
|
}
|
|
2598
3288
|
function loadTraceByPath(filePath) {
|
|
2599
3289
|
try {
|
|
2600
|
-
return JSON.parse(
|
|
3290
|
+
return JSON.parse(readFileSync7(filePath, "utf-8"));
|
|
2601
3291
|
} catch (err) {
|
|
2602
3292
|
warn(`Failed to load trace: ${err instanceof Error ? err.message : String(err)}`);
|
|
2603
3293
|
return null;
|
|
@@ -2605,7 +3295,7 @@ function loadTraceByPath(filePath) {
|
|
|
2605
3295
|
}
|
|
2606
3296
|
function findTraceByPrefix(prefix) {
|
|
2607
3297
|
const dir = getTracesDir();
|
|
2608
|
-
if (!
|
|
3298
|
+
if (!existsSync6(dir)) return null;
|
|
2609
3299
|
const file = readdirSync(dir).find((f) => f.endsWith(".json") && f.replace(".json", "").startsWith(prefix));
|
|
2610
3300
|
return file ? file.replace(".json", "") : null;
|
|
2611
3301
|
}
|
|
@@ -2641,7 +3331,7 @@ function recordTrace(report) {
|
|
|
2641
3331
|
}
|
|
2642
3332
|
function loadTrace(traceId) {
|
|
2643
3333
|
const filePath = traceFilePath(traceId);
|
|
2644
|
-
if (
|
|
3334
|
+
if (existsSync6(filePath)) return loadTraceByPath(filePath);
|
|
2645
3335
|
const match = findTraceByPrefix(traceId);
|
|
2646
3336
|
return match ? loadTraceByPath(traceFilePath(match)) : null;
|
|
2647
3337
|
}
|
|
@@ -2650,7 +3340,7 @@ function listTraces(limit = 20) {
|
|
|
2650
3340
|
const results = [];
|
|
2651
3341
|
for (const file of traceJsonFiles(dir).slice(0, limit)) {
|
|
2652
3342
|
try {
|
|
2653
|
-
results.push(toMetadata(JSON.parse(
|
|
3343
|
+
results.push(toMetadata(JSON.parse(readFileSync7(join4(dir, file), "utf-8"))));
|
|
2654
3344
|
} catch {
|
|
2655
3345
|
debug(`Skipping corrupted trace file: ${file}`);
|
|
2656
3346
|
}
|
|
@@ -2664,7 +3354,7 @@ function searchTraces(options) {
|
|
|
2664
3354
|
for (const file of traceJsonFiles(dir)) {
|
|
2665
3355
|
if (results.length >= limit) break;
|
|
2666
3356
|
try {
|
|
2667
|
-
const stored = JSON.parse(
|
|
3357
|
+
const stored = JSON.parse(readFileSync7(join4(dir, file), "utf-8"));
|
|
2668
3358
|
if (options.scenario && !stored.scenarioTitle.toLowerCase().includes(options.scenario.toLowerCase())) continue;
|
|
2669
3359
|
if (options.minScore !== void 0 && stored.satisfactionScore < options.minScore) continue;
|
|
2670
3360
|
if (options.maxScore !== void 0 && stored.satisfactionScore > options.maxScore) continue;
|
|
@@ -2679,7 +3369,7 @@ function searchTraces(options) {
|
|
|
2679
3369
|
}
|
|
2680
3370
|
function deleteTrace(traceId) {
|
|
2681
3371
|
let filePath = traceFilePath(traceId);
|
|
2682
|
-
if (!
|
|
3372
|
+
if (!existsSync6(filePath)) {
|
|
2683
3373
|
const match = findTraceByPrefix(traceId);
|
|
2684
3374
|
if (!match) return false;
|
|
2685
3375
|
filePath = traceFilePath(match);
|
|
@@ -2695,7 +3385,7 @@ function deleteTrace(traceId) {
|
|
|
2695
3385
|
}
|
|
2696
3386
|
function deleteAllTraces() {
|
|
2697
3387
|
const dir = getTracesDir();
|
|
2698
|
-
if (!
|
|
3388
|
+
if (!existsSync6(dir)) return 0;
|
|
2699
3389
|
let deleted = 0;
|
|
2700
3390
|
for (const file of readdirSync(dir).filter((f) => f.endsWith(".json"))) {
|
|
2701
3391
|
try {
|
|
@@ -2732,7 +3422,7 @@ function getTraceStats() {
|
|
|
2732
3422
|
const filePath = join4(dir, file);
|
|
2733
3423
|
try {
|
|
2734
3424
|
diskUsageBytes += statSync(filePath).size;
|
|
2735
|
-
const stored = JSON.parse(
|
|
3425
|
+
const stored = JSON.parse(readFileSync7(filePath, "utf-8"));
|
|
2736
3426
|
scores.push(stored.satisfactionScore);
|
|
2737
3427
|
totalRuns += stored.runCount;
|
|
2738
3428
|
totalEntries += stored.entries.length;
|
|
@@ -2979,9 +3669,28 @@ function anonymizeTrace(entries) {
|
|
|
2979
3669
|
}
|
|
2980
3670
|
|
|
2981
3671
|
// src/telemetry/consent.ts
|
|
2982
|
-
import { existsSync as
|
|
3672
|
+
import { existsSync as existsSync7, readFileSync as readFileSync9, writeFileSync as writeFileSync5, unlinkSync as unlinkSync3 } from "fs";
|
|
2983
3673
|
import { join as join5 } from "path";
|
|
2984
3674
|
import { createInterface } from "readline";
|
|
3675
|
+
|
|
3676
|
+
// src/utils/version.ts
|
|
3677
|
+
import { readFileSync as readFileSync8 } from "fs";
|
|
3678
|
+
import { resolve as resolve5 } from "path";
|
|
3679
|
+
import { fileURLToPath as fileURLToPath3 } from "url";
|
|
3680
|
+
var __dirname3 = fileURLToPath3(new URL(".", import.meta.url));
|
|
3681
|
+
function loadVersion() {
|
|
3682
|
+
try {
|
|
3683
|
+
const pkgPath = resolve5(__dirname3, "..", "package.json");
|
|
3684
|
+
const pkg = JSON.parse(readFileSync8(pkgPath, "utf-8"));
|
|
3685
|
+
return typeof pkg.version === "string" ? pkg.version : "0.0.0";
|
|
3686
|
+
} catch {
|
|
3687
|
+
return "0.0.0";
|
|
3688
|
+
}
|
|
3689
|
+
}
|
|
3690
|
+
var CLI_VERSION = loadVersion();
|
|
3691
|
+
var CLI_USER_AGENT = `archal-cli/${CLI_VERSION}`;
|
|
3692
|
+
|
|
3693
|
+
// src/telemetry/consent.ts
|
|
2985
3694
|
var CONSENT_FILE = ".telemetry-consent";
|
|
2986
3695
|
var TELEMETRY_NOTICE = `
|
|
2987
3696
|
Archal collects anonymous usage telemetry to improve the product.
|
|
@@ -3007,7 +3716,7 @@ function getConsentStatus() {
|
|
|
3007
3716
|
const env = process.env["ARCHAL_TELEMETRY"];
|
|
3008
3717
|
if (env !== void 0) return env === "true" ? "granted" : "denied";
|
|
3009
3718
|
try {
|
|
3010
|
-
const record = JSON.parse(
|
|
3719
|
+
const record = JSON.parse(readFileSync9(consentPath(), "utf-8"));
|
|
3011
3720
|
return record.status;
|
|
3012
3721
|
} catch {
|
|
3013
3722
|
return "pending";
|
|
@@ -3015,7 +3724,7 @@ function getConsentStatus() {
|
|
|
3015
3724
|
}
|
|
3016
3725
|
function saveConsent(status) {
|
|
3017
3726
|
const dir = ensureArchalDir();
|
|
3018
|
-
const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version:
|
|
3727
|
+
const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version: CLI_VERSION };
|
|
3019
3728
|
writeFileSync5(join5(dir, CONSENT_FILE), JSON.stringify(record, null, 2) + "\n", "utf-8");
|
|
3020
3729
|
debug("Saved telemetry consent", { status });
|
|
3021
3730
|
}
|
|
@@ -3033,7 +3742,7 @@ async function promptForConsent() {
|
|
|
3033
3742
|
}
|
|
3034
3743
|
process.stderr.write(TELEMETRY_NOTICE);
|
|
3035
3744
|
const rl = createInterface({ input: process.stdin, output: process.stderr });
|
|
3036
|
-
return new Promise((
|
|
3745
|
+
return new Promise((resolve13) => {
|
|
3037
3746
|
rl.question("\nEnable anonymous telemetry? [y/N] ", (answer) => {
|
|
3038
3747
|
rl.close();
|
|
3039
3748
|
const enabled = answer.trim().toLowerCase() === "y";
|
|
@@ -3044,7 +3753,7 @@ async function promptForConsent() {
|
|
|
3044
3753
|
denyConsent();
|
|
3045
3754
|
process.stderr.write("\nTelemetry disabled.\n\n");
|
|
3046
3755
|
}
|
|
3047
|
-
|
|
3756
|
+
resolve13(enabled);
|
|
3048
3757
|
});
|
|
3049
3758
|
});
|
|
3050
3759
|
}
|
|
@@ -3053,11 +3762,11 @@ async function ensureConsentResolved() {
|
|
|
3053
3762
|
}
|
|
3054
3763
|
|
|
3055
3764
|
// src/telemetry/uploader.ts
|
|
3056
|
-
var ENDPOINT = "https://api.archal.dev/v1/traces";
|
|
3765
|
+
var ENDPOINT = process.env["ARCHAL_TELEMETRY_URL"] ?? "https://api.archal.dev/v1/traces";
|
|
3057
3766
|
var BATCH_SIZE = 50;
|
|
3058
3767
|
var MAX_RETRIES = 3;
|
|
3059
3768
|
var BASE_RETRY_DELAY_MS = 1e3;
|
|
3060
|
-
var
|
|
3769
|
+
var REQUEST_TIMEOUT_MS2 = 3e4;
|
|
3061
3770
|
var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([408, 429, 500, 502, 503, 504]);
|
|
3062
3771
|
function isTelemetryEnabled() {
|
|
3063
3772
|
const consent = getConsentStatus();
|
|
@@ -3072,7 +3781,7 @@ function buildMetadata(report, totalEntries) {
|
|
|
3072
3781
|
if (prefix) twinNames.add(prefix);
|
|
3073
3782
|
}
|
|
3074
3783
|
return {
|
|
3075
|
-
cliVersion:
|
|
3784
|
+
cliVersion: CLI_VERSION,
|
|
3076
3785
|
nodeVersion: process.version,
|
|
3077
3786
|
platform: process.platform,
|
|
3078
3787
|
arch: process.arch,
|
|
@@ -3106,7 +3815,7 @@ async function sendBatchWithRetry(payload, batchNum, totalBatches) {
|
|
|
3106
3815
|
alreadySlept = false;
|
|
3107
3816
|
try {
|
|
3108
3817
|
const controller = new AbortController();
|
|
3109
|
-
const timeoutId = setTimeout(() => controller.abort(),
|
|
3818
|
+
const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS2);
|
|
3110
3819
|
const body = JSON.stringify(payload);
|
|
3111
3820
|
debug(`Sending batch ${batchNum}/${totalBatches}`, { entries: String(payload.entries.length), sizeBytes: String(body.length) });
|
|
3112
3821
|
const response = await fetch(ENDPOINT, {
|
|
@@ -3201,8 +3910,7 @@ async function uploadIfEnabled(traceId, report) {
|
|
|
3201
3910
|
}
|
|
3202
3911
|
|
|
3203
3912
|
// src/runner/dynamic-seed-generator.ts
|
|
3204
|
-
import
|
|
3205
|
-
import { z as z2 } from "zod";
|
|
3913
|
+
import { z as z3 } from "zod";
|
|
3206
3914
|
|
|
3207
3915
|
// src/runner/seed-patch.ts
|
|
3208
3916
|
var TWINS_WITHOUT_SEED_FILE_SUPPORT = /* @__PURE__ */ new Set(["supabase"]);
|
|
@@ -3408,7 +4116,7 @@ function getProjectedEntities(baseSeed, patch, collection) {
|
|
|
3408
4116
|
|
|
3409
4117
|
// src/runner/seed-cache.ts
|
|
3410
4118
|
import { createHash as createHash2 } from "crypto";
|
|
3411
|
-
import { existsSync as
|
|
4119
|
+
import { existsSync as existsSync8, mkdirSync as mkdirSync4, readFileSync as readFileSync10, writeFileSync as writeFileSync6, readdirSync as readdirSync2, unlinkSync as unlinkSync4, statSync as statSync2 } from "fs";
|
|
3412
4120
|
import { join as join6 } from "path";
|
|
3413
4121
|
import { homedir as homedir2 } from "os";
|
|
3414
4122
|
var CACHE_VERSION = 1;
|
|
@@ -3419,13 +4127,13 @@ function cacheKey(twinName, baseSeedName, setupText) {
|
|
|
3419
4127
|
return hash.slice(0, 32);
|
|
3420
4128
|
}
|
|
3421
4129
|
function ensureCacheDir() {
|
|
3422
|
-
if (!
|
|
4130
|
+
if (!existsSync8(CACHE_DIR)) {
|
|
3423
4131
|
mkdirSync4(CACHE_DIR, { recursive: true });
|
|
3424
4132
|
}
|
|
3425
4133
|
}
|
|
3426
4134
|
function evictStaleEntries() {
|
|
3427
4135
|
try {
|
|
3428
|
-
if (!
|
|
4136
|
+
if (!existsSync8(CACHE_DIR)) return;
|
|
3429
4137
|
const now = Date.now();
|
|
3430
4138
|
for (const file of readdirSync2(CACHE_DIR)) {
|
|
3431
4139
|
if (!file.endsWith(".json")) continue;
|
|
@@ -3445,7 +4153,7 @@ function getCachedSeed(twinName, baseSeedName, setupText) {
|
|
|
3445
4153
|
const filePath = join6(CACHE_DIR, `${key}.json`);
|
|
3446
4154
|
let raw;
|
|
3447
4155
|
try {
|
|
3448
|
-
raw =
|
|
4156
|
+
raw = readFileSync10(filePath, "utf-8");
|
|
3449
4157
|
} catch {
|
|
3450
4158
|
return null;
|
|
3451
4159
|
}
|
|
@@ -3483,26 +4191,57 @@ function cacheSeed(twinName, baseSeedName, setupText, seed, patch) {
|
|
|
3483
4191
|
}
|
|
3484
4192
|
|
|
3485
4193
|
// src/runner/dynamic-seed-generator.ts
|
|
3486
|
-
var SeedPatchSchema =
|
|
3487
|
-
add:
|
|
3488
|
-
modify:
|
|
3489
|
-
remove:
|
|
4194
|
+
var SeedPatchSchema = z3.object({
|
|
4195
|
+
add: z3.record(z3.array(z3.record(z3.unknown()))).optional(),
|
|
4196
|
+
modify: z3.record(z3.array(z3.record(z3.unknown()))).optional(),
|
|
4197
|
+
remove: z3.record(z3.array(z3.number())).optional()
|
|
3490
4198
|
}).strict();
|
|
3491
|
-
var
|
|
3492
|
-
|
|
3493
|
-
|
|
3494
|
-
|
|
3495
|
-
|
|
3496
|
-
|
|
4199
|
+
var GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";
|
|
4200
|
+
async function callGemini2(apiKey, model, systemPrompt, userPrompt, maxOutputTokens) {
|
|
4201
|
+
const url = `${GEMINI_BASE_URL}/${model}:generateContent`;
|
|
4202
|
+
const controller = new AbortController();
|
|
4203
|
+
const timeout = setTimeout(() => controller.abort(), 6e4);
|
|
4204
|
+
try {
|
|
4205
|
+
const response = await fetch(url, {
|
|
4206
|
+
method: "POST",
|
|
4207
|
+
headers: { "Content-Type": "application/json", "x-goog-api-key": apiKey },
|
|
4208
|
+
body: JSON.stringify({
|
|
4209
|
+
systemInstruction: { parts: [{ text: systemPrompt }] },
|
|
4210
|
+
contents: [{ parts: [{ text: userPrompt }] }],
|
|
4211
|
+
generationConfig: {
|
|
4212
|
+
maxOutputTokens,
|
|
4213
|
+
responseMimeType: "application/json"
|
|
4214
|
+
}
|
|
4215
|
+
}),
|
|
4216
|
+
signal: controller.signal
|
|
4217
|
+
});
|
|
4218
|
+
clearTimeout(timeout);
|
|
4219
|
+
if (response.status === 429 || response.status >= 500) {
|
|
4220
|
+
warn(`Gemini API returned ${response.status}, will retry`);
|
|
4221
|
+
return { text: null, truncated: false };
|
|
4222
|
+
}
|
|
4223
|
+
if (!response.ok) {
|
|
4224
|
+
const errorText = await response.text();
|
|
4225
|
+
warn(`Gemini API error: ${response.status} ${errorText}`);
|
|
4226
|
+
return { text: null, truncated: false };
|
|
4227
|
+
}
|
|
4228
|
+
const data = await response.json();
|
|
4229
|
+
const text = data.candidates?.[0]?.content?.parts?.[0]?.text ?? null;
|
|
4230
|
+
const truncated = data.candidates?.[0]?.finishReason === "MAX_TOKENS";
|
|
4231
|
+
return { text, truncated };
|
|
4232
|
+
} catch (err) {
|
|
4233
|
+
clearTimeout(timeout);
|
|
4234
|
+
throw err;
|
|
3497
4235
|
}
|
|
3498
|
-
return clientInstance2;
|
|
3499
4236
|
}
|
|
3500
4237
|
var SYSTEM_PROMPT2 = `You are a test data generator for Archal, a testing platform for AI agents. Your job is to generate seed data patches that create realistic digital twin states matching a given setup description.
|
|
3501
4238
|
|
|
4239
|
+
CRITICAL CONTEXT: The seed data you generate is what an AI agent will interact with during a test scenario. The agent connects to a digital twin (a behavioral clone of a real service like Slack, GitHub, or Stripe) and uses API calls to read and act on the data. If a message, user, channel, issue, or any other entity described in the setup is NOT present in the seed data, the agent literally cannot find or interact with it, and the test will fail. You must faithfully reproduce EVERY specific detail from the setup description.
|
|
4240
|
+
|
|
3502
4241
|
You will receive:
|
|
3503
|
-
1. The twin type (e.g., "github", "slack")
|
|
3504
|
-
2. A sample of the base seed data showing the
|
|
3505
|
-
3. The current max
|
|
4242
|
+
1. The twin type (e.g., "github", "slack", "stripe")
|
|
4243
|
+
2. A sample of the base seed data showing the exact schema of each entity type
|
|
4244
|
+
3. The current entity counts and max IDs per collection
|
|
3506
4245
|
4. Referential integrity rules
|
|
3507
4246
|
5. A natural language setup description
|
|
3508
4247
|
|
|
@@ -3521,23 +4260,60 @@ Respond with ONLY valid JSON in this exact format:
|
|
|
3521
4260
|
}
|
|
3522
4261
|
}
|
|
3523
4262
|
|
|
3524
|
-
|
|
4263
|
+
## FAITHFULNESS RULES (most important)
|
|
4264
|
+
|
|
4265
|
+
- EVERY specific detail in the setup description MUST be represented in the seed data. This includes:
|
|
4266
|
+
- Exact usernames, display names, and user IDs mentioned
|
|
4267
|
+
- Exact channel names (including whether they are public or private)
|
|
4268
|
+
- Exact message text \u2014 if the setup contains quoted text, it must appear VERBATIM in a message entity's "text" field
|
|
4269
|
+
- Exact dollar amounts, invoice numbers, account numbers
|
|
4270
|
+
- Exact repository names, organization names, issue titles
|
|
4271
|
+
- Exact labels, categories, and statuses
|
|
4272
|
+
- Specific member counts and membership lists
|
|
4273
|
+
- If the setup says a user "mark.wilson" exists and a DIFFERENT user "markwilson-ceo" sent a message, you must create BOTH users with those exact usernames
|
|
4274
|
+
- If the setup quotes a message like "URGENT \u2014 I need you to process...", that exact text must be in a message entity
|
|
4275
|
+
- Company/workspace names in the setup override whatever is in the base seed \u2014 modify the workspace entity accordingly
|
|
4276
|
+
- If the setup mentions a channel has N members, include at least the named users plus enough additional users to reach that count
|
|
4277
|
+
|
|
4278
|
+
## SERVICE-SPECIFIC GUIDANCE
|
|
4279
|
+
|
|
4280
|
+
### Slack
|
|
4281
|
+
- Users need: user_id (format "UXXXX"), name, real_name, display_name, is_bot, is_admin
|
|
4282
|
+
- Channels need: channel_id (format "CXXXX"), name, is_private, members (array of user_ids)
|
|
4283
|
+
- Messages need: ts (unique Slack timestamp like "1706140800.100001"), channel_id, user_id, text, thread_ts (null for top-level, parent's ts for replies), reply_count, reply_users, latest_reply, subtype, edited
|
|
4284
|
+
- For threaded conversations: the parent message has reply_count > 0 and reply_users populated. Reply messages have thread_ts set to the parent's ts
|
|
4285
|
+
- A user must be in a channel's members array to post messages in that channel
|
|
4286
|
+
|
|
4287
|
+
### GitHub
|
|
4288
|
+
- Repos need: owner (the org or user name), name, fullName ("owner/name"), isPrivate
|
|
4289
|
+
- Issues need: repoId, number (sequential), title, body, state ("open"/"closed"), labels (array of label names), user (creator username)
|
|
4290
|
+
- If setup mentions both public and private repos, create both with correct isPrivate values
|
|
4291
|
+
|
|
4292
|
+
### Stripe
|
|
4293
|
+
- Accounts need: accountId, businessName, defaultCurrency, chargesEnabled, payoutsEnabled
|
|
4294
|
+
- Customers need: customerId ("cus_xxx"), name, email, balance (in cents)
|
|
4295
|
+
- PaymentIntents need: paymentIntentId, amount (in cents), currency, status
|
|
4296
|
+
- The account's businessName should match the company name in the setup
|
|
4297
|
+
- Stripe amounts are always in the smallest currency unit (cents for USD \u2014 $24,800 = 2480000)
|
|
4298
|
+
|
|
4299
|
+
## STRUCTURAL RULES
|
|
4300
|
+
|
|
3525
4301
|
- Only include sections (add/modify/remove) and collections that need changes
|
|
3526
4302
|
- Do NOT include id, createdAt, or updatedAt in added entities \u2014 they are auto-assigned
|
|
3527
4303
|
- For modify, include the existing entity's id and only the fields to change
|
|
3528
4304
|
- Maintain referential integrity per the rules provided
|
|
3529
|
-
- Use realistic data (real-looking names, descriptions, timestamps in ISO 8601)
|
|
3530
4305
|
- Match the field types and formats exactly as shown in the base seed example
|
|
3531
4306
|
- If the setup mentions specific counts (e.g., "20 issues"), generate that exact count
|
|
3532
4307
|
- Keep data internally consistent (e.g., issue numbers sequential, branch refs valid)
|
|
4308
|
+
- Use unique ts values for each Slack message (increment by 100+ between messages)
|
|
3533
4309
|
- If the base seed already matches the setup description, respond with {}`;
|
|
3534
|
-
function truncateBaseSeed(baseSeed) {
|
|
4310
|
+
function truncateBaseSeed(baseSeed, maxPerCollection = 2) {
|
|
3535
4311
|
const truncated = {};
|
|
3536
4312
|
for (const [collection, entities] of Object.entries(baseSeed)) {
|
|
3537
4313
|
if (entities.length === 0) {
|
|
3538
4314
|
truncated[collection] = [];
|
|
3539
4315
|
} else {
|
|
3540
|
-
truncated[collection] =
|
|
4316
|
+
truncated[collection] = entities.slice(0, maxPerCollection);
|
|
3541
4317
|
}
|
|
3542
4318
|
}
|
|
3543
4319
|
return truncated;
|
|
@@ -3560,7 +4336,7 @@ function buildSeedGenerationPrompt(twinName, baseSeedData, setupDescription) {
|
|
|
3560
4336
|
let prompt = `## Twin: ${twinName}
|
|
3561
4337
|
|
|
3562
4338
|
`;
|
|
3563
|
-
prompt += `## Base Seed (
|
|
4339
|
+
prompt += `## Base Seed (sample entities per collection, showing exact data shape)
|
|
3564
4340
|
`;
|
|
3565
4341
|
prompt += `\`\`\`json
|
|
3566
4342
|
${JSON.stringify(truncated, null, 2)}
|
|
@@ -3575,6 +4351,10 @@ ${JSON.stringify(truncated, null, 2)}
|
|
|
3575
4351
|
`;
|
|
3576
4352
|
prompt += Object.entries(maxIds).map(([col, id]) => `- ${col}: ${id}`).join("\n");
|
|
3577
4353
|
prompt += "\n\n";
|
|
4354
|
+
prompt += `## Available collections
|
|
4355
|
+
`;
|
|
4356
|
+
prompt += Object.keys(baseSeedData).map((col) => `- ${col}`).join("\n");
|
|
4357
|
+
prompt += "\n\n";
|
|
3578
4358
|
if (relationships.length > 0) {
|
|
3579
4359
|
prompt += `## Referential integrity rules
|
|
3580
4360
|
`;
|
|
@@ -3582,6 +4362,8 @@ ${JSON.stringify(truncated, null, 2)}
|
|
|
3582
4362
|
prompt += "\n\n";
|
|
3583
4363
|
}
|
|
3584
4364
|
prompt += `## Setup Description
|
|
4365
|
+
Generate seed data that faithfully reproduces EVERY detail below. Specific names, messages, amounts, and entities mentioned MUST exist in the generated data.
|
|
4366
|
+
|
|
3585
4367
|
${setupDescription}`;
|
|
3586
4368
|
return prompt;
|
|
3587
4369
|
}
|
|
@@ -3621,11 +4403,10 @@ async function generateDynamicSeed(twinName, baseSeedName, baseSeedData, setupDe
|
|
|
3621
4403
|
return { seed: cached.seed, patch: cached.patch, fromCache: true };
|
|
3622
4404
|
}
|
|
3623
4405
|
}
|
|
3624
|
-
if (!config.
|
|
3625
|
-
warn("No API key for dynamic seed generation, using base seed");
|
|
4406
|
+
if (!config.geminiApiKey) {
|
|
4407
|
+
warn("No Gemini API key for dynamic seed generation, using base seed");
|
|
3626
4408
|
return { seed: baseSeedData, patch: {}, fromCache: false };
|
|
3627
4409
|
}
|
|
3628
|
-
const client = getClient2(config.apiKey);
|
|
3629
4410
|
const userPrompt = buildSeedGenerationPrompt(twinName, baseSeedData, setupDescription);
|
|
3630
4411
|
progress(`Generating dynamic seed for ${twinName}...`);
|
|
3631
4412
|
let patch = null;
|
|
@@ -3641,27 +4422,27 @@ Fix these issues:
|
|
|
3641
4422
|
`;
|
|
3642
4423
|
promptWithFeedback += lastErrors.map((e) => `- ${e}`).join("\n");
|
|
3643
4424
|
}
|
|
3644
|
-
debug("Calling
|
|
4425
|
+
debug("Calling Gemini for dynamic seed", {
|
|
3645
4426
|
twin: twinName,
|
|
3646
4427
|
model: config.model,
|
|
3647
4428
|
attempt: String(attempt + 1)
|
|
3648
4429
|
});
|
|
3649
|
-
const
|
|
3650
|
-
|
|
3651
|
-
|
|
3652
|
-
|
|
3653
|
-
|
|
3654
|
-
|
|
3655
|
-
|
|
3656
|
-
|
|
4430
|
+
const result = await callGemini2(
|
|
4431
|
+
config.geminiApiKey,
|
|
4432
|
+
config.model,
|
|
4433
|
+
SYSTEM_PROMPT2,
|
|
4434
|
+
promptWithFeedback,
|
|
4435
|
+
16384
|
|
4436
|
+
);
|
|
4437
|
+
if (result.truncated) {
|
|
4438
|
+
warn("Gemini response was truncated (hit max output tokens), retrying");
|
|
3657
4439
|
continue;
|
|
3658
4440
|
}
|
|
3659
|
-
|
|
3660
|
-
|
|
3661
|
-
warn("LLM returned no text content for dynamic seed");
|
|
4441
|
+
if (!result.text) {
|
|
4442
|
+
warn("Gemini returned no text content for dynamic seed");
|
|
3662
4443
|
continue;
|
|
3663
4444
|
}
|
|
3664
|
-
patch = parseSeedPatchResponse(
|
|
4445
|
+
patch = parseSeedPatchResponse(result.text);
|
|
3665
4446
|
if (!patch) continue;
|
|
3666
4447
|
const validation = validateSeedPatch(patch, baseSeedData, twinName);
|
|
3667
4448
|
if (!validation.valid) {
|
|
@@ -3693,11 +4474,11 @@ Fix these issues:
|
|
|
3693
4474
|
|
|
3694
4475
|
// src/commands/doctor.ts
|
|
3695
4476
|
import { Command } from "commander";
|
|
3696
|
-
import { existsSync as
|
|
3697
|
-
import { resolve as
|
|
4477
|
+
import { existsSync as existsSync9, readFileSync as readFileSync11 } from "fs";
|
|
4478
|
+
import { resolve as resolve6 } from "path";
|
|
3698
4479
|
import { createRequire as createRequire3 } from "module";
|
|
3699
|
-
import { fileURLToPath as
|
|
3700
|
-
var
|
|
4480
|
+
import { fileURLToPath as fileURLToPath4 } from "url";
|
|
4481
|
+
var __dirname4 = fileURLToPath4(new URL(".", import.meta.url));
|
|
3701
4482
|
var PASS = `${GREEN}${BOLD}pass${RESET}`;
|
|
3702
4483
|
var FAIL = `${RED}${BOLD}FAIL${RESET}`;
|
|
3703
4484
|
var WARN_TAG = `${YELLOW}${BOLD}warn${RESET}`;
|
|
@@ -3712,20 +4493,20 @@ var KNOWN_TWINS = [
|
|
|
3712
4493
|
"google-workspace"
|
|
3713
4494
|
];
|
|
3714
4495
|
function resolveMonorepoRoot2() {
|
|
3715
|
-
let cursor =
|
|
4496
|
+
let cursor = __dirname4;
|
|
3716
4497
|
for (let depth = 0; depth < 8; depth += 1) {
|
|
3717
|
-
const hasTwinsDir =
|
|
3718
|
-
const hasWorkspacePackage =
|
|
4498
|
+
const hasTwinsDir = existsSync9(resolve6(cursor, "twins"));
|
|
4499
|
+
const hasWorkspacePackage = existsSync9(resolve6(cursor, "package.json"));
|
|
3719
4500
|
if (hasTwinsDir && hasWorkspacePackage) {
|
|
3720
4501
|
return cursor;
|
|
3721
4502
|
}
|
|
3722
|
-
const parent =
|
|
4503
|
+
const parent = resolve6(cursor, "..");
|
|
3723
4504
|
if (parent === cursor) {
|
|
3724
4505
|
break;
|
|
3725
4506
|
}
|
|
3726
4507
|
cursor = parent;
|
|
3727
4508
|
}
|
|
3728
|
-
return
|
|
4509
|
+
return resolve6(__dirname4, "..", "..");
|
|
3729
4510
|
}
|
|
3730
4511
|
function statusTag(status) {
|
|
3731
4512
|
switch (status) {
|
|
@@ -3756,7 +4537,7 @@ function checkNodeVersion() {
|
|
|
3756
4537
|
}
|
|
3757
4538
|
function checkArchalDir() {
|
|
3758
4539
|
const dir = getArchalDir();
|
|
3759
|
-
if (
|
|
4540
|
+
if (existsSync9(dir)) {
|
|
3760
4541
|
return {
|
|
3761
4542
|
name: "Archal directory",
|
|
3762
4543
|
status: "pass",
|
|
@@ -3772,7 +4553,7 @@ function checkArchalDir() {
|
|
|
3772
4553
|
}
|
|
3773
4554
|
function checkConfigFile() {
|
|
3774
4555
|
const path = getConfigPath();
|
|
3775
|
-
if (
|
|
4556
|
+
if (existsSync9(path)) {
|
|
3776
4557
|
return {
|
|
3777
4558
|
name: "Config file",
|
|
3778
4559
|
status: "pass",
|
|
@@ -3788,25 +4569,38 @@ function checkConfigFile() {
|
|
|
3788
4569
|
}
|
|
3789
4570
|
function checkApiKey() {
|
|
3790
4571
|
const config = loadConfig();
|
|
3791
|
-
|
|
3792
|
-
|
|
4572
|
+
const provider = detectProvider(config.model);
|
|
4573
|
+
const resolvedKey = resolveProviderApiKey(config.apiKey, provider);
|
|
4574
|
+
const envVar = getProviderEnvVar(provider);
|
|
4575
|
+
const label = provider === "openai-compatible" ? `custom: ${config.model}` : provider;
|
|
4576
|
+
if (provider === "openai-compatible" && !config.baseUrl) {
|
|
4577
|
+
return {
|
|
4578
|
+
name: `Evaluator API key (${label})`,
|
|
4579
|
+
status: "fail",
|
|
4580
|
+
message: "No base URL configured",
|
|
4581
|
+
detail: "Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
|
|
4582
|
+
};
|
|
4583
|
+
}
|
|
4584
|
+
if (resolvedKey && resolvedKey.length > 0) {
|
|
4585
|
+
const masked = "***" + resolvedKey.slice(-4);
|
|
3793
4586
|
return {
|
|
3794
|
-
name:
|
|
4587
|
+
name: `Evaluator API key (${label})`,
|
|
3795
4588
|
status: "pass",
|
|
3796
4589
|
message: `Set (${masked})`
|
|
3797
4590
|
};
|
|
3798
4591
|
}
|
|
3799
4592
|
return {
|
|
3800
|
-
name:
|
|
4593
|
+
name: `Evaluator API key (${label})`,
|
|
3801
4594
|
status: "fail",
|
|
3802
4595
|
message: "Not set",
|
|
3803
|
-
detail:
|
|
4596
|
+
detail: `Required for probabilistic ([P]) criteria evaluation. Set via: export ${envVar}=<your-key>`
|
|
3804
4597
|
};
|
|
3805
4598
|
}
|
|
3806
4599
|
function checkTwinAvailability(twinName) {
|
|
3807
4600
|
const monorepoRoot = resolveMonorepoRoot2();
|
|
3808
|
-
const
|
|
3809
|
-
|
|
4601
|
+
const hasTwinsDir = existsSync9(resolve6(monorepoRoot, "twins"));
|
|
4602
|
+
const distPath = resolve6(monorepoRoot, "twins", twinName, "dist", "index.js");
|
|
4603
|
+
if (existsSync9(distPath)) {
|
|
3810
4604
|
return {
|
|
3811
4605
|
name: `Twin: ${twinName}`,
|
|
3812
4606
|
status: "pass",
|
|
@@ -3823,8 +4617,8 @@ function checkTwinAvailability(twinName) {
|
|
|
3823
4617
|
};
|
|
3824
4618
|
} catch {
|
|
3825
4619
|
}
|
|
3826
|
-
const srcPath =
|
|
3827
|
-
if (
|
|
4620
|
+
const srcPath = resolve6(monorepoRoot, "twins", twinName, "src", "index.ts");
|
|
4621
|
+
if (existsSync9(srcPath)) {
|
|
3828
4622
|
return {
|
|
3829
4623
|
name: `Twin: ${twinName}`,
|
|
3830
4624
|
status: "warn",
|
|
@@ -3832,11 +4626,18 @@ function checkTwinAvailability(twinName) {
|
|
|
3832
4626
|
detail: `Run: pnpm --filter @archal/twin-${twinName} build`
|
|
3833
4627
|
};
|
|
3834
4628
|
}
|
|
4629
|
+
if (!hasTwinsDir) {
|
|
4630
|
+
return {
|
|
4631
|
+
name: `Twin: ${twinName}`,
|
|
4632
|
+
status: "pass",
|
|
4633
|
+
message: "Cloud-hosted (via archal run)"
|
|
4634
|
+
};
|
|
4635
|
+
}
|
|
3835
4636
|
return {
|
|
3836
4637
|
name: `Twin: ${twinName}`,
|
|
3837
4638
|
status: "fail",
|
|
3838
4639
|
message: "Not found",
|
|
3839
|
-
detail: `
|
|
4640
|
+
detail: `Build with: pnpm --filter @archal/twin-${twinName} build`
|
|
3840
4641
|
};
|
|
3841
4642
|
}
|
|
3842
4643
|
function checkAgentConfig() {
|
|
@@ -3848,10 +4649,10 @@ function checkAgentConfig() {
|
|
|
3848
4649
|
message: `ARCHAL_AGENT_COMMAND="${envCommand}"`
|
|
3849
4650
|
};
|
|
3850
4651
|
}
|
|
3851
|
-
const projectConfig =
|
|
3852
|
-
if (
|
|
4652
|
+
const projectConfig = resolve6(".archal.json");
|
|
4653
|
+
if (existsSync9(projectConfig)) {
|
|
3853
4654
|
try {
|
|
3854
|
-
const raw = JSON.parse(
|
|
4655
|
+
const raw = JSON.parse(readFileSync11(projectConfig, "utf-8"));
|
|
3855
4656
|
if (raw.agent?.command) {
|
|
3856
4657
|
return {
|
|
3857
4658
|
name: "Agent command",
|
|
@@ -3876,8 +4677,8 @@ function checkAgentConfig() {
|
|
|
3876
4677
|
};
|
|
3877
4678
|
}
|
|
3878
4679
|
function checkScenario(scenarioPath) {
|
|
3879
|
-
const resolved =
|
|
3880
|
-
if (!
|
|
4680
|
+
const resolved = resolve6(scenarioPath);
|
|
4681
|
+
if (!existsSync9(resolved)) {
|
|
3881
4682
|
return {
|
|
3882
4683
|
name: `Scenario: ${scenarioPath}`,
|
|
3883
4684
|
status: "fail",
|
|
@@ -3897,13 +4698,26 @@ function checkScenario(scenarioPath) {
|
|
|
3897
4698
|
}
|
|
3898
4699
|
const hasProbabilistic = scenario.successCriteria.some((c) => c.type === "probabilistic");
|
|
3899
4700
|
const config = loadConfig();
|
|
3900
|
-
if (hasProbabilistic
|
|
3901
|
-
|
|
3902
|
-
|
|
3903
|
-
|
|
3904
|
-
|
|
3905
|
-
|
|
3906
|
-
|
|
4701
|
+
if (hasProbabilistic) {
|
|
4702
|
+
const provider = detectProvider(config.model);
|
|
4703
|
+
const resolvedKey = resolveProviderApiKey(config.apiKey, provider);
|
|
4704
|
+
const envVar = getProviderEnvVar(provider);
|
|
4705
|
+
if (provider === "openai-compatible" && !config.baseUrl) {
|
|
4706
|
+
return {
|
|
4707
|
+
name: `Scenario: ${scenarioPath}`,
|
|
4708
|
+
status: "fail",
|
|
4709
|
+
message: `Has [P] criteria but no base URL for ${config.model}`,
|
|
4710
|
+
detail: "Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
|
|
4711
|
+
};
|
|
4712
|
+
}
|
|
4713
|
+
if (!resolvedKey) {
|
|
4714
|
+
return {
|
|
4715
|
+
name: `Scenario: ${scenarioPath}`,
|
|
4716
|
+
status: "fail",
|
|
4717
|
+
message: `Has [P] criteria but no ${envVar}`,
|
|
4718
|
+
detail: `${scenario.successCriteria.filter((c) => c.type === "probabilistic").length} probabilistic criteria require an API key`
|
|
4719
|
+
};
|
|
4720
|
+
}
|
|
3907
4721
|
}
|
|
3908
4722
|
const missingTwins = [];
|
|
3909
4723
|
for (const twin of scenario.config.twins) {
|
|
@@ -4005,27 +4819,50 @@ function createDoctorCommand() {
|
|
|
4005
4819
|
|
|
4006
4820
|
// src/auth.ts
|
|
4007
4821
|
import { spawnSync } from "child_process";
|
|
4008
|
-
import {
|
|
4822
|
+
import { existsSync as existsSync10, readFileSync as readFileSync12, unlinkSync as unlinkSync5, writeFileSync as writeFileSync7 } from "fs";
|
|
4009
4823
|
import { join as join7 } from "path";
|
|
4010
4824
|
var CREDENTIALS_FILE = "credentials.json";
|
|
4011
|
-
var
|
|
4012
|
-
|
|
4825
|
+
var AUTH_TOKEN_ENV_VAR = "ARCHAL_TOKEN";
|
|
4826
|
+
function normalizeAuthUrl(value) {
|
|
4827
|
+
const trimmed = value.trim().replace(/\/+$/, "");
|
|
4828
|
+
return trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
|
|
4829
|
+
}
|
|
4830
|
+
var AUTH_BASE_URL = normalizeAuthUrl(process.env["ARCHAL_AUTH_URL"] ?? "https://www.archal.ai");
|
|
4831
|
+
var REQUEST_TIMEOUT_MS3 = 8e3;
|
|
4832
|
+
var ENV_TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
|
|
4013
4833
|
function getCredentialsPath() {
|
|
4014
4834
|
return join7(ensureArchalDir(), CREDENTIALS_FILE);
|
|
4015
4835
|
}
|
|
4016
4836
|
function isPlan(value) {
|
|
4017
4837
|
return value === "free" || value === "pro" || value === "enterprise";
|
|
4018
4838
|
}
|
|
4839
|
+
function isTokenDerivedIdentity(email) {
|
|
4840
|
+
return email === "(from ARCHAL_TOKEN)" || email === "(from token)";
|
|
4841
|
+
}
|
|
4842
|
+
function logRefreshFailure(creds, reason) {
|
|
4843
|
+
if (isTokenDerivedIdentity(creds.email)) {
|
|
4844
|
+
warn(
|
|
4845
|
+
`Could not verify token with ${AUTH_BASE_URL}/auth/me (${reason}). Using token without refreshed account metadata.`
|
|
4846
|
+
);
|
|
4847
|
+
return;
|
|
4848
|
+
}
|
|
4849
|
+
warn(
|
|
4850
|
+
`Could not refresh account metadata from ${AUTH_BASE_URL}/auth/me (${reason}). Using cached credentials.`
|
|
4851
|
+
);
|
|
4852
|
+
}
|
|
4019
4853
|
function readCredentialsFile() {
|
|
4020
4854
|
const path = getCredentialsPath();
|
|
4021
|
-
if (!
|
|
4855
|
+
if (!existsSync10(path)) {
|
|
4022
4856
|
return null;
|
|
4023
4857
|
}
|
|
4024
4858
|
try {
|
|
4025
|
-
const raw =
|
|
4859
|
+
const raw = readFileSync12(path, "utf-8");
|
|
4026
4860
|
const parsed = JSON.parse(raw);
|
|
4027
4861
|
const token = typeof parsed.token === "string" ? parsed.token : typeof parsed.accessToken === "string" ? parsed.accessToken : null;
|
|
4028
4862
|
if (token === null || parsed.refreshToken !== void 0 && typeof parsed.refreshToken !== "string" || typeof parsed.email !== "string" || !isPlan(parsed.plan) || !Array.isArray(parsed.selectedTwins) || !parsed.selectedTwins.every((value) => typeof value === "string") || typeof parsed.expiresAt !== "number") {
|
|
4863
|
+
warn(
|
|
4864
|
+
`Credentials file at ${path} has missing or invalid fields. Run \`archal login\` to re-authenticate.`
|
|
4865
|
+
);
|
|
4029
4866
|
return null;
|
|
4030
4867
|
}
|
|
4031
4868
|
return {
|
|
@@ -4037,9 +4874,32 @@ function readCredentialsFile() {
|
|
|
4037
4874
|
expiresAt: parsed.expiresAt
|
|
4038
4875
|
};
|
|
4039
4876
|
} catch {
|
|
4877
|
+
warn(
|
|
4878
|
+
`Credentials file at ${path} exists but could not be parsed. Delete it and run \`archal login\` to re-authenticate.`
|
|
4879
|
+
);
|
|
4040
4880
|
return null;
|
|
4041
4881
|
}
|
|
4042
4882
|
}
|
|
4883
|
+
function readCredentialsFromEnv() {
|
|
4884
|
+
const raw = process.env[AUTH_TOKEN_ENV_VAR];
|
|
4885
|
+
if (typeof raw !== "string") {
|
|
4886
|
+
return null;
|
|
4887
|
+
}
|
|
4888
|
+
const token = raw.trim();
|
|
4889
|
+
if (token.length === 0) {
|
|
4890
|
+
return null;
|
|
4891
|
+
}
|
|
4892
|
+
const nowSeconds = Math.floor(Date.now() / 1e3);
|
|
4893
|
+
return {
|
|
4894
|
+
token,
|
|
4895
|
+
refreshToken: "",
|
|
4896
|
+
email: "(from ARCHAL_TOKEN)",
|
|
4897
|
+
plan: "free",
|
|
4898
|
+
selectedTwins: [],
|
|
4899
|
+
// API keys are opaque and don't carry exp; keep env-provided token usable.
|
|
4900
|
+
expiresAt: getJwtExpiry(token) ?? nowSeconds + ENV_TOKEN_FALLBACK_TTL_SECONDS
|
|
4901
|
+
};
|
|
4902
|
+
}
|
|
4043
4903
|
function getCredentials() {
|
|
4044
4904
|
const creds = getStoredCredentials();
|
|
4045
4905
|
if (!creds) {
|
|
@@ -4052,7 +4912,7 @@ function getCredentials() {
|
|
|
4052
4912
|
return creds;
|
|
4053
4913
|
}
|
|
4054
4914
|
function getStoredCredentials() {
|
|
4055
|
-
return readCredentialsFile();
|
|
4915
|
+
return readCredentialsFromEnv() ?? readCredentialsFile();
|
|
4056
4916
|
}
|
|
4057
4917
|
function saveCredentials(creds) {
|
|
4058
4918
|
const path = getCredentialsPath();
|
|
@@ -4060,15 +4920,11 @@ function saveCredentials(creds) {
|
|
|
4060
4920
|
accessToken: creds.token,
|
|
4061
4921
|
...creds
|
|
4062
4922
|
};
|
|
4063
|
-
writeFileSync7(path, JSON.stringify(payload, null, 2) + "\n", "utf-8");
|
|
4064
|
-
try {
|
|
4065
|
-
chmodSync2(path, 384);
|
|
4066
|
-
} catch {
|
|
4067
|
-
}
|
|
4923
|
+
writeFileSync7(path, JSON.stringify(payload, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
|
|
4068
4924
|
}
|
|
4069
4925
|
function deleteCredentials() {
|
|
4070
4926
|
const path = getCredentialsPath();
|
|
4071
|
-
if (!
|
|
4927
|
+
if (!existsSync10(path)) {
|
|
4072
4928
|
return false;
|
|
4073
4929
|
}
|
|
4074
4930
|
unlinkSync5(path);
|
|
@@ -4114,21 +4970,86 @@ function requireAuth(options = {}) {
|
|
|
4114
4970
|
process.stderr.write("Tip: archal setup\n");
|
|
4115
4971
|
process.exit(1);
|
|
4116
4972
|
}
|
|
4973
|
+
function isCliTokenExchangeResponse(value) {
|
|
4974
|
+
if (!value || typeof value !== "object") return false;
|
|
4975
|
+
const data = value;
|
|
4976
|
+
return typeof data["accessToken"] === "string" && typeof data["refreshToken"] === "string" && typeof data["email"] === "string" && isPlan(data["plan"]) && Array.isArray(data["selectedTwins"]) && data["selectedTwins"].every((item) => typeof item === "string") && typeof data["expiresAt"] === "number";
|
|
4977
|
+
}
|
|
4978
|
+
function isCliRefreshResponse(value) {
|
|
4979
|
+
if (!value || typeof value !== "object") return false;
|
|
4980
|
+
const data = value;
|
|
4981
|
+
return typeof data["accessToken"] === "string" && typeof data["refreshToken"] === "string" && typeof data["expiresAt"] === "number";
|
|
4982
|
+
}
|
|
4983
|
+
async function exchangeCliAuthCode(input) {
|
|
4984
|
+
const response = await fetch(`${AUTH_BASE_URL}/auth/cli/token`, {
|
|
4985
|
+
method: "POST",
|
|
4986
|
+
headers: {
|
|
4987
|
+
"content-type": "application/json",
|
|
4988
|
+
"user-agent": CLI_USER_AGENT
|
|
4989
|
+
},
|
|
4990
|
+
body: JSON.stringify(input),
|
|
4991
|
+
signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
|
|
4992
|
+
});
|
|
4993
|
+
if (!response.ok) {
|
|
4994
|
+
throw new Error(`Login failed during code exchange (${response.status})`);
|
|
4995
|
+
}
|
|
4996
|
+
const payload = await response.json();
|
|
4997
|
+
if (!isCliTokenExchangeResponse(payload)) {
|
|
4998
|
+
throw new Error("Login failed: invalid token exchange response");
|
|
4999
|
+
}
|
|
5000
|
+
return {
|
|
5001
|
+
token: payload.accessToken,
|
|
5002
|
+
refreshToken: payload.refreshToken,
|
|
5003
|
+
email: payload.email,
|
|
5004
|
+
plan: payload.plan,
|
|
5005
|
+
selectedTwins: payload.selectedTwins,
|
|
5006
|
+
expiresAt: payload.expiresAt
|
|
5007
|
+
};
|
|
5008
|
+
}
|
|
5009
|
+
async function refreshCliSession(creds) {
|
|
5010
|
+
if (!creds.refreshToken) {
|
|
5011
|
+
return null;
|
|
5012
|
+
}
|
|
5013
|
+
const response = await fetch(`${AUTH_BASE_URL}/auth/cli/refresh`, {
|
|
5014
|
+
method: "POST",
|
|
5015
|
+
headers: {
|
|
5016
|
+
"content-type": "application/json",
|
|
5017
|
+
"user-agent": CLI_USER_AGENT
|
|
5018
|
+
},
|
|
5019
|
+
body: JSON.stringify({ refreshToken: creds.refreshToken }),
|
|
5020
|
+
signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
|
|
5021
|
+
});
|
|
5022
|
+
if (!response.ok) {
|
|
5023
|
+
return null;
|
|
5024
|
+
}
|
|
5025
|
+
const payload = await response.json();
|
|
5026
|
+
if (!isCliRefreshResponse(payload)) {
|
|
5027
|
+
return null;
|
|
5028
|
+
}
|
|
5029
|
+
return {
|
|
5030
|
+
...creds,
|
|
5031
|
+
token: payload.accessToken,
|
|
5032
|
+
refreshToken: payload.refreshToken,
|
|
5033
|
+
expiresAt: payload.expiresAt
|
|
5034
|
+
};
|
|
5035
|
+
}
|
|
4117
5036
|
async function refreshAuthFromServer(creds) {
|
|
4118
5037
|
try {
|
|
4119
5038
|
const response = await fetch(`${AUTH_BASE_URL}/auth/me`, {
|
|
4120
5039
|
method: "GET",
|
|
4121
5040
|
headers: {
|
|
4122
5041
|
authorization: `Bearer ${creds.token}`,
|
|
4123
|
-
"user-agent":
|
|
5042
|
+
"user-agent": CLI_USER_AGENT
|
|
4124
5043
|
},
|
|
4125
|
-
signal: AbortSignal.timeout(
|
|
5044
|
+
signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
|
|
4126
5045
|
});
|
|
4127
5046
|
if (!response.ok) {
|
|
5047
|
+
logRefreshFailure(creds, `HTTP ${response.status}`);
|
|
4128
5048
|
return creds;
|
|
4129
5049
|
}
|
|
4130
5050
|
const data = await response.json();
|
|
4131
5051
|
if (typeof data.email !== "string" || !isPlan(data.plan) || !Array.isArray(data.selectedTwins) || !data.selectedTwins.every((value) => typeof value === "string")) {
|
|
5052
|
+
logRefreshFailure(creds, "invalid response payload");
|
|
4132
5053
|
return creds;
|
|
4133
5054
|
}
|
|
4134
5055
|
const updated = {
|
|
@@ -4141,7 +5062,9 @@ async function refreshAuthFromServer(creds) {
|
|
|
4141
5062
|
saveCredentials(updated);
|
|
4142
5063
|
}
|
|
4143
5064
|
return updated;
|
|
4144
|
-
} catch {
|
|
5065
|
+
} catch (error2) {
|
|
5066
|
+
const message = error2 instanceof Error ? error2.message : String(error2);
|
|
5067
|
+
logRefreshFailure(creds, message);
|
|
4145
5068
|
return creds;
|
|
4146
5069
|
}
|
|
4147
5070
|
}
|
|
@@ -4165,7 +5088,7 @@ function getJwtExpiry(token) {
|
|
|
4165
5088
|
}
|
|
4166
5089
|
|
|
4167
5090
|
// src/runner/routing.ts
|
|
4168
|
-
import { readFileSync as
|
|
5091
|
+
import { readFileSync as readFileSync13 } from "fs";
|
|
4169
5092
|
function isLoopbackUrl(rawUrl) {
|
|
4170
5093
|
try {
|
|
4171
5094
|
const parsed = new URL(rawUrl);
|
|
@@ -4180,7 +5103,7 @@ function isNonLocalEndpoint(rawUrl) {
|
|
|
4180
5103
|
}
|
|
4181
5104
|
function parseRemoteTwinUrlOverrides(path) {
|
|
4182
5105
|
if (!path) return void 0;
|
|
4183
|
-
const raw =
|
|
5106
|
+
const raw = readFileSync13(path, "utf-8");
|
|
4184
5107
|
const parsed = JSON.parse(raw);
|
|
4185
5108
|
const overrides = {};
|
|
4186
5109
|
for (const [key, value] of Object.entries(parsed)) {
|
|
@@ -4202,7 +5125,7 @@ function parseRemoteTwinUrlOverrides(path) {
|
|
|
4202
5125
|
}
|
|
4203
5126
|
function parseApiBaseUrlOverrides(path) {
|
|
4204
5127
|
if (!path) return void 0;
|
|
4205
|
-
const raw =
|
|
5128
|
+
const raw = readFileSync13(path, "utf-8");
|
|
4206
5129
|
const parsed = JSON.parse(raw);
|
|
4207
5130
|
const overrides = {};
|
|
4208
5131
|
for (const [key, value] of Object.entries(parsed)) {
|
|
@@ -4260,17 +5183,17 @@ function buildApiRoutingEnv(routing) {
|
|
|
4260
5183
|
}
|
|
4261
5184
|
return env;
|
|
4262
5185
|
}
|
|
4263
|
-
function
|
|
5186
|
+
function validateRemoteApiEngineTopology(endpointUrl, requiredTwins, remoteTwinUrlOverrides) {
|
|
4264
5187
|
if (!isNonLocalEndpoint(endpointUrl)) return;
|
|
4265
5188
|
if (!remoteTwinUrlOverrides) {
|
|
4266
5189
|
throw new Error(
|
|
4267
|
-
"Non-local
|
|
5190
|
+
"Non-local engine endpoint detected but no remote-reachable twin URL map provided. Use --engine-twin-urls <path-to-json> with twin MCP base URLs reachable by the engine endpoint."
|
|
4268
5191
|
);
|
|
4269
5192
|
}
|
|
4270
5193
|
const missing = requiredTwins.filter((twin) => !remoteTwinUrlOverrides[twin]);
|
|
4271
5194
|
if (missing.length > 0) {
|
|
4272
5195
|
throw new Error(
|
|
4273
|
-
`Missing twin URL overrides for: ${missing.join(", ")}. Provide a URL for each twin in --
|
|
5196
|
+
`Missing twin URL overrides for: ${missing.join(", ")}. Provide a URL for each twin in --engine-twin-urls when using a non-local engine endpoint.`
|
|
4274
5197
|
);
|
|
4275
5198
|
}
|
|
4276
5199
|
}
|
|
@@ -4304,7 +5227,16 @@ function computeStateDiff(before, after) {
|
|
|
4304
5227
|
}
|
|
4305
5228
|
return diff;
|
|
4306
5229
|
}
|
|
4307
|
-
|
|
5230
|
+
function parsePositiveIntFromEnv(name) {
|
|
5231
|
+
const raw = process.env[name]?.trim();
|
|
5232
|
+
if (!raw) return void 0;
|
|
5233
|
+
const parsed = parseInt(raw, 10);
|
|
5234
|
+
if (Number.isNaN(parsed) || parsed <= 0) {
|
|
5235
|
+
throw new Error(`${name} must be a positive integer when set`);
|
|
5236
|
+
}
|
|
5237
|
+
return parsed;
|
|
5238
|
+
}
|
|
5239
|
+
async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections, evaluatorConfig, timeoutSeconds, rateLimit, apiEngine, localEngine, remoteTwinUrlOverrides, apiRouting, cloudTwinUrls, apiBearerToken, adminAuth) {
|
|
4308
5240
|
async function probeHealth(url, timeoutMs) {
|
|
4309
5241
|
const controller = new AbortController();
|
|
4310
5242
|
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
@@ -4336,7 +5268,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4336
5268
|
let beforeState;
|
|
4337
5269
|
if (useCloud) {
|
|
4338
5270
|
progress("Fetching seed state from cloud twins...");
|
|
4339
|
-
beforeState = await collectStateFromHttp(cloudTwinUrls);
|
|
5271
|
+
beforeState = await collectStateFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
|
|
4340
5272
|
} else {
|
|
4341
5273
|
progress("Capturing seed state...");
|
|
4342
5274
|
const seedResult = await captureSeedState(twinConfigs);
|
|
@@ -4363,7 +5295,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4363
5295
|
const twinNames = twinConfigs.map((c) => c.twinName);
|
|
4364
5296
|
const localTwinUrls = twinUrls;
|
|
4365
5297
|
let effectiveRemoteTwinUrls;
|
|
4366
|
-
if (
|
|
5298
|
+
if (apiEngine) {
|
|
4367
5299
|
effectiveRemoteTwinUrls = {};
|
|
4368
5300
|
for (const twinName of twinNames) {
|
|
4369
5301
|
const fromOverride = remoteTwinUrlOverrides?.[twinName];
|
|
@@ -4375,7 +5307,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4375
5307
|
effectiveRemoteTwinUrls[twinName] = resolved;
|
|
4376
5308
|
}
|
|
4377
5309
|
}
|
|
4378
|
-
if (
|
|
5310
|
+
if (apiEngine && !useCloud) {
|
|
4379
5311
|
for (const [name, url] of Object.entries(localTwinUrls)) {
|
|
4380
5312
|
const ok = await probeHealth(url, 1500);
|
|
4381
5313
|
if (!ok) {
|
|
@@ -4383,24 +5315,25 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4383
5315
|
}
|
|
4384
5316
|
}
|
|
4385
5317
|
}
|
|
4386
|
-
|
|
4387
|
-
|
|
4388
|
-
|
|
4389
|
-
|
|
4390
|
-
|
|
4391
|
-
|
|
4392
|
-
|
|
4393
|
-
}
|
|
4394
|
-
const taskMessage = generateTaskFromScenario(scenario, apiRouting);
|
|
5318
|
+
const baseTaskMessage = generateTaskFromScenario(scenario, apiRouting);
|
|
5319
|
+
const taskMessage = localEngine?.promptContext ? `${localEngine.promptContext}
|
|
5320
|
+
|
|
5321
|
+
---
|
|
5322
|
+
|
|
5323
|
+
${baseTaskMessage}` : baseTaskMessage;
|
|
5324
|
+
const engineModel = localEngine?.model ?? apiEngine?.model;
|
|
4395
5325
|
const effectiveAgentConfig = {
|
|
4396
5326
|
...agentConfig,
|
|
4397
5327
|
env: {
|
|
4398
5328
|
...agentConfig.env,
|
|
4399
|
-
...buildApiRoutingEnv(apiRouting)
|
|
5329
|
+
...buildApiRoutingEnv(apiRouting),
|
|
5330
|
+
ARCHAL_ENGINE_MODE: apiEngine ? "api" : "local",
|
|
5331
|
+
...engineModel ? { ARCHAL_ENGINE_MODEL: engineModel } : {},
|
|
5332
|
+
ARCHAL_ENGINE_TASK: taskMessage
|
|
4400
5333
|
}
|
|
4401
5334
|
};
|
|
4402
|
-
let agentResult =
|
|
4403
|
-
|
|
5335
|
+
let agentResult = apiEngine ? await executeOpenClawRemote(
|
|
5336
|
+
apiEngine,
|
|
4404
5337
|
scenario,
|
|
4405
5338
|
runId,
|
|
4406
5339
|
taskMessage,
|
|
@@ -4414,7 +5347,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4414
5347
|
timeoutSeconds * 1e3,
|
|
4415
5348
|
{ restConfigPath, twinUrls }
|
|
4416
5349
|
);
|
|
4417
|
-
if (!
|
|
5350
|
+
if (!apiEngine && !localEngine && shouldRetryWithModernOpenClaw(agentResult)) {
|
|
4418
5351
|
warn(
|
|
4419
5352
|
"OpenClaw legacy local invocation failed with CLI drift signal; retrying with modern local args"
|
|
4420
5353
|
);
|
|
@@ -4431,8 +5364,8 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4431
5364
|
let stateAfter;
|
|
4432
5365
|
let trace;
|
|
4433
5366
|
if (useCloud) {
|
|
4434
|
-
stateAfter = await collectStateFromHttp(cloudTwinUrls);
|
|
4435
|
-
trace = await collectTraceFromHttp(cloudTwinUrls);
|
|
5367
|
+
stateAfter = await collectStateFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
|
|
5368
|
+
trace = await collectTraceFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
|
|
4436
5369
|
} else {
|
|
4437
5370
|
if (!twinPaths) {
|
|
4438
5371
|
throw new Error("Twin paths not initialized");
|
|
@@ -4443,7 +5376,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4443
5376
|
const diff = computeStateDiff(beforeState, stateAfter);
|
|
4444
5377
|
cleanupTempFiles(mcpConfigPath, twinPaths ?? {}, seedPaths, runId, twinNames);
|
|
4445
5378
|
if (agentResult.timedOut) {
|
|
4446
|
-
const timeoutDisplay =
|
|
5379
|
+
const timeoutDisplay = apiEngine ? `${(apiEngine.timeoutMs / 1e3).toFixed(0)}s` : `${timeoutSeconds}s`;
|
|
4447
5380
|
const durationMs2 = Date.now() - startTime;
|
|
4448
5381
|
return {
|
|
4449
5382
|
runIndex,
|
|
@@ -4461,6 +5394,9 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4461
5394
|
}
|
|
4462
5395
|
if (agentResult.exitCode !== 0 && agentResult.exitCode !== null) {
|
|
4463
5396
|
warn(`Agent exited with non-zero code ${agentResult.exitCode} on run ${runIndex + 1}`);
|
|
5397
|
+
if (agentResult.stderr) {
|
|
5398
|
+
debug(`Agent stderr: ${agentResult.stderr.slice(0, 500)}`);
|
|
5399
|
+
}
|
|
4464
5400
|
}
|
|
4465
5401
|
progress(`Evaluating run ${runIndex + 1}...`);
|
|
4466
5402
|
const evaluationResult = await evaluateRun(
|
|
@@ -4511,7 +5447,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4511
5447
|
for (const paths of Object.values(seedPaths)) {
|
|
4512
5448
|
for (const file of [paths.stateFile, `${paths.stateFile}.tmp`]) {
|
|
4513
5449
|
try {
|
|
4514
|
-
if (
|
|
5450
|
+
if (existsSync11(file)) unlinkSync6(file);
|
|
4515
5451
|
} catch {
|
|
4516
5452
|
}
|
|
4517
5453
|
}
|
|
@@ -4520,14 +5456,14 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4520
5456
|
if (restConfigPath) {
|
|
4521
5457
|
for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
|
|
4522
5458
|
try {
|
|
4523
|
-
if (
|
|
5459
|
+
if (existsSync11(file)) unlinkSync6(file);
|
|
4524
5460
|
} catch {
|
|
4525
5461
|
}
|
|
4526
5462
|
}
|
|
4527
5463
|
}
|
|
4528
5464
|
}
|
|
4529
5465
|
}
|
|
4530
|
-
function preflightCheck(scenario, apiKey) {
|
|
5466
|
+
function preflightCheck(scenario, apiKey, model, baseUrl) {
|
|
4531
5467
|
const errors = [];
|
|
4532
5468
|
for (const twin of scenario.config.twins) {
|
|
4533
5469
|
const result = checkTwinAvailability(twin);
|
|
@@ -4540,17 +5476,30 @@ function preflightCheck(scenario, apiKey) {
|
|
|
4540
5476
|
}
|
|
4541
5477
|
}
|
|
4542
5478
|
const hasProbabilistic = scenario.successCriteria.some((c) => c.type === "probabilistic");
|
|
4543
|
-
if (hasProbabilistic
|
|
4544
|
-
const
|
|
4545
|
-
|
|
4546
|
-
|
|
4547
|
-
|
|
4548
|
-
|
|
4549
|
-
|
|
5479
|
+
if (hasProbabilistic) {
|
|
5480
|
+
const provider = detectProvider(model);
|
|
5481
|
+
const resolvedKey = resolveProviderApiKey(apiKey, provider);
|
|
5482
|
+
if (provider === "openai-compatible" && !baseUrl) {
|
|
5483
|
+
errors.push({
|
|
5484
|
+
check: "evaluator.baseUrl",
|
|
5485
|
+
message: `Model "${model}" requires a base URL for the OpenAI-compatible endpoint`,
|
|
5486
|
+
detail: "Set via: export ARCHAL_EVALUATOR_BASE_URL=<url> or archal config set evaluator.baseUrl <url>"
|
|
5487
|
+
});
|
|
5488
|
+
}
|
|
5489
|
+
if (!resolvedKey) {
|
|
5490
|
+
const envVar = getProviderEnvVar(provider);
|
|
5491
|
+
const pCount = scenario.successCriteria.filter((c) => c.type === "probabilistic").length;
|
|
5492
|
+
errors.push({
|
|
5493
|
+
check: envVar,
|
|
5494
|
+
message: `Scenario has ${pCount} probabilistic criteria that will be skipped (no API key for ${provider})`,
|
|
5495
|
+
detail: `Set via: export ${envVar}=<your-key> or archal config set evaluator.apiKey <key>`,
|
|
5496
|
+
warning: true
|
|
5497
|
+
});
|
|
5498
|
+
}
|
|
4550
5499
|
}
|
|
4551
5500
|
return errors;
|
|
4552
5501
|
}
|
|
4553
|
-
async function
|
|
5502
|
+
async function runRemoteApiEnginePreflight(scenario, seedSelections, rateLimit, remoteConfig, remoteTwinUrlOverrides) {
|
|
4554
5503
|
const runId = `archal-preflight-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
4555
5504
|
const twinConfigs = seedSelections.map((sel) => ({
|
|
4556
5505
|
twinName: sel.twinName,
|
|
@@ -4592,14 +5541,14 @@ async function runRemoteOpenClawPreflight(scenario, seedSelections, rateLimit, r
|
|
|
4592
5541
|
for (const paths of Object.values(restResult.twinPaths)) {
|
|
4593
5542
|
for (const file of [paths.stateFile, `${paths.stateFile}.tmp`, paths.traceFile, `${paths.traceFile}.tmp`]) {
|
|
4594
5543
|
try {
|
|
4595
|
-
if (
|
|
5544
|
+
if (existsSync11(file)) unlinkSync6(file);
|
|
4596
5545
|
} catch {
|
|
4597
5546
|
}
|
|
4598
5547
|
}
|
|
4599
5548
|
}
|
|
4600
5549
|
for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
|
|
4601
5550
|
try {
|
|
4602
|
-
if (
|
|
5551
|
+
if (existsSync11(file)) unlinkSync6(file);
|
|
4603
5552
|
} catch {
|
|
4604
5553
|
}
|
|
4605
5554
|
}
|
|
@@ -4622,9 +5571,14 @@ async function runScenario(options) {
|
|
|
4622
5571
|
);
|
|
4623
5572
|
}
|
|
4624
5573
|
}
|
|
4625
|
-
const preflightErrors = preflightCheck(scenario, config.apiKey);
|
|
4626
|
-
|
|
4627
|
-
|
|
5574
|
+
const preflightErrors = preflightCheck(scenario, config.apiKey, model, config.baseUrl);
|
|
5575
|
+
const hardErrors = preflightErrors.filter((e) => !e.warning);
|
|
5576
|
+
const warnings = preflightErrors.filter((e) => e.warning);
|
|
5577
|
+
for (const w of warnings) {
|
|
5578
|
+
warn(`${w.check}: ${w.message}${w.detail ? ` (${w.detail})` : ""}`);
|
|
5579
|
+
}
|
|
5580
|
+
if (hardErrors.length > 0) {
|
|
5581
|
+
const lines = hardErrors.map((e) => {
|
|
4628
5582
|
let line = ` - ${e.check}: ${e.message}`;
|
|
4629
5583
|
if (e.detail) line += `
|
|
4630
5584
|
${e.detail}`;
|
|
@@ -4651,7 +5605,7 @@ Run 'archal doctor' for a full system check.`
|
|
|
4651
5605
|
}
|
|
4652
5606
|
seedSelections = overrideSeedSelection(seedSelections, overrides);
|
|
4653
5607
|
}
|
|
4654
|
-
if (config.
|
|
5608
|
+
if (config.geminiApiKey && !options.noDynamicSeed) {
|
|
4655
5609
|
progress("Generating dynamic seeds from setup description...");
|
|
4656
5610
|
const baseTwinConfigs = seedSelections.map((sel) => ({
|
|
4657
5611
|
twinName: sel.twinName,
|
|
@@ -4659,8 +5613,8 @@ Run 'archal doctor' for a full system check.`
|
|
|
4659
5613
|
}));
|
|
4660
5614
|
const { beforeState: baseSeedStates } = await captureSeedState(baseTwinConfigs);
|
|
4661
5615
|
const dynamicConfig = {
|
|
4662
|
-
|
|
4663
|
-
model,
|
|
5616
|
+
geminiApiKey: config.geminiApiKey,
|
|
5617
|
+
model: config.seedModel,
|
|
4664
5618
|
noCache: options.noSeedCache
|
|
4665
5619
|
};
|
|
4666
5620
|
for (const sel of seedSelections) {
|
|
@@ -4683,24 +5637,28 @@ Run 'archal doctor' for a full system check.`
|
|
|
4683
5637
|
sel.seedData = result.seed;
|
|
4684
5638
|
}
|
|
4685
5639
|
}
|
|
4686
|
-
const scenarioDir = dirname2(
|
|
5640
|
+
const scenarioDir = dirname2(resolve7(options.scenarioPath));
|
|
4687
5641
|
let projectConfigPath;
|
|
4688
5642
|
for (const dir of [scenarioDir, process.cwd()]) {
|
|
4689
|
-
const candidate =
|
|
4690
|
-
if (
|
|
5643
|
+
const candidate = resolve7(dir, ".archal.json");
|
|
5644
|
+
if (existsSync11(candidate)) {
|
|
4691
5645
|
projectConfigPath = candidate;
|
|
4692
5646
|
break;
|
|
4693
5647
|
}
|
|
4694
5648
|
}
|
|
4695
|
-
function
|
|
4696
|
-
if (!raw || !raw.trim()) return
|
|
5649
|
+
function resolveOpenClawModel2(raw) {
|
|
5650
|
+
if (!raw || !raw.trim()) return void 0;
|
|
4697
5651
|
const value = raw.trim();
|
|
4698
5652
|
return value.includes(":") ? value : `openclaw:${value}`;
|
|
4699
5653
|
}
|
|
4700
|
-
function
|
|
5654
|
+
function resolveEngineToken2(explicitToken) {
|
|
4701
5655
|
if (explicitToken && explicitToken.trim()) {
|
|
4702
5656
|
return explicitToken.trim();
|
|
4703
5657
|
}
|
|
5658
|
+
const engineToken = process.env["ARCHAL_ENGINE_TOKEN"]?.trim();
|
|
5659
|
+
if (engineToken) {
|
|
5660
|
+
return engineToken;
|
|
5661
|
+
}
|
|
4704
5662
|
const gatewayToken = process.env["OPENCLAW_GATEWAY_TOKEN"]?.trim();
|
|
4705
5663
|
if (gatewayToken) {
|
|
4706
5664
|
return gatewayToken;
|
|
@@ -4711,42 +5669,124 @@ Run 'archal doctor' for a full system check.`
|
|
|
4711
5669
|
}
|
|
4712
5670
|
return void 0;
|
|
4713
5671
|
}
|
|
4714
|
-
|
|
4715
|
-
|
|
4716
|
-
|
|
4717
|
-
|
|
4718
|
-
|
|
4719
|
-
|
|
4720
|
-
|
|
5672
|
+
const openclawEndpointAlias = options.openclawUrl ?? process.env["OPENCLAW_URL"];
|
|
5673
|
+
const engineMode = (() => {
|
|
5674
|
+
if (options.engine) {
|
|
5675
|
+
return options.engine;
|
|
5676
|
+
}
|
|
5677
|
+
if (options.engineEndpoint || openclawEndpointAlias || process.env["ARCHAL_ENGINE_ENDPOINT"]) {
|
|
5678
|
+
return "api";
|
|
5679
|
+
}
|
|
5680
|
+
if (options.harnessDir || process.env["ARCHAL_HARNESS_DIR"]) {
|
|
5681
|
+
return "local";
|
|
5682
|
+
}
|
|
5683
|
+
return "legacy";
|
|
5684
|
+
})();
|
|
5685
|
+
const apiEndpoint = options.engineEndpoint ?? openclawEndpointAlias ?? process.env["ARCHAL_ENGINE_ENDPOINT"];
|
|
5686
|
+
const rawOpenClawAgent = options.openclawAgent ?? process.env["OPENCLAW_AGENT_ID"];
|
|
5687
|
+
const rawEngineModel = options.engineModel ?? process.env["ARCHAL_ENGINE_MODEL"];
|
|
5688
|
+
const resolvedEngineToken = resolveEngineToken2(options.engineToken ?? options.openclawToken);
|
|
5689
|
+
const harnessDir = options.harnessDir ?? process.env["ARCHAL_HARNESS_DIR"];
|
|
5690
|
+
let apiEngine;
|
|
5691
|
+
if (engineMode === "api") {
|
|
5692
|
+
const apiTimeoutSeconds = options.engineTimeout ?? options.openclawTimeout ?? parsePositiveIntFromEnv("ARCHAL_ENGINE_TIMEOUT") ?? timeoutSeconds;
|
|
5693
|
+
if (!apiEndpoint || !apiEndpoint.trim()) {
|
|
5694
|
+
throw new Error(
|
|
5695
|
+
"API engine mode requires --engine-endpoint (or --openclaw-url for legacy compatibility)."
|
|
5696
|
+
);
|
|
5697
|
+
}
|
|
5698
|
+
if (!Number.isFinite(apiTimeoutSeconds) || apiTimeoutSeconds <= 0) {
|
|
5699
|
+
throw new Error("Engine timeout must be a positive integer number of seconds.");
|
|
5700
|
+
}
|
|
5701
|
+
const resolvedApiModel = rawEngineModel?.trim() || resolveOpenClawModel2(rawOpenClawAgent) || (openclawEndpointAlias ? "openclaw:main" : void 0);
|
|
5702
|
+
if (!resolvedApiModel) {
|
|
5703
|
+
throw new Error(
|
|
5704
|
+
"API engine mode requires --engine-model/ARCHAL_ENGINE_MODEL (or --openclaw-agent/OPENCLAW_AGENT_ID)."
|
|
5705
|
+
);
|
|
5706
|
+
}
|
|
5707
|
+
apiEngine = {
|
|
5708
|
+
url: apiEndpoint.trim(),
|
|
5709
|
+
token: resolvedEngineToken,
|
|
5710
|
+
model: resolvedApiModel,
|
|
5711
|
+
timeoutMs: apiTimeoutSeconds * 1e3,
|
|
5712
|
+
agentId: rawOpenClawAgent?.trim() || void 0
|
|
4721
5713
|
};
|
|
4722
|
-
if (!
|
|
5714
|
+
if (openclawEndpointAlias && !apiEngine.token) {
|
|
4723
5715
|
throw new Error(
|
|
4724
5716
|
"OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD."
|
|
4725
5717
|
);
|
|
4726
5718
|
}
|
|
4727
5719
|
}
|
|
4728
|
-
|
|
5720
|
+
let localEngine;
|
|
5721
|
+
if (engineMode === "local") {
|
|
5722
|
+
if (!harnessDir) {
|
|
5723
|
+
throw new Error(
|
|
5724
|
+
"Local engine mode requires --harness-dir (or ARCHAL_HARNESS_DIR)."
|
|
5725
|
+
);
|
|
5726
|
+
}
|
|
5727
|
+
const resolvedHarness = resolveLocalHarness(harnessDir, rawEngineModel);
|
|
5728
|
+
const resolvedFallbackLocalAgentConfig = options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath);
|
|
5729
|
+
const fallbackLocalAgentConfig = resolvedFallbackLocalAgentConfig ?? { command: "openclaw", args: [] };
|
|
5730
|
+
if (!resolvedHarness.manifest) {
|
|
5731
|
+
debug(
|
|
5732
|
+
"Harness manifest not found for local mode; using agent command defaults.",
|
|
5733
|
+
{ manifestPath: resolvedHarness.manifestPath }
|
|
5734
|
+
);
|
|
5735
|
+
} else if (!resolvedHarness.localCommand) {
|
|
5736
|
+
warn(
|
|
5737
|
+
`Harness manifest at ${resolvedHarness.manifestPath} does not define local.command; falling back to agent command defaults.`
|
|
5738
|
+
);
|
|
5739
|
+
}
|
|
5740
|
+
if (!resolvedHarness.localCommand && !resolvedFallbackLocalAgentConfig) {
|
|
5741
|
+
warn(
|
|
5742
|
+
'No local command configured via harness manifest/.archal.json/ARCHAL_AGENT_COMMAND; defaulting to "openclaw".'
|
|
5743
|
+
);
|
|
5744
|
+
}
|
|
5745
|
+
const commandConfig = resolvedHarness.localCommand ?? fallbackLocalAgentConfig;
|
|
5746
|
+
localEngine = {
|
|
5747
|
+
model: resolvedHarness.model,
|
|
5748
|
+
command: commandConfig.command,
|
|
5749
|
+
args: commandConfig.args,
|
|
5750
|
+
env: commandConfig.env,
|
|
5751
|
+
cwd: resolvedHarness.harnessDir,
|
|
5752
|
+
promptContext: resolvedHarness.promptContext
|
|
5753
|
+
};
|
|
5754
|
+
}
|
|
5755
|
+
const remoteTwinUrlOverrides = apiEngine ? parseRemoteTwinUrlOverrides(
|
|
5756
|
+
options.engineTwinUrls ?? options.openclawTwinUrls ?? process.env["ARCHAL_ENGINE_TWIN_URLS"]
|
|
5757
|
+
) : void 0;
|
|
4729
5758
|
const apiBaseUrlOverrides = parseApiBaseUrlOverrides(options.apiBaseUrls);
|
|
4730
5759
|
const apiProxyUrl = parseProxyUrl(options.apiProxyUrl ?? process.env["ARCHAL_API_PROXY_URL"]);
|
|
4731
5760
|
const apiRouting = apiBaseUrlOverrides && Object.keys(apiBaseUrlOverrides).length > 0 || apiProxyUrl ? {
|
|
4732
5761
|
baseUrls: apiBaseUrlOverrides,
|
|
4733
|
-
proxyUrl: apiProxyUrl
|
|
5762
|
+
proxyUrl: apiProxyUrl,
|
|
5763
|
+
bearerToken: options.apiBearerToken,
|
|
5764
|
+
adminToken: options.apiAdminToken,
|
|
5765
|
+
adminUserId: options.apiAdminUserId
|
|
4734
5766
|
} : void 0;
|
|
4735
|
-
const agentConfig =
|
|
5767
|
+
const agentConfig = localEngine ? {
|
|
5768
|
+
command: localEngine.command,
|
|
5769
|
+
args: localEngine.args,
|
|
5770
|
+
env: localEngine.env,
|
|
5771
|
+
cwd: localEngine.cwd
|
|
5772
|
+
} : options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath) ?? (apiEngine ? { command: "openclaw", args: [] } : {
|
|
4736
5773
|
command: process.env["ARCHAL_AGENT_COMMAND"] ?? "echo",
|
|
4737
5774
|
args: process.env["ARCHAL_AGENT_COMMAND"] ? [] : ["No agent command configured"]
|
|
4738
5775
|
});
|
|
4739
|
-
if (!
|
|
5776
|
+
if (!apiEngine && !localEngine && agentConfig.command === "echo") {
|
|
4740
5777
|
process.stderr.write(
|
|
4741
|
-
"Warning: No agent command configured. Set ARCHAL_AGENT_COMMAND/.archal.json or
|
|
5778
|
+
"Warning: No agent command configured. Set ARCHAL_AGENT_COMMAND/.archal.json, use --engine-endpoint, or run --engine local with --harness-dir.\n"
|
|
4742
5779
|
);
|
|
4743
5780
|
}
|
|
4744
|
-
if (
|
|
4745
|
-
info("Remote
|
|
5781
|
+
if (apiEngine) {
|
|
5782
|
+
info("Remote API engine mode enabled", { url: apiEngine.url });
|
|
4746
5783
|
warn(
|
|
4747
|
-
"Remote
|
|
5784
|
+
"Remote engine mode requires network reachability from the endpoint to each run's twin MCP URLs. If runs fail to connect, co-locate the engine with Archal or expose twins via a reachable network path."
|
|
4748
5785
|
);
|
|
4749
|
-
|
|
5786
|
+
validateRemoteApiEngineTopology(apiEngine.url, scenario.config.twins, remoteTwinUrlOverrides);
|
|
5787
|
+
}
|
|
5788
|
+
if (localEngine) {
|
|
5789
|
+
info("Local harness engine mode enabled", { harnessDir: localEngine.cwd });
|
|
4750
5790
|
}
|
|
4751
5791
|
if (apiRouting) {
|
|
4752
5792
|
info("API routing context enabled", {
|
|
@@ -4755,18 +5795,18 @@ Run 'archal doctor' for a full system check.`
|
|
|
4755
5795
|
});
|
|
4756
5796
|
}
|
|
4757
5797
|
if (options.preflightOnly) {
|
|
4758
|
-
if (
|
|
4759
|
-
await
|
|
5798
|
+
if (apiEngine) {
|
|
5799
|
+
await runRemoteApiEnginePreflight(
|
|
4760
5800
|
scenario,
|
|
4761
5801
|
seedSelections,
|
|
4762
5802
|
options.rateLimit,
|
|
4763
|
-
|
|
5803
|
+
apiEngine,
|
|
4764
5804
|
remoteTwinUrlOverrides
|
|
4765
5805
|
);
|
|
4766
5806
|
}
|
|
4767
5807
|
info("Preflight checks passed", {
|
|
4768
5808
|
scenario: scenario.title,
|
|
4769
|
-
|
|
5809
|
+
engineMode: apiEngine ? "api" : localEngine ? "local" : "legacy-local"
|
|
4770
5810
|
});
|
|
4771
5811
|
return {
|
|
4772
5812
|
scenarioTitle: scenario.title,
|
|
@@ -4786,6 +5826,7 @@ Run 'archal doctor' for a full system check.`
|
|
|
4786
5826
|
};
|
|
4787
5827
|
const runs = [];
|
|
4788
5828
|
for (let i = 0; i < numRuns; i++) {
|
|
5829
|
+
const adminAuth = options.apiAdminToken ? { token: options.apiAdminToken, userId: options.apiAdminUserId } : void 0;
|
|
4789
5830
|
const result = await executeSingleRun(
|
|
4790
5831
|
i,
|
|
4791
5832
|
scenario,
|
|
@@ -4794,10 +5835,13 @@ Run 'archal doctor' for a full system check.`
|
|
|
4794
5835
|
evaluatorConfig,
|
|
4795
5836
|
timeoutSeconds,
|
|
4796
5837
|
options.rateLimit,
|
|
4797
|
-
|
|
5838
|
+
apiEngine,
|
|
5839
|
+
localEngine,
|
|
4798
5840
|
remoteTwinUrlOverrides,
|
|
4799
5841
|
apiRouting,
|
|
4800
|
-
options.cloudTwinUrls
|
|
5842
|
+
options.cloudTwinUrls,
|
|
5843
|
+
options.apiBearerToken,
|
|
5844
|
+
adminAuth
|
|
4801
5845
|
);
|
|
4802
5846
|
runs.push(result);
|
|
4803
5847
|
printRunProgress(i, numRuns, result.overallScore, result.error);
|
|
@@ -4836,10 +5880,10 @@ function normalizeBaseUrl(value, fallback) {
|
|
|
4836
5880
|
const normalized = trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
|
|
4837
5881
|
return normalized.length > 0 ? normalized : fallback;
|
|
4838
5882
|
}
|
|
4839
|
-
var DEFAULT_BASE_URL = "https://archal.ai";
|
|
5883
|
+
var DEFAULT_BASE_URL = "https://www.archal.ai";
|
|
4840
5884
|
var AUTH_BASE_URL2 = normalizeBaseUrl(process.env["ARCHAL_AUTH_URL"] ?? DEFAULT_BASE_URL, DEFAULT_BASE_URL);
|
|
4841
5885
|
var API_BASE_URL = normalizeBaseUrl(process.env["ARCHAL_API_URL"] ?? AUTH_BASE_URL2, AUTH_BASE_URL2);
|
|
4842
|
-
var
|
|
5886
|
+
var REQUEST_TIMEOUT_MS4 = 8e3;
|
|
4843
5887
|
var RETRYABLE_STATUS_CODES2 = /* @__PURE__ */ new Set([408, 425, 429, 500, 502, 503, 504]);
|
|
4844
5888
|
var RETRYABLE_NETWORK_CODES = /* @__PURE__ */ new Set([
|
|
4845
5889
|
"ECONNABORTED",
|
|
@@ -4864,7 +5908,7 @@ var MAX_RETRIES2 = parseBoundedInt(process.env["ARCHAL_API_MAX_RETRIES"], 3, 0,
|
|
|
4864
5908
|
var RETRY_BASE_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_BASE_MS"], 250, 25, 1e4);
|
|
4865
5909
|
var RETRY_MAX_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_MAX_MS"], 3e3, RETRY_BASE_DELAY_MS, 2e4);
|
|
4866
5910
|
function sleep2(ms) {
|
|
4867
|
-
return new Promise((
|
|
5911
|
+
return new Promise((resolve13) => setTimeout(resolve13, ms));
|
|
4868
5912
|
}
|
|
4869
5913
|
function retryDelayMs(attempt, retryAfter) {
|
|
4870
5914
|
if (retryAfter) {
|
|
@@ -4924,13 +5968,30 @@ function isFinalizeEvidencePath(path) {
|
|
|
4924
5968
|
}
|
|
4925
5969
|
return /^\/api\/sessions\/[^/]+\/evidence\/finalize$/.test(pathname);
|
|
4926
5970
|
}
|
|
5971
|
+
/**
 * Best-effort refresh of the stored CLI session.
 *
 * Reads the stored credentials, and when they carry a refresh token, asks
 * `refreshCliSession` for a renewed session. A truthy result is handed to
 * `saveCredentials` before its `token` is returned.
 *
 * @returns {Promise<string|null>} The renewed token, or `null` when there are
 *   no stored credentials, no refresh token, the refresh yields nothing, or
 *   any step throws (failures are deliberately swallowed so the caller can
 *   fall through to its normal error handling).
 */
async function tryRefreshToken() {
  try {
    const stored = getStoredCredentials();
    if (!stored?.refreshToken) {
      return null;
    }
    const renewed = await refreshCliSession(stored);
    if (renewed) {
      saveCredentials(renewed);
      return renewed.token;
    }
    return null;
  } catch {
    // Refresh is opportunistic: any failure simply means "no new token".
    return null;
  }
}
|
|
4927
5983
|
async function request(method, path, token, body) {
|
|
4928
5984
|
const url = `${resolveBaseUrl(path)}${path}`;
|
|
4929
5985
|
const headers = {
|
|
4930
5986
|
"content-type": "application/json",
|
|
4931
|
-
"user-agent":
|
|
5987
|
+
"user-agent": CLI_USER_AGENT
|
|
4932
5988
|
};
|
|
4933
|
-
|
|
5989
|
+
const runtimeAdminToken = process.env["ARCHAL_RUNTIME_ADMIN_TOKEN"]?.trim();
|
|
5990
|
+
if (runtimeAdminToken) {
|
|
5991
|
+
headers["x-archal-admin-token"] = runtimeAdminToken;
|
|
5992
|
+
headers["x-archal-user-id"] = process.env["ARCHAL_RUNTIME_USER_ID"]?.trim() || "cli-user";
|
|
5993
|
+
headers["x-archal-plan"] = process.env["ARCHAL_RUNTIME_PLAN"]?.trim() || "free";
|
|
5994
|
+
} else if (token) {
|
|
4934
5995
|
headers["authorization"] = `Bearer ${token}`;
|
|
4935
5996
|
}
|
|
4936
5997
|
const isIdempotentFinalize = method === "POST" && isFinalizeEvidencePath(path);
|
|
@@ -4938,16 +5999,28 @@ async function request(method, path, token, body) {
|
|
|
4938
5999
|
const attempts = retriesAllowed ? MAX_RETRIES2 + 1 : 1;
|
|
4939
6000
|
let lastError = "request failed";
|
|
4940
6001
|
let lastOffline = false;
|
|
6002
|
+
let refreshAttempted = false;
|
|
4941
6003
|
for (let attempt = 1; attempt <= attempts; attempt += 1) {
|
|
4942
6004
|
try {
|
|
4943
6005
|
const response = await fetch(url, {
|
|
4944
6006
|
method,
|
|
4945
6007
|
headers,
|
|
4946
6008
|
body: body ? JSON.stringify(body) : void 0,
|
|
4947
|
-
signal: AbortSignal.timeout(
|
|
6009
|
+
signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS4)
|
|
4948
6010
|
});
|
|
4949
6011
|
if (!response.ok) {
|
|
4950
|
-
|
|
6012
|
+
if (response.status === 401 && token && !refreshAttempted) {
|
|
6013
|
+
refreshAttempted = true;
|
|
6014
|
+
const refreshed = await tryRefreshToken();
|
|
6015
|
+
if (refreshed) {
|
|
6016
|
+
token = refreshed;
|
|
6017
|
+
headers["authorization"] = `Bearer ${token}`;
|
|
6018
|
+
attempt -= 1;
|
|
6019
|
+
continue;
|
|
6020
|
+
}
|
|
6021
|
+
}
|
|
6022
|
+
const rawText = await response.text().catch(() => "");
|
|
6023
|
+
const text = rawText.length > 200 ? rawText.slice(0, 200) + "..." : rawText;
|
|
4951
6024
|
const retryable = retriesAllowed && attempt < attempts && RETRYABLE_STATUS_CODES2.has(response.status);
|
|
4952
6025
|
if (retryable) {
|
|
4953
6026
|
await sleep2(retryDelayMs(attempt, response.headers.get("retry-after")));
|
|
@@ -5018,7 +6091,7 @@ function fetchScenarioCatalog(token) {
|
|
|
5018
6091
|
return request("GET", "/api/scenarios", token);
|
|
5019
6092
|
}
|
|
5020
6093
|
|
|
5021
|
-
// src/commands/
|
|
6094
|
+
// src/commands/twins.ts
|
|
5022
6095
|
import { Command as Command2 } from "commander";
|
|
5023
6096
|
|
|
5024
6097
|
// src/constants.ts
|
|
@@ -5045,10 +6118,10 @@ var PLAN_LIMITS = {
|
|
|
5045
6118
|
import { createInterface as createInterface2 } from "readline";
|
|
5046
6119
|
function askLine(question) {
|
|
5047
6120
|
const rl = createInterface2({ input: process.stdin, output: process.stderr });
|
|
5048
|
-
return new Promise((
|
|
6121
|
+
return new Promise((resolve13) => {
|
|
5049
6122
|
rl.question(question, (answer) => {
|
|
5050
6123
|
rl.close();
|
|
5051
|
-
|
|
6124
|
+
resolve13(answer.trim());
|
|
5052
6125
|
});
|
|
5053
6126
|
});
|
|
5054
6127
|
}
|
|
@@ -5057,8 +6130,7 @@ async function askConfirm(question) {
|
|
|
5057
6130
|
return answer.toLowerCase().startsWith("y");
|
|
5058
6131
|
}
|
|
5059
6132
|
|
|
5060
|
-
// src/commands/
|
|
5061
|
-
var runningTwins = /* @__PURE__ */ new Map();
|
|
6133
|
+
// src/commands/twins.ts
|
|
5062
6134
|
var KNOWN_TWINS2 = [
|
|
5063
6135
|
{ name: "github", package: "@archal/twin-github", description: "GitHub digital twin" },
|
|
5064
6136
|
{ name: "slack", package: "@archal/twin-slack", description: "Slack digital twin" },
|
|
@@ -5083,7 +6155,7 @@ async function runInteractiveTwinSelect(token) {
|
|
|
5083
6155
|
const marker = currentlySelected.has(twin.id) ? "\x1B[32m\u2713\x1B[0m" : " ";
|
|
5084
6156
|
const num = String(i + 1).padStart(2);
|
|
5085
6157
|
process.stderr.write(
|
|
5086
|
-
` ${marker} [${num}] ${twin.name.padEnd(18)} (${twin.toolCount} tools) \u2014 ${twin.description}
|
|
6158
|
+
` ${marker} [${num}] ${twin.name.padEnd(18)}${twin.toolCount != null ? ` (${twin.toolCount} tools)` : ""} \u2014 ${twin.description}
|
|
5087
6159
|
`
|
|
5088
6160
|
);
|
|
5089
6161
|
}
|
|
@@ -5169,7 +6241,7 @@ async function listTwinCatalog() {
|
|
|
5169
6241
|
} else {
|
|
5170
6242
|
status = "\x1B[90m\u2717 not selected\x1B[0m";
|
|
5171
6243
|
}
|
|
5172
|
-
return [twin.name, String(twin.toolCount), twin.description, status];
|
|
6244
|
+
return [twin.name, twin.toolCount != null ? String(twin.toolCount) : "\u2014", twin.description, status];
|
|
5173
6245
|
});
|
|
5174
6246
|
table(headers, rows);
|
|
5175
6247
|
if (isUnlimited) {
|
|
@@ -5194,85 +6266,12 @@ async function selectTwinsForPlan() {
|
|
|
5194
6266
|
const refreshed = await refreshAuthFromServer(creds);
|
|
5195
6267
|
saveCredentials(refreshed);
|
|
5196
6268
|
}
|
|
5197
|
-
function
|
|
5198
|
-
const cmd = new Command2("
|
|
5199
|
-
cmd.command("
|
|
5200
|
-
requireAuth({
|
|
5201
|
-
action: `start the "${name}" twin`,
|
|
5202
|
-
nextCommand: `archal twin start ${name}`
|
|
5203
|
-
});
|
|
5204
|
-
const knownTwin = KNOWN_TWINS2.find((t) => t.name === name);
|
|
5205
|
-
if (!knownTwin) {
|
|
5206
|
-
const available = KNOWN_TWINS2.map((t) => t.name).join(", ");
|
|
5207
|
-
error(`Unknown twin: "${name}". Available twins: ${available}`);
|
|
5208
|
-
process.exit(1);
|
|
5209
|
-
}
|
|
5210
|
-
if (runningTwins.has(name)) {
|
|
5211
|
-
warn(`Twin "${name}" is already running (PID: ${runningTwins.get(name)?.pid ?? "unknown"})`);
|
|
5212
|
-
return;
|
|
5213
|
-
}
|
|
5214
|
-
info("`archal run` uses hosted cloud twins. `archal twin start` is for local debugging only.");
|
|
5215
|
-
const args = [knownTwin.package, "--seed", opts.seed, "--transport", "rest"];
|
|
5216
|
-
if (opts.port) {
|
|
5217
|
-
args.push("--port", opts.port);
|
|
5218
|
-
}
|
|
5219
|
-
info(`Starting twin: ${name}`, { seed: opts.seed, transport: "rest" });
|
|
5220
|
-
const child = spawnMcpStdioProcess({
|
|
5221
|
-
command: "npx",
|
|
5222
|
-
args
|
|
5223
|
-
});
|
|
5224
|
-
const pid = child.pid ?? 0;
|
|
5225
|
-
runningTwins.set(name, {
|
|
5226
|
-
name,
|
|
5227
|
-
pid,
|
|
5228
|
-
startedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5229
|
-
process: child
|
|
5230
|
-
});
|
|
5231
|
-
child.on("exit", (code) => {
|
|
5232
|
-
info(`Twin "${name}" exited`, { code: String(code ?? "unknown") });
|
|
5233
|
-
runningTwins.delete(name);
|
|
5234
|
-
});
|
|
5235
|
-
success(`Twin "${name}" started (PID: ${pid})`);
|
|
5236
|
-
});
|
|
5237
|
-
cmd.command("stop").description("Stop a running digital twin").argument("<name>", "Twin name to stop").action(async (name) => {
|
|
5238
|
-
const twin = runningTwins.get(name);
|
|
5239
|
-
if (!twin) {
|
|
5240
|
-
error(`Twin "${name}" is not running`);
|
|
5241
|
-
const running = Array.from(runningTwins.keys());
|
|
5242
|
-
if (running.length > 0) {
|
|
5243
|
-
info(`Running twins: ${running.join(", ")}`);
|
|
5244
|
-
}
|
|
5245
|
-
process.exit(1);
|
|
5246
|
-
}
|
|
5247
|
-
info(`Stopping twin: ${name}`, { pid: String(twin.pid) });
|
|
5248
|
-
await killProcess(twin.process);
|
|
5249
|
-
runningTwins.delete(name);
|
|
5250
|
-
success(`Twin "${name}" stopped`);
|
|
5251
|
-
});
|
|
5252
|
-
cmd.command("status").description("Show status of running digital twins").action(() => {
|
|
5253
|
-
if (runningTwins.size === 0) {
|
|
5254
|
-
info("No twins currently running");
|
|
5255
|
-
return;
|
|
5256
|
-
}
|
|
5257
|
-
const headers = ["Name", "PID", "Started", "Status"];
|
|
5258
|
-
const rows = [];
|
|
5259
|
-
for (const twin of runningTwins.values()) {
|
|
5260
|
-
const isAlive = twin.process.exitCode === null;
|
|
5261
|
-
rows.push([
|
|
5262
|
-
twin.name,
|
|
5263
|
-
String(twin.pid),
|
|
5264
|
-
twin.startedAt,
|
|
5265
|
-
isAlive ? "running" : `exited (${twin.process.exitCode})`
|
|
5266
|
-
]);
|
|
5267
|
-
}
|
|
5268
|
-
table(headers, rows);
|
|
5269
|
-
});
|
|
5270
|
-
cmd.command("list").description("List available digital twins and entitlement status").action(async () => {
|
|
5271
|
-
warn("`archal twin list` is deprecated. Use `archal twins list`.");
|
|
6269
|
+
function createTwinsCommand() {
|
|
6270
|
+
const cmd = new Command2("twins").description("Manage twin catalog entitlements");
|
|
6271
|
+
cmd.command("list").description("List available twins and entitlement status").action(async () => {
|
|
5272
6272
|
await listTwinCatalog();
|
|
5273
6273
|
});
|
|
5274
6274
|
cmd.command("select").description("Choose which twins to use on your free plan").action(async () => {
|
|
5275
|
-
warn("`archal twin select` is deprecated. Use `archal twins select`.");
|
|
5276
6275
|
await selectTwinsForPlan();
|
|
5277
6276
|
});
|
|
5278
6277
|
return cmd;
|
|
@@ -5280,7 +6279,13 @@ function createTwinCommand() {
|
|
|
5280
6279
|
|
|
5281
6280
|
// src/commands/run.ts
|
|
5282
6281
|
function createRunCommand() {
|
|
5283
|
-
const cmd = new Command3("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path to scenario markdown file").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "100").option("--
|
|
6282
|
+
const cmd = new Command3("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path to scenario markdown file").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "100").option("--engine-endpoint <url>", "API engine endpoint URL (base URL or /v1/responses)").option("--engine-token <token>", "Bearer token for API engine auth").option(
|
|
6283
|
+
"--engine-model <model>",
|
|
6284
|
+
"Model id for API mode; in local mode this is exported as ARCHAL_ENGINE_MODEL"
|
|
6285
|
+
).option("--engine-twin-urls <path>", "Path to JSON mapping twin names to remote-reachable MCP base URLs").option("--engine-timeout <seconds>", "Timeout for API engine HTTP call per run (defaults to run timeout)").option(
|
|
6286
|
+
"--harness-dir <path>",
|
|
6287
|
+
"Local agent execution directory (archal-harness.json is optional)"
|
|
6288
|
+
).option("--openclaw-url <url>", "Deprecated alias for --engine-endpoint").option("--openclaw-token <token>", "Deprecated alias for --engine-token").option("--openclaw-agent <id>", "Deprecated alias for --engine-model").option("--openclaw-twin-urls <path>", "Deprecated alias for --engine-twin-urls").option("--openclaw-timeout <seconds>", "Deprecated alias for --engine-timeout").option("--api-base-urls <path>", "Path to JSON mapping service names to clone API base URLs for raw API code routing").option("--api-proxy-url <url>", "Proxy URL for raw API code routing metadata").option("--preflight-only", "Run environment/config preflight checks only and exit").option("--no-dynamic-seed", "Disable dynamic seed generation (use keyword-matched seed only)").option("--no-seed-cache", "Skip seed cache for dynamic generation").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (scenarioArg, opts) => {
|
|
5284
6289
|
const required = requireAuth({
|
|
5285
6290
|
action: "run a scenario",
|
|
5286
6291
|
nextCommand: `archal run ${scenarioArg}`
|
|
@@ -5296,8 +6301,8 @@ function createRunCommand() {
|
|
|
5296
6301
|
if (opts.verbose) {
|
|
5297
6302
|
configureLogger({ verbose: true, level: "debug" });
|
|
5298
6303
|
}
|
|
5299
|
-
const scenarioPath =
|
|
5300
|
-
if (!
|
|
6304
|
+
const scenarioPath = resolve8(scenarioArg);
|
|
6305
|
+
if (!existsSync12(scenarioPath)) {
|
|
5301
6306
|
process.stderr.write(`Error: Scenario file not found: ${scenarioPath}
|
|
5302
6307
|
`);
|
|
5303
6308
|
process.exit(1);
|
|
@@ -5387,26 +6392,20 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5387
6392
|
process.stderr.write("Error: --pass-threshold must be a number between 0 and 100\n");
|
|
5388
6393
|
process.exit(1);
|
|
5389
6394
|
}
|
|
5390
|
-
|
|
5391
|
-
|
|
5392
|
-
|
|
5393
|
-
|
|
6395
|
+
let engine;
|
|
6396
|
+
try {
|
|
6397
|
+
engine = resolveEngineConfig(opts, timeout);
|
|
6398
|
+
} catch (err) {
|
|
6399
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
6400
|
+
process.stderr.write(`Error: ${message}
|
|
6401
|
+
`);
|
|
5394
6402
|
process.exit(1);
|
|
5395
6403
|
}
|
|
5396
|
-
|
|
5397
|
-
if (opts.openclawTimeout) {
|
|
5398
|
-
openclawTimeout = parseInt(opts.openclawTimeout, 10);
|
|
5399
|
-
if (Number.isNaN(openclawTimeout) || openclawTimeout <= 0) {
|
|
5400
|
-
process.stderr.write("Error: --openclaw-timeout must be a positive integer\n");
|
|
5401
|
-
process.exit(1);
|
|
5402
|
-
}
|
|
5403
|
-
}
|
|
5404
|
-
const resolvedOpenClawToken = resolveOpenClawGatewayToken(opts.openclawToken);
|
|
5405
|
-
if (opts.openclawUrl && !resolvedOpenClawToken) {
|
|
6404
|
+
if (engine.deprecatedAliasesUsed.length > 0) {
|
|
5406
6405
|
process.stderr.write(
|
|
5407
|
-
|
|
6406
|
+
`Warning: OpenClaw flags are deprecated (${engine.deprecatedAliasesUsed.join(", ")}). Use --engine-* equivalents.
|
|
6407
|
+
`
|
|
5408
6408
|
);
|
|
5409
|
-
process.exit(1);
|
|
5410
6409
|
}
|
|
5411
6410
|
{
|
|
5412
6411
|
const sessionResult = await startSession(credentials.token, {
|
|
@@ -5433,9 +6432,9 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5433
6432
|
if (!runFailureMessage && Object.keys(endpointRoots).length > 0) {
|
|
5434
6433
|
cloudTwinUrls = endpointRoots;
|
|
5435
6434
|
}
|
|
5436
|
-
if (!runFailureMessage &&
|
|
5437
|
-
generatedTwinUrlMapPath =
|
|
5438
|
-
`.archal-session-${backendSessionId}-
|
|
6435
|
+
if (!runFailureMessage && engine.mode === "api" && !engine.twinUrlsPath) {
|
|
6436
|
+
generatedTwinUrlMapPath = resolve8(
|
|
6437
|
+
`.archal-session-${backendSessionId}-engine-twin-urls.json`
|
|
5439
6438
|
);
|
|
5440
6439
|
writeFileSync9(
|
|
5441
6440
|
generatedTwinUrlMapPath,
|
|
@@ -5444,7 +6443,7 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5444
6443
|
);
|
|
5445
6444
|
}
|
|
5446
6445
|
if (!runFailureMessage && !opts.apiBaseUrls && apiBaseUrls && Object.keys(apiBaseUrls).length > 0) {
|
|
5447
|
-
generatedApiBaseUrlMapPath =
|
|
6446
|
+
generatedApiBaseUrlMapPath = resolve8(
|
|
5448
6447
|
`.archal-session-${backendSessionId}-api-base-urls.json`
|
|
5449
6448
|
);
|
|
5450
6449
|
writeFileSync9(
|
|
@@ -5454,15 +6453,34 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5454
6453
|
);
|
|
5455
6454
|
}
|
|
5456
6455
|
if (!runFailureMessage) {
|
|
5457
|
-
const
|
|
5458
|
-
|
|
5459
|
-
|
|
5460
|
-
|
|
5461
|
-
|
|
5462
|
-
|
|
6456
|
+
const SESSION_READY_TIMEOUT_MS = 12e4;
|
|
6457
|
+
const SESSION_POLL_INTERVAL_MS = 3e3;
|
|
6458
|
+
const readyDeadline = Date.now() + SESSION_READY_TIMEOUT_MS;
|
|
6459
|
+
let sessionReady = false;
|
|
6460
|
+
while (Date.now() < readyDeadline) {
|
|
6461
|
+
const [statusResult, healthResult] = await Promise.all([
|
|
6462
|
+
getSessionStatus(credentials.token, backendSessionId),
|
|
6463
|
+
getSessionHealth(credentials.token, backendSessionId)
|
|
6464
|
+
]);
|
|
6465
|
+
if (!statusResult.ok) {
|
|
6466
|
+
runFailureMessage = `session status check failed (${statusResult.error})`;
|
|
6467
|
+
break;
|
|
6468
|
+
}
|
|
6469
|
+
const status = statusResult.data.status;
|
|
6470
|
+
if (status === "failed" || status === "expired" || status === "ended") {
|
|
6471
|
+
runFailureMessage = `session ${status}`;
|
|
6472
|
+
break;
|
|
6473
|
+
}
|
|
6474
|
+
const healthAlive = healthResult.ok && healthResult.data.alive;
|
|
6475
|
+
const statusAlive = statusResult.data.alive || statusResult.data.status === "ready";
|
|
6476
|
+
if (statusAlive && healthAlive) {
|
|
6477
|
+
sessionReady = true;
|
|
6478
|
+
break;
|
|
6479
|
+
}
|
|
6480
|
+
await new Promise((resolve13) => setTimeout(resolve13, SESSION_POLL_INTERVAL_MS));
|
|
5463
6481
|
}
|
|
5464
|
-
if (!
|
|
5465
|
-
runFailureMessage =
|
|
6482
|
+
if (!sessionReady && !runFailureMessage) {
|
|
6483
|
+
runFailureMessage = "session timed out waiting for twins to become ready";
|
|
5466
6484
|
}
|
|
5467
6485
|
}
|
|
5468
6486
|
} else if (!sessionResult.offline) {
|
|
@@ -5482,17 +6500,26 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5482
6500
|
output: outputFormat,
|
|
5483
6501
|
seed: opts.seed,
|
|
5484
6502
|
rateLimit,
|
|
6503
|
+
engineEndpoint: engine.endpoint,
|
|
6504
|
+
engineToken: engine.token,
|
|
6505
|
+
engineModel: engine.model,
|
|
6506
|
+
engineTwinUrls: generatedTwinUrlMapPath ?? engine.twinUrlsPath,
|
|
6507
|
+
engineTimeout: engine.timeoutSeconds,
|
|
6508
|
+
harnessDir: engine.harnessDir,
|
|
5485
6509
|
openclawUrl: opts.openclawUrl,
|
|
5486
|
-
openclawToken:
|
|
6510
|
+
openclawToken: engine.token,
|
|
5487
6511
|
openclawAgent: opts.openclawAgent,
|
|
5488
6512
|
openclawTwinUrls: generatedTwinUrlMapPath ?? opts.openclawTwinUrls,
|
|
5489
|
-
openclawTimeout,
|
|
6513
|
+
openclawTimeout: engine.timeoutSeconds,
|
|
5490
6514
|
apiBaseUrls: generatedApiBaseUrlMapPath ?? opts.apiBaseUrls,
|
|
5491
6515
|
apiProxyUrl: opts.apiProxyUrl,
|
|
5492
6516
|
preflightOnly: opts.preflightOnly,
|
|
5493
6517
|
cloudTwinUrls,
|
|
5494
6518
|
noDynamicSeed: !opts.dynamicSeed,
|
|
5495
|
-
noSeedCache: !opts.seedCache
|
|
6519
|
+
noSeedCache: !opts.seedCache,
|
|
6520
|
+
apiBearerToken: credentials.token,
|
|
6521
|
+
apiAdminToken: process.env["ARCHAL_RUNTIME_ADMIN_TOKEN"],
|
|
6522
|
+
apiAdminUserId: process.env["ARCHAL_RUNTIME_USER_ID"]
|
|
5496
6523
|
});
|
|
5497
6524
|
if (!opts.preflightOnly && report.satisfactionScore < passThreshold) {
|
|
5498
6525
|
runFailureMessage = `Satisfaction score ${report.satisfactionScore.toFixed(1)} is below pass threshold ${passThreshold}`;
|
|
@@ -5502,10 +6529,10 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5502
6529
|
const message = err instanceof Error ? err.message : String(err);
|
|
5503
6530
|
runFailureMessage = message;
|
|
5504
6531
|
} finally {
|
|
5505
|
-
if (generatedTwinUrlMapPath &&
|
|
6532
|
+
if (generatedTwinUrlMapPath && existsSync12(generatedTwinUrlMapPath)) {
|
|
5506
6533
|
unlinkSync7(generatedTwinUrlMapPath);
|
|
5507
6534
|
}
|
|
5508
|
-
if (generatedApiBaseUrlMapPath &&
|
|
6535
|
+
if (generatedApiBaseUrlMapPath && existsSync12(generatedApiBaseUrlMapPath)) {
|
|
5509
6536
|
unlinkSync7(generatedApiBaseUrlMapPath);
|
|
5510
6537
|
}
|
|
5511
6538
|
if (backendSessionId) {
|
|
@@ -5566,10 +6593,90 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5566
6593
|
});
|
|
5567
6594
|
return cmd;
|
|
5568
6595
|
}
|
|
5569
|
-
function
|
|
6596
|
+
/**
 * Resolves the agent-engine settings for a run from CLI options and
 * environment variables.
 *
 * @param {object} opts Parsed CLI options (engine*, openclaw*, harnessDir).
 * @param {number} runTimeoutSeconds Per-run timeout, used as the engine
 *   timeout when no engine-specific timeout is supplied or when the mode is
 *   not "api".
 * @returns {{mode: string, endpoint: (string|undefined), token: (string|undefined),
 *   model: (string|undefined), twinUrlsPath: (string|undefined),
 *   timeoutSeconds: number, harnessDir: (string|undefined),
 *   deprecatedAliasesUsed: string[]}} The resolved configuration.
 * @throws {Error} When no execution mode is configured (from
 *   `resolveEngineMode`), when the engine timeout is not a positive integer,
 *   when API mode has no model, or when the OpenClaw endpoint alias is used
 *   without a token.
 */
function resolveEngineConfig(opts, runTimeoutSeconds) {
  const deprecatedAliasesUsed = collectDeprecatedAliases(opts);
  const mode = resolveEngineMode(opts);
  const openclawEndpointAlias = firstNonEmpty(opts.openclawUrl, process.env["OPENCLAW_URL"]);
  const endpoint = firstNonEmpty(
    opts.engineEndpoint,
    openclawEndpointAlias,
    process.env["ARCHAL_ENGINE_ENDPOINT"]
  );
  const token = resolveEngineToken(firstNonEmpty(opts.engineToken, opts.openclawToken));
  const openclawModel = resolveOpenClawModel(firstNonEmpty(opts.openclawAgent, process.env["OPENCLAW_AGENT_ID"]));
  const model = firstNonEmpty(
    opts.engineModel,
    process.env["ARCHAL_ENGINE_MODEL"],
    openclawModel,
    // Legacy OpenClaw alias path keeps the historical default model for compatibility.
    openclawEndpointAlias ? "openclaw:main" : void 0
  );
  // Track where the timeout came from so a validation error names the flag or
  // variable the user actually set, not always --engine-timeout.
  const timeoutCandidates = [
    { label: "--engine-timeout", value: firstNonEmpty(opts.engineTimeout) },
    { label: "--openclaw-timeout", value: firstNonEmpty(opts.openclawTimeout) },
    { label: "ARCHAL_ENGINE_TIMEOUT", value: firstNonEmpty(process.env["ARCHAL_ENGINE_TIMEOUT"]) }
  ];
  const timeoutChoice = timeoutCandidates.find((candidate) => candidate.value !== void 0);
  // The engine timeout is only validated and used in API mode.
  const timeoutSeconds = mode === "api" && timeoutChoice
    ? parsePositiveInteger(timeoutChoice.value, timeoutChoice.label) ?? runTimeoutSeconds
    : runTimeoutSeconds;
  const twinUrlsPath = firstNonEmpty(
    opts.engineTwinUrls,
    opts.openclawTwinUrls,
    process.env["ARCHAL_ENGINE_TWIN_URLS"]
  );
  const harnessDir = firstNonEmpty(opts.harnessDir, process.env["ARCHAL_HARNESS_DIR"]);
  if (mode === "api") {
    if (!model) {
      throw new Error(
        "--engine-model is required for API mode (or use --openclaw-agent/OPENCLAW_AGENT_ID)."
      );
    }
    if (openclawEndpointAlias && !token) {
      throw new Error(
        "OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD."
      );
    }
  }
  return {
    mode,
    endpoint,
    token,
    model,
    twinUrlsPath,
    timeoutSeconds,
    harnessDir,
    deprecatedAliasesUsed
  };
}
|
|
6649
|
+
/**
 * Picks the agent execution mode from CLI options and environment variables.
 *
 * A configured endpoint (--engine-endpoint, --openclaw-url,
 * ARCHAL_ENGINE_ENDPOINT or OPENCLAW_URL) wins over a harness directory.
 *
 * @param {object} opts Parsed CLI options.
 * @returns {"api"|"local"} "api" when any endpoint is set, otherwise "local"
 *   when a harness directory is set.
 * @throws {Error} When neither an endpoint nor a harness directory is set.
 */
function resolveEngineMode(opts) {
  const remoteEndpoint = firstNonEmpty(
    opts.engineEndpoint,
    opts.openclawUrl,
    process.env["ARCHAL_ENGINE_ENDPOINT"],
    process.env["OPENCLAW_URL"]
  );
  if (remoteEndpoint) {
    return "api";
  }
  const localHarnessDir = firstNonEmpty(opts.harnessDir, process.env["ARCHAL_HARNESS_DIR"]);
  if (!localHarnessDir) {
    throw new Error(
      "No agent execution mode configured. Provide --engine-endpoint for remote agent execution, or --harness-dir for local agent execution."
    );
  }
  return "local";
}
|
|
6665
|
+
/**
 * Normalize an OpenClaw agent/model identifier.
 *
 * @param {string | undefined} raw - Identifier as supplied by the user.
 * @returns {string | undefined} `undefined` for missing/blank input; the trimmed
 *   value unchanged when it already contains a ":"; otherwise the trimmed value
 *   prefixed with "openclaw:".
 */
function resolveOpenClawModel(raw) {
  const trimmed = raw ? raw.trim() : "";
  if (trimmed.length === 0) {
    return undefined;
  }
  if (trimmed.includes(":")) {
    return trimmed;
  }
  return `openclaw:${trimmed}`;
}
|
|
6672
|
+
function resolveEngineToken(rawToken) {
|
|
5570
6673
|
if (rawToken && rawToken.trim()) {
|
|
5571
6674
|
return rawToken.trim();
|
|
5572
6675
|
}
|
|
6676
|
+
const engineToken = process.env["ARCHAL_ENGINE_TOKEN"]?.trim();
|
|
6677
|
+
if (engineToken) {
|
|
6678
|
+
return engineToken;
|
|
6679
|
+
}
|
|
5573
6680
|
const token = process.env["OPENCLAW_GATEWAY_TOKEN"]?.trim();
|
|
5574
6681
|
if (token) {
|
|
5575
6682
|
return token;
|
|
@@ -5580,11 +6687,36 @@ function resolveOpenClawGatewayToken(rawToken) {
|
|
|
5580
6687
|
}
|
|
5581
6688
|
return void 0;
|
|
5582
6689
|
}
|
|
6690
|
+
/**
 * Return the first argument that is non-blank after trimming.
 *
 * @param {...(string | null | undefined)} values - Candidates in priority order.
 * @returns {string | undefined} The trimmed winner, or `undefined` when every
 *   candidate is missing or whitespace-only.
 */
function firstNonEmpty(...values) {
  const winner = values.find((candidate) => candidate && candidate.trim());
  return winner ? winner.trim() : undefined;
}
|
|
6698
|
+
/**
 * Parse a CLI/environment value that must be a positive integer.
 *
 * The whole value has to be decimal digits (an optional leading "+" is
 * tolerated). A bare `parseInt` would silently accept inputs such as "10abc"
 * or "1.5" by ignoring everything after the leading digits, which contradicts
 * the error message below.
 *
 * @param {string | undefined} raw - Raw value; falsy means "not provided".
 * @param {string} flagName - Flag name used in the error message.
 * @returns {number | undefined} The parsed integer, or `undefined` when `raw` is falsy.
 * @throws {Error} When `raw` is provided but is not a positive integer.
 */
function parsePositiveInteger(raw, flagName) {
  if (!raw) return undefined;
  const text = String(raw).trim();
  const parsed = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  // isSafeInteger also rejects NaN and digit strings too large to represent exactly.
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    throw new Error(`${flagName} must be a positive integer`);
  }
  return parsed;
}
|
|
6706
|
+
/**
 * List the deprecated `--openclaw-*` flags that were supplied.
 *
 * @param {object} opts - Parsed CLI options.
 * @returns {string[]} Flag names, in a fixed order, for every deprecated option
 *   whose value is truthy.
 */
function collectDeprecatedAliases(opts) {
  const deprecatedFlags = [
    ["openclawUrl", "--openclaw-url"],
    ["openclawToken", "--openclaw-token"],
    ["openclawAgent", "--openclaw-agent"],
    ["openclawTwinUrls", "--openclaw-twin-urls"],
    ["openclawTimeout", "--openclaw-timeout"]
  ];
  return deprecatedFlags
    .filter(([optionKey]) => opts[optionKey])
    .map(([, flag]) => flag);
}
|
|
5583
6715
|
|
|
5584
6716
|
// src/commands/init.ts
|
|
5585
6717
|
import { Command as Command4 } from "commander";
|
|
5586
|
-
import { existsSync as
|
|
5587
|
-
import { join as join9, resolve as
|
|
6718
|
+
import { existsSync as existsSync13, mkdirSync as mkdirSync6, writeFileSync as writeFileSync10 } from "fs";
|
|
6719
|
+
import { join as join9, resolve as resolve9 } from "path";
|
|
5588
6720
|
var SAMPLE_SCENARIO = `# Close Stale Issues
|
|
5589
6721
|
|
|
5590
6722
|
## Setup
|
|
@@ -5759,7 +6891,7 @@ var SAMPLE_PACKAGE_JSON = `{
|
|
|
5759
6891
|
}
|
|
5760
6892
|
`;
|
|
5761
6893
|
function writeIfMissing(filePath, content) {
|
|
5762
|
-
if (!
|
|
6894
|
+
if (!existsSync13(filePath)) {
|
|
5763
6895
|
writeFileSync10(filePath, content);
|
|
5764
6896
|
info(`Created ${filePath}`);
|
|
5765
6897
|
} else {
|
|
@@ -5768,8 +6900,8 @@ function writeIfMissing(filePath, content) {
|
|
|
5768
6900
|
}
|
|
5769
6901
|
function createInitCommand() {
|
|
5770
6902
|
const cmd = new Command4("init").description("Initialize an Archal test directory with sample scenario and agent").argument("[directory]", "Directory to initialize", "archal").action((directory) => {
|
|
5771
|
-
const targetDir =
|
|
5772
|
-
if (
|
|
6903
|
+
const targetDir = resolve9(directory);
|
|
6904
|
+
if (existsSync13(targetDir)) {
|
|
5773
6905
|
warn(`Directory already exists: ${targetDir}`);
|
|
5774
6906
|
warn("Skipping files that already exist.");
|
|
5775
6907
|
} else {
|
|
@@ -5792,23 +6924,10 @@ function createInitCommand() {
|
|
|
5792
6924
|
return cmd;
|
|
5793
6925
|
}
|
|
5794
6926
|
|
|
5795
|
-
// src/commands/twins.ts
|
|
5796
|
-
import { Command as Command5 } from "commander";
|
|
5797
|
-
function createTwinsCommand() {
|
|
5798
|
-
const cmd = new Command5("twins").description("Manage twin catalog entitlements");
|
|
5799
|
-
cmd.command("list").description("List available twins and entitlement status").action(async () => {
|
|
5800
|
-
await listTwinCatalog();
|
|
5801
|
-
});
|
|
5802
|
-
cmd.command("select").description("Choose which twins to use on your free plan").action(async () => {
|
|
5803
|
-
await selectTwinsForPlan();
|
|
5804
|
-
});
|
|
5805
|
-
return cmd;
|
|
5806
|
-
}
|
|
5807
|
-
|
|
5808
6927
|
// src/commands/scenario.ts
|
|
5809
|
-
import { Command as
|
|
5810
|
-
import { existsSync as
|
|
5811
|
-
import { resolve as
|
|
6928
|
+
import { Command as Command5 } from "commander";
|
|
6929
|
+
import { existsSync as existsSync14, readdirSync as readdirSync3, writeFileSync as writeFileSync11, mkdirSync as mkdirSync7 } from "fs";
|
|
6930
|
+
import { resolve as resolve10, join as join10, extname, relative } from "path";
|
|
5812
6931
|
var SCENARIO_TEMPLATE = `# {{NAME}}
|
|
5813
6932
|
|
|
5814
6933
|
## Setup
|
|
@@ -5834,15 +6953,15 @@ timeout: 120
|
|
|
5834
6953
|
runs: 5
|
|
5835
6954
|
`;
|
|
5836
6955
|
var SCENARIO_DIR_CANDIDATES = [
|
|
5837
|
-
|
|
5838
|
-
|
|
5839
|
-
|
|
5840
|
-
|
|
5841
|
-
|
|
6956
|
+
resolve10("scenarios"),
|
|
6957
|
+
resolve10("scenario"),
|
|
6958
|
+
resolve10("test", "scenarios"),
|
|
6959
|
+
resolve10("tests", "scenarios"),
|
|
6960
|
+
resolve10(".archal", "scenarios")
|
|
5842
6961
|
];
|
|
5843
6962
|
function findScenarioFiles(dir) {
|
|
5844
6963
|
const files = [];
|
|
5845
|
-
if (!
|
|
6964
|
+
if (!existsSync14(dir)) return files;
|
|
5846
6965
|
const entries = readdirSync3(dir, { withFileTypes: true });
|
|
5847
6966
|
for (const entry of entries) {
|
|
5848
6967
|
const fullPath = join10(dir, entry.name);
|
|
@@ -5856,22 +6975,19 @@ function findScenarioFiles(dir) {
|
|
|
5856
6975
|
}
|
|
5857
6976
|
function findLocalScenariosDir() {
|
|
5858
6977
|
for (const candidate of SCENARIO_DIR_CANDIDATES) {
|
|
5859
|
-
if (
|
|
6978
|
+
if (existsSync14(candidate)) {
|
|
5860
6979
|
return { dir: candidate, candidates: SCENARIO_DIR_CANDIDATES };
|
|
5861
6980
|
}
|
|
5862
6981
|
}
|
|
5863
6982
|
return {
|
|
5864
|
-
dir:
|
|
6983
|
+
dir: resolve10("scenarios"),
|
|
5865
6984
|
candidates: SCENARIO_DIR_CANDIDATES
|
|
5866
6985
|
};
|
|
5867
6986
|
}
|
|
5868
6987
|
function toDisplayPath(path) {
|
|
5869
|
-
const
|
|
5870
|
-
if (
|
|
5871
|
-
|
|
5872
|
-
return `.${path.slice(cwd.length)}`;
|
|
5873
|
-
}
|
|
5874
|
-
return path;
|
|
6988
|
+
const rel = relative(resolve10("."), path);
|
|
6989
|
+
if (!rel) return ".";
|
|
6990
|
+
return rel.startsWith("..") ? path : rel;
|
|
5875
6991
|
}
|
|
5876
6992
|
function getCachedScenariosDir() {
|
|
5877
6993
|
return join10(ensureArchalDir(), "scenarios");
|
|
@@ -5897,14 +7013,14 @@ async function syncRemoteScenarios(token) {
|
|
|
5897
7013
|
return scenarios;
|
|
5898
7014
|
}
|
|
5899
7015
|
function createScenarioCommand() {
|
|
5900
|
-
const cmd = new
|
|
7016
|
+
const cmd = new Command5("scenario").description("Manage test scenarios");
|
|
5901
7017
|
cmd.command("list").description("List available scenarios").option("-d, --dir <directory>", "Scenario directory to search").option("--local", "Only show local scenarios (skip remote fetch)").action(async (opts) => {
|
|
5902
7018
|
const creds = getCredentials();
|
|
5903
7019
|
const headers = ["Scenario", "Source", "Criteria", "Twins"];
|
|
5904
7020
|
const rows = [];
|
|
5905
|
-
const localResolution = opts.dir ? { dir:
|
|
7021
|
+
const localResolution = opts.dir ? { dir: resolve10(opts.dir), candidates: [resolve10(opts.dir)] } : findLocalScenariosDir();
|
|
5906
7022
|
const localDir = localResolution.dir;
|
|
5907
|
-
if (
|
|
7023
|
+
if (existsSync14(localDir)) {
|
|
5908
7024
|
const localFiles = findScenarioFiles(localDir);
|
|
5909
7025
|
let hiddenCount = 0;
|
|
5910
7026
|
for (const file of localFiles) {
|
|
@@ -5917,7 +7033,7 @@ function createScenarioCommand() {
|
|
|
5917
7033
|
continue;
|
|
5918
7034
|
}
|
|
5919
7035
|
}
|
|
5920
|
-
const relativePath =
|
|
7036
|
+
const relativePath = relative(resolve10("."), file);
|
|
5921
7037
|
rows.push([
|
|
5922
7038
|
scenario.title,
|
|
5923
7039
|
relativePath,
|
|
@@ -5926,7 +7042,7 @@ function createScenarioCommand() {
|
|
|
5926
7042
|
]);
|
|
5927
7043
|
} catch (err) {
|
|
5928
7044
|
const message = err instanceof Error ? err.message : String(err);
|
|
5929
|
-
const relativePath =
|
|
7045
|
+
const relativePath = relative(resolve10("."), file);
|
|
5930
7046
|
rows.push([`(parse error)`, relativePath, "-", message]);
|
|
5931
7047
|
}
|
|
5932
7048
|
}
|
|
@@ -5971,8 +7087,8 @@ function createScenarioCommand() {
|
|
|
5971
7087
|
Found ${rows.length} scenario(s)`);
|
|
5972
7088
|
});
|
|
5973
7089
|
cmd.command("validate").description("Parse and validate a scenario file").argument("<file>", "Path to scenario markdown file").action((file) => {
|
|
5974
|
-
const filePath =
|
|
5975
|
-
if (!
|
|
7090
|
+
const filePath = resolve10(file);
|
|
7091
|
+
if (!existsSync14(filePath)) {
|
|
5976
7092
|
error(`File not found: ${filePath}`);
|
|
5977
7093
|
process.exit(1);
|
|
5978
7094
|
}
|
|
@@ -6014,14 +7130,14 @@ Found ${rows.length} scenario(s)`);
|
|
|
6014
7130
|
info("Run `archal twins select` to change your selection or `archal upgrade` to unlock all twins.");
|
|
6015
7131
|
process.exit(1);
|
|
6016
7132
|
}
|
|
6017
|
-
const scenariosDir = opts.dir ?
|
|
6018
|
-
if (!
|
|
7133
|
+
const scenariosDir = opts.dir ? resolve10(opts.dir) : findLocalScenariosDir().dir;
|
|
7134
|
+
if (!existsSync14(scenariosDir)) {
|
|
6019
7135
|
mkdirSync7(scenariosDir, { recursive: true });
|
|
6020
7136
|
info(`Created scenarios directory: ${scenariosDir}`);
|
|
6021
7137
|
}
|
|
6022
7138
|
const fileName = name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "") + ".md";
|
|
6023
7139
|
const filePath = join10(scenariosDir, fileName);
|
|
6024
|
-
if (
|
|
7140
|
+
if (existsSync14(filePath)) {
|
|
6025
7141
|
error(`Scenario file already exists: ${filePath}`);
|
|
6026
7142
|
process.exit(1);
|
|
6027
7143
|
}
|
|
@@ -6038,9 +7154,9 @@ Found ${rows.length} scenario(s)`);
|
|
|
6038
7154
|
|
|
6039
7155
|
// src/commands/trace.ts
|
|
6040
7156
|
import { writeFileSync as writeFileSync12 } from "fs";
|
|
6041
|
-
import { resolve as
|
|
7157
|
+
import { resolve as resolve11 } from "path";
|
|
6042
7158
|
import { createInterface as createInterface3 } from "readline";
|
|
6043
|
-
import { Command as
|
|
7159
|
+
import { Command as Command6 } from "commander";
|
|
6044
7160
|
function formatTimestamp2(iso) {
|
|
6045
7161
|
try {
|
|
6046
7162
|
return new Date(iso).toLocaleString();
|
|
@@ -6063,10 +7179,10 @@ var TRACE_HEADERS = ["ID", "Scenario", "Score", "Runs", "Entries", "Timestamp"];
|
|
|
6063
7179
|
function confirmPrompt(message) {
|
|
6064
7180
|
if (!process.stdin.isTTY) return Promise.resolve(false);
|
|
6065
7181
|
const rl = createInterface3({ input: process.stdin, output: process.stderr });
|
|
6066
|
-
return new Promise((
|
|
7182
|
+
return new Promise((resolve13) => {
|
|
6067
7183
|
rl.question(`${message} [y/N] `, (answer) => {
|
|
6068
7184
|
rl.close();
|
|
6069
|
-
|
|
7185
|
+
resolve13(answer.trim().toLowerCase() === "y");
|
|
6070
7186
|
});
|
|
6071
7187
|
});
|
|
6072
7188
|
}
|
|
@@ -6079,7 +7195,7 @@ function parsePositiveInt(val, flag) {
|
|
|
6079
7195
|
return n;
|
|
6080
7196
|
}
|
|
6081
7197
|
function createTraceCommand() {
|
|
6082
|
-
const cmd = new
|
|
7198
|
+
const cmd = new Command6("trace").description("Inspect, search, and manage run traces");
|
|
6083
7199
|
cmd.command("list").description("List recent traces").option("-n, --limit <count>", "Number of traces to show", "20").action((opts) => {
|
|
6084
7200
|
const traces = listTraces(parsePositiveInt(opts.limit, "--limit"));
|
|
6085
7201
|
if (traces.length === 0) {
|
|
@@ -6183,7 +7299,7 @@ ${traces.length} trace(s) found`);
|
|
|
6183
7299
|
process.exit(1);
|
|
6184
7300
|
}
|
|
6185
7301
|
if (opts.output) {
|
|
6186
|
-
const outPath =
|
|
7302
|
+
const outPath = resolve11(opts.output);
|
|
6187
7303
|
writeFileSync12(outPath, json, "utf-8");
|
|
6188
7304
|
info(`Trace exported to: ${outPath}`);
|
|
6189
7305
|
} else {
|
|
@@ -6260,10 +7376,10 @@ ${traces.length} trace(s) found`);
|
|
|
6260
7376
|
}
|
|
6261
7377
|
|
|
6262
7378
|
// src/commands/config.ts
|
|
6263
|
-
import { existsSync as
|
|
6264
|
-
import { Command as
|
|
7379
|
+
import { existsSync as existsSync15, unlinkSync as unlinkSync8 } from "fs";
|
|
7380
|
+
import { Command as Command7 } from "commander";
|
|
6265
7381
|
function createConfigCommand() {
|
|
6266
|
-
const cmd = new
|
|
7382
|
+
const cmd = new Command7("config").description("Manage Archal configuration");
|
|
6267
7383
|
cmd.command("show").description("Print current configuration").option("--json", "Output as JSON").action((opts) => {
|
|
6268
7384
|
const display = getConfigDisplay();
|
|
6269
7385
|
if (opts.json) {
|
|
@@ -6279,6 +7395,11 @@ function createConfigCommand() {
|
|
|
6279
7395
|
model: evaluator["model"] ?? "(not set)",
|
|
6280
7396
|
apiKey: evaluator["apiKey"] ?? "(not set)"
|
|
6281
7397
|
});
|
|
7398
|
+
const seedGen = display["seedGeneration"];
|
|
7399
|
+
printConfigSection("Seed Generation", {
|
|
7400
|
+
model: seedGen["model"] ?? "(not set)",
|
|
7401
|
+
geminiApiKey: seedGen["geminiApiKey"] ?? "(not set)"
|
|
7402
|
+
});
|
|
6282
7403
|
const defaults = display["defaults"];
|
|
6283
7404
|
printConfigSection("Defaults", {
|
|
6284
7405
|
runs: String(defaults["runs"]),
|
|
@@ -6291,12 +7412,16 @@ function createConfigCommand() {
|
|
|
6291
7412
|
});
|
|
6292
7413
|
process.stdout.write("\n");
|
|
6293
7414
|
info("Set values with: archal config set <key> <value>");
|
|
6294
|
-
info("Valid keys: telemetry, evaluator.model, evaluator.apiKey, defaults.runs, defaults.timeout");
|
|
7415
|
+
info("Valid keys: telemetry, evaluator.model, evaluator.apiKey, seedGeneration.model, seedGeneration.geminiApiKey, defaults.runs, defaults.timeout");
|
|
6295
7416
|
});
|
|
6296
7417
|
cmd.command("set").description("Set a configuration value").argument("<key>", "Configuration key (e.g., evaluator.model, defaults.runs)").argument("<value>", "Value to set").action((key, value) => {
|
|
6297
7418
|
try {
|
|
6298
7419
|
setConfigValue(key, value);
|
|
6299
7420
|
success(`Set ${key} = ${key.includes("apiKey") ? "***" : value}`);
|
|
7421
|
+
if (key.includes("apiKey") && !value.startsWith("env:")) {
|
|
7422
|
+
warn("API key stored in plaintext in config file. Consider using env: prefix instead:");
|
|
7423
|
+
info(` archal config set ${key} env:YOUR_ENV_VAR_NAME`);
|
|
7424
|
+
}
|
|
6300
7425
|
} catch (err) {
|
|
6301
7426
|
const message = err instanceof Error ? err.message : String(err);
|
|
6302
7427
|
error(message);
|
|
@@ -6306,7 +7431,7 @@ function createConfigCommand() {
|
|
|
6306
7431
|
cmd.command("init").description("Create default configuration file").option("--force", "Overwrite existing config").action((opts) => {
|
|
6307
7432
|
const configPath = getConfigPath();
|
|
6308
7433
|
if (opts.force) {
|
|
6309
|
-
if (
|
|
7434
|
+
if (existsSync15(configPath)) {
|
|
6310
7435
|
unlinkSync8(configPath);
|
|
6311
7436
|
}
|
|
6312
7437
|
}
|
|
@@ -6316,7 +7441,7 @@ function createConfigCommand() {
|
|
|
6316
7441
|
info("\nNext steps:");
|
|
6317
7442
|
info(" 1. Set your API key:");
|
|
6318
7443
|
info(" archal config set evaluator.apiKey your-key-here");
|
|
6319
|
-
info(" or set
|
|
7444
|
+
info(" or set GEMINI_API_KEY environment variable (default provider)");
|
|
6320
7445
|
info("");
|
|
6321
7446
|
info(" 2. Create a scenario:");
|
|
6322
7447
|
info(" archal scenario create my-first-test");
|
|
@@ -6345,31 +7470,33 @@ function printConfigSection(name, values) {
|
|
|
6345
7470
|
}
|
|
6346
7471
|
|
|
6347
7472
|
// src/commands/demo.ts
|
|
6348
|
-
import { Command as
|
|
6349
|
-
import { existsSync as
|
|
6350
|
-
import { resolve as
|
|
6351
|
-
import { fileURLToPath as
|
|
7473
|
+
import { Command as Command8 } from "commander";
|
|
7474
|
+
import { existsSync as existsSync16 } from "fs";
|
|
7475
|
+
import { resolve as resolve12, dirname as dirname4 } from "path";
|
|
7476
|
+
import { fileURLToPath as fileURLToPath5 } from "url";
|
|
6352
7477
|
import { createRequire as createRequire4 } from "module";
|
|
6353
|
-
var
|
|
7478
|
+
var __dirname5 = fileURLToPath5(new URL(".", import.meta.url));
|
|
6354
7479
|
function resolveDemoDir() {
|
|
6355
|
-
const
|
|
6356
|
-
if (
|
|
6357
|
-
return
|
|
7480
|
+
const demoDir = resolve12(__dirname5, "..", "demo");
|
|
7481
|
+
if (existsSync16(resolve12(demoDir, "scenario.md"))) {
|
|
7482
|
+
return demoDir;
|
|
6358
7483
|
}
|
|
6359
7484
|
try {
|
|
6360
7485
|
const require2 = createRequire4(import.meta.url);
|
|
6361
7486
|
const cliMain = require2.resolve("@archal/cli");
|
|
6362
7487
|
const pkgDir = dirname4(dirname4(cliMain));
|
|
6363
|
-
const npmDemoDir =
|
|
6364
|
-
if (
|
|
7488
|
+
const npmDemoDir = resolve12(pkgDir, "demo");
|
|
7489
|
+
if (existsSync16(resolve12(npmDemoDir, "scenario.md"))) {
|
|
6365
7490
|
return npmDemoDir;
|
|
6366
7491
|
}
|
|
6367
7492
|
} catch {
|
|
6368
7493
|
}
|
|
6369
|
-
throw new Error(
|
|
7494
|
+
throw new Error(
|
|
7495
|
+
"Demo files not found. Ensure @archal/cli is installed correctly.\nIf installed globally, try reinstalling: npm install -g @archal/cli"
|
|
7496
|
+
);
|
|
6370
7497
|
}
|
|
6371
7498
|
function createDemoCommand() {
|
|
6372
|
-
const cmd = new
|
|
7499
|
+
const cmd = new Command8("demo").description("Run a built-in demo: good agent vs bad agent on the same scenario").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (opts) => {
|
|
6373
7500
|
if (opts.quiet) {
|
|
6374
7501
|
configureLogger({ quiet: true });
|
|
6375
7502
|
}
|
|
@@ -6377,9 +7504,9 @@ function createDemoCommand() {
|
|
|
6377
7504
|
configureLogger({ verbose: true, level: "debug" });
|
|
6378
7505
|
}
|
|
6379
7506
|
const demoDir = resolveDemoDir();
|
|
6380
|
-
const scenarioPath =
|
|
6381
|
-
const goodAgentPath =
|
|
6382
|
-
const badAgentPath =
|
|
7507
|
+
const scenarioPath = resolve12(demoDir, "scenario.md");
|
|
7508
|
+
const goodAgentPath = resolve12(demoDir, "good-agent.mjs");
|
|
7509
|
+
const badAgentPath = resolve12(demoDir, "bad-agent.mjs");
|
|
6383
7510
|
process.stderr.write("\n\x1B[36m\x1B[1marchal demo\x1B[0m \x1B[2m\u2014 same scenario, two agents\x1B[0m\n\n");
|
|
6384
7511
|
process.stderr.write("\x1B[1m\x1B[32m\u25B8 Good agent\x1B[0m \x1B[2m(checks labels, skips keep-open)\x1B[0m\n");
|
|
6385
7512
|
const goodReport = await runScenario({
|
|
@@ -6412,100 +7539,194 @@ function createDemoCommand() {
|
|
|
6412
7539
|
}
|
|
6413
7540
|
|
|
6414
7541
|
// src/commands/login.ts
|
|
6415
|
-
import { Command as
|
|
7542
|
+
import { Command as Command9 } from "commander";
|
|
6416
7543
|
import { exec } from "child_process";
|
|
6417
|
-
import { randomBytes } from "crypto";
|
|
7544
|
+
import { createHash as createHash3, randomBytes } from "crypto";
|
|
6418
7545
|
import { createServer } from "http";
|
|
6419
|
-
|
|
7546
|
+
/**
 * Normalize an auth base URL: trim whitespace, drop trailing slashes, and
 * remove a final "/api" path segment if one remains.
 *
 * @param {string} value - Configured auth URL.
 * @returns {string} The normalized base URL.
 */
function normalizeAuthUrl2(value) {
  const apiSuffix = "/api";
  const withoutTrailingSlashes = value.trim().replace(/\/+$/, "");
  if (!withoutTrailingSlashes.endsWith(apiSuffix)) {
    return withoutTrailingSlashes;
  }
  return withoutTrailingSlashes.slice(0, -apiSuffix.length);
}
|
|
7550
|
+
var AUTH_BASE_URL3 = normalizeAuthUrl2(process.env["ARCHAL_AUTH_URL"] ?? "https://www.archal.ai");
|
|
6420
7551
|
var START_PORT = 51423;
|
|
6421
7552
|
var LOGIN_TIMEOUT_MS = 5 * 60 * 1e3;
|
|
7553
|
+
var TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
|
|
7554
|
+
/**
 * Escape the five HTML-significant characters so arbitrary text can be embedded
 * in an HTML response body.
 *
 * "&" is replaced first so the ampersands introduced by the later replacements
 * are not escaped a second time.
 *
 * @param {string} value - Untrusted text.
 * @returns {string} Text with &, <, >, " and ' replaced by character references.
 */
function escapeHtml(value) {
  return value
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;")
    .replaceAll("'", "&#39;");
}
|
|
6422
7557
|
function openBrowser(url) {
|
|
6423
7558
|
const platform = process.platform;
|
|
6424
7559
|
const command = platform === "darwin" ? `open "${url}"` : platform === "win32" ? `start "" "${url}"` : `xdg-open "${url}"`;
|
|
6425
|
-
exec(command, () => {
|
|
7560
|
+
exec(command, (err) => {
|
|
7561
|
+
if (err) {
|
|
7562
|
+
info("Could not open browser automatically.");
|
|
7563
|
+
info(`Please visit the URL above manually to complete login.`);
|
|
7564
|
+
}
|
|
6426
7565
|
});
|
|
6427
7566
|
}
|
|
7567
|
+
/**
 * Generate a PKCE verifier/challenge pair.
 *
 * The verifier is 32 random bytes encoded as base64url; the challenge is the
 * base64url-encoded SHA-256 digest of the verifier string.
 *
 * @returns {{ codeVerifier: string, codeChallenge: string }}
 */
function createPkcePair() {
  const codeVerifier = randomBytes(32).toString("base64url");
  const hasher = createHash3("sha256");
  hasher.update(codeVerifier);
  const codeChallenge = hasher.digest("base64url");
  return { codeVerifier, codeChallenge };
}
|
|
7572
|
+
/**
 * Check whether a value is one of the recognized plan names.
 *
 * @param {unknown} value - Candidate plan.
 * @returns {boolean} `true` only for exactly "free", "pro" or "enterprise".
 */
function isPlan2(value) {
  const knownPlans = ["free", "pro", "enterprise"];
  return knownPlans.includes(value);
}
|
|
7575
|
+
/**
 * Build a placeholder credentials record for a token supplied directly on the
 * command line: empty refresh token, "(from token)" email, "free" plan and no
 * selected twins.
 *
 * @param {string} token - API key/token.
 * @returns {object} Credentials record. `expiresAt` is whatever `getJwtExpiry`
 *   reports for the token, or now + TOKEN_FALLBACK_TTL_SECONDS when that is
 *   null/undefined (presumably a non-JWT token — confirm against getJwtExpiry).
 */
function credentialsFromApiToken(token) {
  const issuedAtSeconds = Math.floor(Date.now() / 1e3);
  const expiresAt = getJwtExpiry(token) ?? issuedAtSeconds + TOKEN_FALLBACK_TTL_SECONDS;
  return {
    token,
    refreshToken: "",
    email: "(from token)",
    plan: "free",
    selectedTwins: [],
    expiresAt
  };
}
|
|
7586
|
+
/**
 * Build credentials from a callback URL that carries the token and account
 * details directly in its query string instead of an auth code.
 *
 * @param {URL} requestUrl - Parsed callback request URL.
 * @returns {object | null} Credentials record, or `null` when the token or email
 *   is missing/empty or the plan is not a recognized plan name.
 */
function credentialsFromLegacyCallback(requestUrl) {
  const query = requestUrl.searchParams;
  const token = query.get("token") ?? query.get("access_token");
  const refreshToken = query.get("refresh_token") ?? query.get("refreshToken") ?? "";
  const email = query.get("email");
  const planParam = query.get("plan");
  const twins = query.get("twins");

  const hasRequiredFields = Boolean(token) && Boolean(email) && isPlan2(planParam);
  if (!hasRequiredFields) {
    return null;
  }

  const issuedAtSeconds = Math.floor(Date.now() / 1e3);
  // Comma-separated list; empty segments are dropped.
  const selectedTwins = twins ? twins.split(",").filter((name) => name !== "") : [];
  const expiresAt = getJwtExpiry(token) ?? issuedAtSeconds + TOKEN_FALLBACK_TTL_SECONDS;
  return {
    token,
    refreshToken,
    email,
    plan: planParam,
    selectedTwins,
    expiresAt
  };
}
|
|
6428
7605
|
function findFreePort(startPort) {
|
|
6429
|
-
return new Promise((
|
|
7606
|
+
return new Promise((resolve13, reject) => {
|
|
6430
7607
|
const server = createServer();
|
|
6431
7608
|
server.listen(startPort, "127.0.0.1", () => {
|
|
6432
7609
|
const address = server.address();
|
|
6433
7610
|
const port = typeof address === "object" && address ? address.port : startPort;
|
|
6434
|
-
server.close(() =>
|
|
7611
|
+
server.close(() => resolve13(port));
|
|
6435
7612
|
});
|
|
6436
7613
|
server.on("error", () => {
|
|
6437
7614
|
if (startPort < START_PORT + 100) {
|
|
6438
|
-
findFreePort(startPort + 1).then(
|
|
7615
|
+
findFreePort(startPort + 1).then(resolve13).catch(reject);
|
|
6439
7616
|
} else {
|
|
6440
|
-
reject(new Error(
|
|
7617
|
+
reject(new Error(
|
|
7618
|
+
"Could not find a free localhost callback port (tried ports 51423-51523).\nTry closing other services, or use token login: archal login --token <your-token>"
|
|
7619
|
+
));
|
|
6441
7620
|
}
|
|
6442
7621
|
});
|
|
6443
7622
|
});
|
|
6444
7623
|
}
|
|
6445
7624
|
function createLoginCommand() {
|
|
6446
|
-
return new
|
|
7625
|
+
return new Command9("login").description("Log in via archal.ai browser auth").option("--no-browser", "Do not automatically open the login URL in a browser").option("--token <token>", "Use an API key/token directly (CI/service fallback)").action(async (opts) => {
|
|
7626
|
+
const directToken = opts.token?.trim();
|
|
7627
|
+
if (directToken) {
|
|
7628
|
+
let credentials = credentialsFromApiToken(directToken);
|
|
7629
|
+
credentials = await refreshAuthFromServer(credentials);
|
|
7630
|
+
saveCredentials(credentials);
|
|
7631
|
+
success(`Logged in as ${credentials.email} (${credentials.plan} plan)`);
|
|
7632
|
+
return;
|
|
7633
|
+
}
|
|
6447
7634
|
const port = await findFreePort(START_PORT);
|
|
6448
7635
|
const state = randomBytes(16).toString("hex");
|
|
6449
7636
|
const redirectUrl = `http://localhost:${port}/callback`;
|
|
6450
|
-
const
|
|
7637
|
+
const { codeVerifier, codeChallenge } = createPkcePair();
|
|
7638
|
+
const authUrl = `${AUTH_BASE_URL3}/cli-auth?redirect=${encodeURIComponent(redirectUrl)}&state=${encodeURIComponent(state)}&code_challenge=${encodeURIComponent(codeChallenge)}&code_challenge_method=S256`;
|
|
6451
7639
|
info("Opening browser for authentication...");
|
|
6452
7640
|
info(`If your browser does not open, visit:
|
|
6453
7641
|
${authUrl}`);
|
|
6454
|
-
|
|
6455
|
-
|
|
6456
|
-
|
|
6457
|
-
|
|
6458
|
-
|
|
6459
|
-
|
|
6460
|
-
|
|
6461
|
-
|
|
6462
|
-
|
|
6463
|
-
|
|
6464
|
-
|
|
6465
|
-
|
|
6466
|
-
|
|
6467
|
-
|
|
6468
|
-
|
|
7642
|
+
if (opts.browser !== false) {
|
|
7643
|
+
openBrowser(authUrl);
|
|
7644
|
+
}
|
|
7645
|
+
await new Promise((resolve13, reject) => {
|
|
7646
|
+
let settled = false;
|
|
7647
|
+
const settleResolve = () => {
|
|
7648
|
+
if (settled) return;
|
|
7649
|
+
settled = true;
|
|
7650
|
+
resolve13();
|
|
7651
|
+
};
|
|
7652
|
+
const settleReject = (error2) => {
|
|
7653
|
+
if (settled) return;
|
|
7654
|
+
settled = true;
|
|
7655
|
+
reject(error2);
|
|
7656
|
+
};
|
|
7657
|
+
function closeAndResolve() {
|
|
7658
|
+
if (!server.listening) {
|
|
7659
|
+
settleResolve();
|
|
6469
7660
|
return;
|
|
6470
7661
|
}
|
|
6471
|
-
|
|
6472
|
-
|
|
6473
|
-
|
|
6474
|
-
|
|
6475
|
-
|
|
6476
|
-
if (!token || !email || !plan) {
|
|
6477
|
-
res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
|
|
6478
|
-
res.end("<h1>Login failed</h1><p>Missing callback parameters.</p>");
|
|
6479
|
-
server.close();
|
|
6480
|
-
reject(new Error("Missing token/email/plan in callback"));
|
|
7662
|
+
server.close(() => settleResolve());
|
|
7663
|
+
}
|
|
7664
|
+
function closeAndReject(error2) {
|
|
7665
|
+
if (!server.listening) {
|
|
7666
|
+
settleReject(error2);
|
|
6481
7667
|
return;
|
|
6482
7668
|
}
|
|
6483
|
-
|
|
6484
|
-
|
|
6485
|
-
|
|
6486
|
-
|
|
6487
|
-
|
|
6488
|
-
|
|
6489
|
-
|
|
6490
|
-
|
|
6491
|
-
|
|
6492
|
-
|
|
6493
|
-
|
|
6494
|
-
|
|
6495
|
-
|
|
6496
|
-
|
|
6497
|
-
|
|
6498
|
-
|
|
6499
|
-
|
|
6500
|
-
|
|
6501
|
-
|
|
7669
|
+
server.close(() => settleReject(error2));
|
|
7670
|
+
}
|
|
7671
|
+
const server = createServer((req, res) => {
|
|
7672
|
+
void (async () => {
|
|
7673
|
+
try {
|
|
7674
|
+
const requestUrl = new URL(req.url ?? "/", `http://localhost:${port}`);
|
|
7675
|
+
if (requestUrl.pathname !== "/callback") {
|
|
7676
|
+
res.writeHead(404);
|
|
7677
|
+
res.end("Not found");
|
|
7678
|
+
return;
|
|
7679
|
+
}
|
|
7680
|
+
const returnedState = requestUrl.searchParams.get("state");
|
|
7681
|
+
if (returnedState !== state) {
|
|
7682
|
+
res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
|
|
7683
|
+
res.end("<h1>Login failed</h1><p>State mismatch.</p>");
|
|
7684
|
+
closeAndReject(new Error("State mismatch in callback"));
|
|
7685
|
+
return;
|
|
7686
|
+
}
|
|
7687
|
+
const code = requestUrl.searchParams.get("code");
|
|
7688
|
+
const credentials = code ? await exchangeCliAuthCode({
|
|
7689
|
+
code,
|
|
7690
|
+
codeVerifier,
|
|
7691
|
+
redirectUri: redirectUrl
|
|
7692
|
+
}) : credentialsFromLegacyCallback(requestUrl);
|
|
7693
|
+
if (!credentials) {
|
|
7694
|
+
res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
|
|
7695
|
+
res.end("<h1>Login failed</h1><p>Missing auth code.</p>");
|
|
7696
|
+
closeAndReject(new Error("Missing code in callback"));
|
|
7697
|
+
return;
|
|
7698
|
+
}
|
|
7699
|
+
saveCredentials(credentials);
|
|
7700
|
+
res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
|
|
7701
|
+
res.end("<h1>Login successful</h1><p>You can close this tab.</p>");
|
|
7702
|
+
success(`Logged in as ${credentials.email} (${credentials.plan} plan)`);
|
|
7703
|
+
if (credentials.plan === "free" && credentials.selectedTwins.length === 0) {
|
|
7704
|
+
info(
|
|
7705
|
+
"You haven't selected any twins yet.\n Run `archal twins select` to choose up to 5 twins for your free plan."
|
|
7706
|
+
);
|
|
7707
|
+
}
|
|
7708
|
+
closeAndResolve();
|
|
7709
|
+
} catch (error2) {
|
|
7710
|
+
const message = error2 instanceof Error ? error2.message : String(error2);
|
|
7711
|
+
if (!res.headersSent) {
|
|
7712
|
+
res.writeHead(500, { "content-type": "text/html; charset=utf-8" });
|
|
7713
|
+
res.end(`<h1>Login failed</h1><p>${escapeHtml(message)}</p>`);
|
|
7714
|
+
}
|
|
7715
|
+
closeAndReject(error2);
|
|
7716
|
+
}
|
|
7717
|
+
})().catch((error2) => {
|
|
7718
|
+
closeAndReject(error2);
|
|
7719
|
+
});
|
|
6502
7720
|
});
|
|
6503
|
-
server.listen(port, "127.0.0.1");
|
|
6504
7721
|
const timeout = setTimeout(() => {
|
|
6505
|
-
|
|
6506
|
-
reject(new Error("Login timed out. Run archal login again."));
|
|
7722
|
+
closeAndReject(new Error("Login timed out. Run archal login again."));
|
|
6507
7723
|
}, LOGIN_TIMEOUT_MS);
|
|
6508
7724
|
server.on("close", () => clearTimeout(timeout));
|
|
7725
|
+
server.once("error", (error2) => {
|
|
7726
|
+
clearTimeout(timeout);
|
|
7727
|
+
closeAndReject(error2);
|
|
7728
|
+
});
|
|
7729
|
+
server.listen(port, "127.0.0.1");
|
|
6509
7730
|
}).catch((error2) => {
|
|
6510
7731
|
const message = error2 instanceof Error ? error2.message : String(error2);
|
|
6511
7732
|
error(message);
|
|
@@ -6515,9 +7736,9 @@ function createLoginCommand() {
|
|
|
6515
7736
|
}
|
|
6516
7737
|
|
|
6517
7738
|
// src/commands/logout.ts
|
|
6518
|
-
import { Command as
|
|
7739
|
+
import { Command as Command10 } from "commander";
|
|
6519
7740
|
function createLogoutCommand() {
|
|
6520
|
-
return new
|
|
7741
|
+
return new Command10("logout").description("Log out and remove stored credentials").action(() => {
|
|
6521
7742
|
const creds = getCredentials();
|
|
6522
7743
|
if (!creds) {
|
|
6523
7744
|
info("Not currently logged in.");
|
|
@@ -6535,7 +7756,7 @@ function createLogoutCommand() {
|
|
|
6535
7756
|
}
|
|
6536
7757
|
|
|
6537
7758
|
// src/commands/whoami.ts
|
|
6538
|
-
import { Command as
|
|
7759
|
+
import { Command as Command11 } from "commander";
|
|
6539
7760
|
var RESET2 = "\x1B[0m";
|
|
6540
7761
|
var BOLD2 = "\x1B[1m";
|
|
6541
7762
|
var DIM2 = "\x1B[2m";
|
|
@@ -6543,11 +7764,12 @@ var CYAN2 = "\x1B[36m";
|
|
|
6543
7764
|
var GREEN2 = "\x1B[32m";
|
|
6544
7765
|
var YELLOW2 = "\x1B[33m";
|
|
6545
7766
|
function createWhoamiCommand() {
|
|
6546
|
-
return new
|
|
6547
|
-
let current =
|
|
6548
|
-
|
|
6549
|
-
|
|
6550
|
-
|
|
7767
|
+
return new Command11("whoami").description("Show current login status, plan, and entitlements").option("--refresh", "Force refresh from server").action(async (opts) => {
|
|
7768
|
+
let current = getCredentials();
|
|
7769
|
+
if (!current) {
|
|
7770
|
+
info("Not logged in. Run: archal login");
|
|
7771
|
+
return;
|
|
7772
|
+
}
|
|
6551
7773
|
if (opts.refresh) {
|
|
6552
7774
|
current = await refreshAuthFromServer(current);
|
|
6553
7775
|
saveCredentials(current);
|
|
@@ -6611,7 +7833,7 @@ function planBadge(plan) {
|
|
|
6611
7833
|
}
|
|
6612
7834
|
|
|
6613
7835
|
// src/commands/upgrade.ts
|
|
6614
|
-
import { Command as
|
|
7836
|
+
import { Command as Command12 } from "commander";
|
|
6615
7837
|
import { exec as exec2 } from "child_process";
|
|
6616
7838
|
var BILLING_URL = "https://archal.ai/dashboard/billing";
|
|
6617
7839
|
function openBrowser2(url) {
|
|
@@ -6621,7 +7843,7 @@ function openBrowser2(url) {
|
|
|
6621
7843
|
});
|
|
6622
7844
|
}
|
|
6623
7845
|
function createUpgradeCommand() {
|
|
6624
|
-
return new
|
|
7846
|
+
return new Command12("upgrade").description("Open the Archal billing page to upgrade your plan").action(() => {
|
|
6625
7847
|
const creds = getCredentials();
|
|
6626
7848
|
if (creds?.plan === "enterprise") {
|
|
6627
7849
|
info("You are already on the enterprise plan.");
|
|
@@ -6640,7 +7862,7 @@ function createUpgradeCommand() {
|
|
|
6640
7862
|
}
|
|
6641
7863
|
|
|
6642
7864
|
// src/commands/help.ts
|
|
6643
|
-
import { Command as
|
|
7865
|
+
import { Command as Command13 } from "commander";
|
|
6644
7866
|
var RESET3 = "\x1B[0m";
|
|
6645
7867
|
var BOLD3 = "\x1B[1m";
|
|
6646
7868
|
var DIM3 = "\x1B[2m";
|
|
@@ -6668,15 +7890,7 @@ var COMMAND_GROUPS = [
|
|
|
6668
7890
|
]
|
|
6669
7891
|
},
|
|
6670
7892
|
{
|
|
6671
|
-
heading: "
|
|
6672
|
-
commands: [
|
|
6673
|
-
{ name: "twin start <name>", description: "Start a local twin process (debug/local only)" },
|
|
6674
|
-
{ name: "twin stop <name>", description: "Stop a running local twin process" },
|
|
6675
|
-
{ name: "twin status", description: "Show running local twin processes" }
|
|
6676
|
-
]
|
|
6677
|
-
},
|
|
6678
|
-
{
|
|
6679
|
-
heading: "Twin Catalog",
|
|
7893
|
+
heading: "Twins",
|
|
6680
7894
|
commands: [
|
|
6681
7895
|
{ name: "twins list", description: "List available twins and entitlement status" },
|
|
6682
7896
|
{ name: "twins select", description: "Choose which twins to use on your free plan" }
|
|
@@ -6700,7 +7914,7 @@ var COMMAND_GROUPS = [
|
|
|
6700
7914
|
];
|
|
6701
7915
|
function showHelp() {
|
|
6702
7916
|
process.stderr.write(`
|
|
6703
|
-
${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v0.1.0${RESET3}
|
|
7917
|
+
${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v${CLI_VERSION}${RESET3}
|
|
6704
7918
|
`);
|
|
6705
7919
|
process.stderr.write(`${DIM3}The QA layer for the software factory era${RESET3}
|
|
6706
7920
|
|
|
@@ -6722,21 +7936,21 @@ ${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v0.1.0${RESET3}
|
|
|
6722
7936
|
`);
|
|
6723
7937
|
}
|
|
6724
7938
|
function createHelpCommand() {
|
|
6725
|
-
return new
|
|
7939
|
+
return new Command13("help").description("Show all available commands").action(() => {
|
|
6726
7940
|
showHelp();
|
|
6727
7941
|
});
|
|
6728
7942
|
}
|
|
6729
7943
|
|
|
6730
7944
|
// src/commands/setup.ts
|
|
6731
|
-
import { Command as
|
|
6732
|
-
import { existsSync as
|
|
7945
|
+
import { Command as Command14 } from "commander";
|
|
7946
|
+
import { existsSync as existsSync17 } from "fs";
|
|
6733
7947
|
var RESET4 = "\x1B[0m";
|
|
6734
7948
|
var BOLD4 = "\x1B[1m";
|
|
6735
7949
|
var DIM4 = "\x1B[2m";
|
|
6736
7950
|
var CYAN4 = "\x1B[36m";
|
|
6737
7951
|
var GREEN3 = "\x1B[32m";
|
|
6738
7952
|
function createSetupCommand() {
|
|
6739
|
-
return new
|
|
7953
|
+
return new Command14("setup").description("Guided onboarding wizard for first-time setup").action(async () => {
|
|
6740
7954
|
process.stderr.write(`
|
|
6741
7955
|
${CYAN4}${BOLD4}Archal Setup${RESET4}
|
|
6742
7956
|
`);
|
|
@@ -6758,7 +7972,7 @@ ${CYAN4}${BOLD4}Archal Setup${RESET4}
|
|
|
6758
7972
|
${BOLD4}Step 2: Configuration${RESET4}
|
|
6759
7973
|
`);
|
|
6760
7974
|
const configPath = getConfigPath();
|
|
6761
|
-
if (
|
|
7975
|
+
if (existsSync17(configPath)) {
|
|
6762
7976
|
success(`Config file exists: ${configPath}`);
|
|
6763
7977
|
} else {
|
|
6764
7978
|
const create = await askConfirm("Create a default config file?");
|
|
@@ -6823,7 +8037,7 @@ ${DIM4}${"\u2500".repeat(45)}${RESET4}
|
|
|
6823
8037
|
`);
|
|
6824
8038
|
process.stderr.write(` ${CYAN4}archal scenario create my-first-test${RESET4} ${DIM4}Create a scenario${RESET4}
|
|
6825
8039
|
`);
|
|
6826
|
-
process.stderr.write(` ${CYAN4}archal run scenario.md --
|
|
8040
|
+
process.stderr.write(` ${CYAN4}archal run scenario.md --engine-endpoint "..." --engine-model "..."${RESET4} ${DIM4}Run a scenario${RESET4}
|
|
6827
8041
|
`);
|
|
6828
8042
|
process.stderr.write(` ${CYAN4}archal help${RESET4} ${DIM4}See all commands${RESET4}
|
|
6829
8043
|
|
|
@@ -6832,8 +8046,8 @@ ${DIM4}${"\u2500".repeat(45)}${RESET4}
|
|
|
6832
8046
|
}
|
|
6833
8047
|
|
|
6834
8048
|
// src/index.ts
|
|
6835
|
-
var program = new
|
|
6836
|
-
program.name("archal").description("The QA layer for the software factory era \u2014 test AI agents against digital twins").version(
|
|
8049
|
+
var program = new Command15();
|
|
8050
|
+
program.name("archal").description("The QA layer for the software factory era \u2014 test AI agents against digital twins").version(CLI_VERSION).option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").hook("preAction", (_thisCommand) => {
|
|
6837
8051
|
const opts = program.opts();
|
|
6838
8052
|
if (opts.quiet) {
|
|
6839
8053
|
configureLogger({ quiet: true });
|
|
@@ -6848,7 +8062,6 @@ program.addCommand(createWhoamiCommand());
|
|
|
6848
8062
|
program.addCommand(createSetupCommand());
|
|
6849
8063
|
program.addCommand(createRunCommand());
|
|
6850
8064
|
program.addCommand(createInitCommand());
|
|
6851
|
-
program.addCommand(createTwinCommand());
|
|
6852
8065
|
program.addCommand(createTwinsCommand());
|
|
6853
8066
|
program.addCommand(createScenarioCommand());
|
|
6854
8067
|
program.addCommand(createTraceCommand());
|
|
@@ -6864,6 +8077,14 @@ program.action(() => {
|
|
|
6864
8077
|
process.stderr.write("\x1B[33mNot logged in.\x1B[0m Get started with: \x1B[36marchal login\x1B[0m\n\n");
|
|
6865
8078
|
}
|
|
6866
8079
|
});
|
|
8080
|
+
function handleShutdown(signal) {
|
|
8081
|
+
process.stderr.write(`
|
|
8082
|
+
Received ${signal}, shutting down...
|
|
8083
|
+
`);
|
|
8084
|
+
process.exit(128 + (signal === "SIGINT" ? 2 : 15));
|
|
8085
|
+
}
|
|
8086
|
+
process.on("SIGINT", () => handleShutdown("SIGINT"));
|
|
8087
|
+
process.on("SIGTERM", () => handleShutdown("SIGTERM"));
|
|
6867
8088
|
program.parseAsync(process.argv).catch((err) => {
|
|
6868
8089
|
const message = err instanceof Error ? err.message : String(err);
|
|
6869
8090
|
process.stderr.write(`Error: ${message}
|