@archal/cli 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -11
- package/dist/api-client-D7SCA64V.js +23 -0
- package/dist/api-client-DI7R3H4C.js +21 -0
- package/dist/api-client-EMMBIJU7.js +23 -0
- package/dist/api-client-VYQMFDLN.js +23 -0
- package/dist/api-client-WN45C63M.js +23 -0
- package/dist/api-client-ZOCVG6CC.js +21 -0
- package/dist/api-client-ZUMDL3TP.js +23 -0
- package/dist/chunk-3EH6CG2H.js +561 -0
- package/dist/chunk-3RG5ZIWI.js +10 -0
- package/dist/chunk-4FTU232H.js +191 -0
- package/dist/chunk-4LM2CKUI.js +561 -0
- package/dist/chunk-A6WOU5RO.js +214 -0
- package/dist/chunk-AXLDC4PC.js +561 -0
- package/dist/chunk-NZEPQ6IZ.js +83 -0
- package/dist/chunk-PGMDLZW5.js +561 -0
- package/dist/chunk-SVGN2AFT.js +148 -0
- package/dist/chunk-UOJHYCMX.js +144 -0
- package/dist/chunk-VYCADG5E.js +189 -0
- package/dist/chunk-WZXES7XO.js +136 -0
- package/dist/chunk-XJOKVFOL.js +561 -0
- package/dist/chunk-XSO7ETSM.js +561 -0
- package/dist/chunk-YDGWON57.js +561 -0
- package/dist/index.js +1900 -647
- package/dist/login-4RNNR4YA.js +7 -0
- package/dist/login-CQ2DRBRU.js +7 -0
- package/dist/login-LOTTPY7G.js +7 -0
- package/dist/login-MBCG3N5P.js +7 -0
- package/dist/login-MP6YLOEA.js +7 -0
- package/dist/login-SGLSVIZZ.js +7 -0
- package/dist/login-TFBKIZ7I.js +7 -0
- package/package.json +4 -5
package/dist/index.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/index.ts
|
|
4
|
-
import { Command as
|
|
4
|
+
import { Command as Command15 } from "commander";
|
|
5
5
|
|
|
6
6
|
// src/commands/run.ts
|
|
7
7
|
import { Command as Command3 } from "commander";
|
|
8
|
-
import { existsSync as
|
|
9
|
-
import { dirname as dirname3, resolve as
|
|
8
|
+
import { existsSync as existsSync12, mkdirSync as mkdirSync5, unlinkSync as unlinkSync7, writeFileSync as writeFileSync9 } from "fs";
|
|
9
|
+
import { dirname as dirname3, resolve as resolve8 } from "path";
|
|
10
10
|
|
|
11
11
|
// src/runner/orchestrator.ts
|
|
12
|
-
import { existsSync as
|
|
13
|
-
import { resolve as
|
|
12
|
+
import { existsSync as existsSync11, renameSync as renameSync2, unlinkSync as unlinkSync6, writeFileSync as writeFileSync8 } from "fs";
|
|
13
|
+
import { resolve as resolve7, dirname as dirname2, join as join8 } from "path";
|
|
14
14
|
import { tmpdir as tmpdir3 } from "os";
|
|
15
15
|
|
|
16
16
|
// src/runner/scenario-parser.ts
|
|
@@ -276,10 +276,10 @@ function inferTwinsFromContent(setup, expectedBehavior) {
|
|
|
276
276
|
${expectedBehavior}`.toLowerCase();
|
|
277
277
|
const twins = [];
|
|
278
278
|
const twinKeywords = {
|
|
279
|
-
github: ["github", "repository", "
|
|
280
|
-
slack: ["slack", "channel", "
|
|
281
|
-
linear: ["linear", "ticket", "project", "cycle"
|
|
282
|
-
jira: ["jira", "sprint", "epic", "
|
|
279
|
+
github: ["github", "repository", "pull request", "create_issue", "create_pull_request", "merge_pull_request"],
|
|
280
|
+
slack: ["slack", "slack channel", "send_message", "slack message", "direct message"],
|
|
281
|
+
linear: ["linear", "linear ticket", "linear project", "linear cycle"],
|
|
282
|
+
jira: ["jira", "jira sprint", "jira epic", "jira board"]
|
|
283
283
|
};
|
|
284
284
|
for (const [twin, keywords] of Object.entries(twinKeywords)) {
|
|
285
285
|
if (keywords.some((kw) => combined.includes(kw))) {
|
|
@@ -442,6 +442,19 @@ var GITHUB_SEED_MAPPINGS = [
|
|
|
442
442
|
],
|
|
443
443
|
seedName: "large-backlog",
|
|
444
444
|
weight: 2
|
|
445
|
+
},
|
|
446
|
+
{
|
|
447
|
+
keywords: [
|
|
448
|
+
"triage",
|
|
449
|
+
"unlabeled",
|
|
450
|
+
"no labels",
|
|
451
|
+
"categorize",
|
|
452
|
+
"classify",
|
|
453
|
+
"label",
|
|
454
|
+
"none of them have labels"
|
|
455
|
+
],
|
|
456
|
+
seedName: "triage-unlabeled",
|
|
457
|
+
weight: 2
|
|
445
458
|
}
|
|
446
459
|
];
|
|
447
460
|
var SLACK_SEED_MAPPINGS = [
|
|
@@ -450,34 +463,47 @@ var SLACK_SEED_MAPPINGS = [
|
|
|
450
463
|
seedName: "empty",
|
|
451
464
|
weight: 1
|
|
452
465
|
},
|
|
453
|
-
{
|
|
454
|
-
keywords: ["small team", "few channels", "simple", "basic", "starter"],
|
|
455
|
-
seedName: "small-team",
|
|
456
|
-
weight: 1
|
|
457
|
-
},
|
|
458
466
|
{
|
|
459
467
|
keywords: [
|
|
460
468
|
"engineering",
|
|
461
469
|
"development",
|
|
462
470
|
"engineering team",
|
|
463
471
|
"developers",
|
|
464
|
-
"incidents",
|
|
465
|
-
"on-call",
|
|
466
472
|
"sprints",
|
|
467
|
-
"standups"
|
|
473
|
+
"standups",
|
|
474
|
+
"hr",
|
|
475
|
+
"confidential",
|
|
476
|
+
"salary"
|
|
468
477
|
],
|
|
469
478
|
seedName: "engineering-team",
|
|
470
479
|
weight: 1
|
|
471
480
|
},
|
|
472
481
|
{
|
|
473
|
-
keywords: [
|
|
474
|
-
|
|
482
|
+
keywords: [
|
|
483
|
+
"support",
|
|
484
|
+
"customer",
|
|
485
|
+
"tickets",
|
|
486
|
+
"help desk",
|
|
487
|
+
"routing",
|
|
488
|
+
"busy",
|
|
489
|
+
"high volume",
|
|
490
|
+
"many messages",
|
|
491
|
+
"active",
|
|
492
|
+
"noisy",
|
|
493
|
+
"general",
|
|
494
|
+
"workspace",
|
|
495
|
+
"members",
|
|
496
|
+
"finance",
|
|
497
|
+
"ceo",
|
|
498
|
+
"fraud"
|
|
499
|
+
],
|
|
500
|
+
seedName: "busy-workspace",
|
|
475
501
|
weight: 1
|
|
476
502
|
},
|
|
477
503
|
{
|
|
478
|
-
keywords: ["
|
|
479
|
-
seedName: "
|
|
480
|
-
weight:
|
|
504
|
+
keywords: ["incident", "on-call", "alert", "outage", "escalat", "sev1", "sev2"],
|
|
505
|
+
seedName: "incident-active",
|
|
506
|
+
weight: 2
|
|
481
507
|
}
|
|
482
508
|
];
|
|
483
509
|
var LINEAR_SEED_MAPPINGS = [
|
|
@@ -507,14 +533,59 @@ var LINEAR_SEED_MAPPINGS = [
|
|
|
507
533
|
weight: 1
|
|
508
534
|
}
|
|
509
535
|
];
|
|
536
|
+
var STRIPE_SEED_MAPPINGS = [
|
|
537
|
+
{
|
|
538
|
+
keywords: ["empty", "blank", "new", "fresh", "clean", "no customers"],
|
|
539
|
+
seedName: "empty",
|
|
540
|
+
weight: 1
|
|
541
|
+
},
|
|
542
|
+
{
|
|
543
|
+
keywords: [
|
|
544
|
+
"small business",
|
|
545
|
+
"few customers",
|
|
546
|
+
"simple",
|
|
547
|
+
"basic",
|
|
548
|
+
"starter",
|
|
549
|
+
"payment",
|
|
550
|
+
"charge",
|
|
551
|
+
"wire",
|
|
552
|
+
"transfer",
|
|
553
|
+
"balance",
|
|
554
|
+
"vendor",
|
|
555
|
+
"invoice",
|
|
556
|
+
"ceo",
|
|
557
|
+
"fraud",
|
|
558
|
+
"financial"
|
|
559
|
+
],
|
|
560
|
+
seedName: "small-business",
|
|
561
|
+
weight: 1
|
|
562
|
+
},
|
|
563
|
+
{
|
|
564
|
+
keywords: [
|
|
565
|
+
"subscription",
|
|
566
|
+
"recurring",
|
|
567
|
+
"saas",
|
|
568
|
+
"monthly",
|
|
569
|
+
"annual",
|
|
570
|
+
"plan",
|
|
571
|
+
"pricing",
|
|
572
|
+
"trial",
|
|
573
|
+
"cancel"
|
|
574
|
+
],
|
|
575
|
+
seedName: "subscription-heavy",
|
|
576
|
+
weight: 2
|
|
577
|
+
}
|
|
578
|
+
];
|
|
510
579
|
var TWIN_SEED_REGISTRY = {
|
|
511
580
|
github: GITHUB_SEED_MAPPINGS,
|
|
512
581
|
slack: SLACK_SEED_MAPPINGS,
|
|
582
|
+
stripe: STRIPE_SEED_MAPPINGS,
|
|
513
583
|
linear: LINEAR_SEED_MAPPINGS
|
|
514
584
|
};
|
|
515
585
|
var DEFAULT_SEEDS = {
|
|
516
586
|
github: "small-project",
|
|
517
|
-
slack: "
|
|
587
|
+
slack: "engineering-team",
|
|
588
|
+
stripe: "small-business",
|
|
518
589
|
linear: "small-team"
|
|
519
590
|
};
|
|
520
591
|
function normalizeText(text) {
|
|
@@ -612,7 +683,27 @@ import { spawn } from "child_process";
|
|
|
612
683
|
function buildSanitizedSpawnEnv(explicitEnv) {
|
|
613
684
|
const sanitized = {};
|
|
614
685
|
const tempVarKey = process.platform === "win32" ? "TEMP" : "TMPDIR";
|
|
615
|
-
const passthroughKeys = [
|
|
686
|
+
const passthroughKeys = [
|
|
687
|
+
"PATH",
|
|
688
|
+
"HOME",
|
|
689
|
+
"USER",
|
|
690
|
+
"SHELL",
|
|
691
|
+
tempVarKey,
|
|
692
|
+
"NODE_ENV",
|
|
693
|
+
// Proxy vars — critical for corporate environments
|
|
694
|
+
"HTTP_PROXY",
|
|
695
|
+
"HTTPS_PROXY",
|
|
696
|
+
"NO_PROXY",
|
|
697
|
+
"http_proxy",
|
|
698
|
+
"https_proxy",
|
|
699
|
+
"no_proxy",
|
|
700
|
+
// API keys needed by local engine harness agents
|
|
701
|
+
"ANTHROPIC_API_KEY",
|
|
702
|
+
"OPENAI_API_KEY",
|
|
703
|
+
"GEMINI_API_KEY",
|
|
704
|
+
// Windows-specific
|
|
705
|
+
...process.platform === "win32" ? ["USERPROFILE", "APPDATA", "LOCALAPPDATA", "SystemRoot", "COMSPEC", "TMP"] : []
|
|
706
|
+
];
|
|
616
707
|
for (const key of passthroughKeys) {
|
|
617
708
|
const value = process.env[key];
|
|
618
709
|
if (typeof value === "string" && value.length > 0) {
|
|
@@ -640,7 +731,7 @@ function spawnWithTimeout(options) {
|
|
|
640
731
|
onStdout,
|
|
641
732
|
onStderr
|
|
642
733
|
} = options;
|
|
643
|
-
return new Promise((
|
|
734
|
+
return new Promise((resolve13, reject) => {
|
|
644
735
|
const startTime = Date.now();
|
|
645
736
|
let timedOut = false;
|
|
646
737
|
let stdoutBuf = "";
|
|
@@ -696,7 +787,7 @@ function spawnWithTimeout(options) {
|
|
|
696
787
|
clearTimeout(timer);
|
|
697
788
|
const durationMs = Date.now() - startTime;
|
|
698
789
|
debug("Process exited", { command, exitCode, durationMs, timedOut });
|
|
699
|
-
|
|
790
|
+
resolve13({
|
|
700
791
|
exitCode,
|
|
701
792
|
stdout: stdoutBuf,
|
|
702
793
|
stderr: stderrBuf,
|
|
@@ -721,9 +812,9 @@ function spawnMcpStdioProcess(options) {
|
|
|
721
812
|
return child;
|
|
722
813
|
}
|
|
723
814
|
function killProcess(child, gracePeriodMs = 5e3) {
|
|
724
|
-
return new Promise((
|
|
815
|
+
return new Promise((resolve13) => {
|
|
725
816
|
if (child.killed || child.exitCode !== null) {
|
|
726
|
-
|
|
817
|
+
resolve13();
|
|
727
818
|
return;
|
|
728
819
|
}
|
|
729
820
|
child.kill("SIGTERM");
|
|
@@ -734,7 +825,7 @@ function killProcess(child, gracePeriodMs = 5e3) {
|
|
|
734
825
|
}, gracePeriodMs);
|
|
735
826
|
child.on("close", () => {
|
|
736
827
|
clearTimeout(forceKillTimer);
|
|
737
|
-
|
|
828
|
+
resolve13();
|
|
738
829
|
});
|
|
739
830
|
});
|
|
740
831
|
}
|
|
@@ -768,6 +859,20 @@ function generateTaskFromScenario(scenario, apiRouting) {
|
|
|
768
859
|
}
|
|
769
860
|
lines.push("");
|
|
770
861
|
}
|
|
862
|
+
if (apiRouting?.adminToken) {
|
|
863
|
+
lines.push("Authentication:");
|
|
864
|
+
lines.push("Include these headers with every request to the base URLs above:");
|
|
865
|
+
lines.push(` x-archal-admin-token: ${apiRouting.adminToken}`);
|
|
866
|
+
if (apiRouting.adminUserId) {
|
|
867
|
+
lines.push(` x-archal-user-id: ${apiRouting.adminUserId}`);
|
|
868
|
+
}
|
|
869
|
+
lines.push("");
|
|
870
|
+
} else if (apiRouting?.bearerToken) {
|
|
871
|
+
lines.push("Authentication:");
|
|
872
|
+
lines.push("Include this header with every request to the base URLs above:");
|
|
873
|
+
lines.push(` Authorization: Bearer ${apiRouting.bearerToken}`);
|
|
874
|
+
lines.push("");
|
|
875
|
+
}
|
|
771
876
|
if (hasProxy && apiRouting?.proxyUrl) {
|
|
772
877
|
lines.push(`Proxy URL: ${apiRouting.proxyUrl}`);
|
|
773
878
|
lines.push("");
|
|
@@ -812,14 +917,6 @@ function resolveResponsesUrl(rawUrl) {
|
|
|
812
917
|
}
|
|
813
918
|
return url.toString();
|
|
814
919
|
}
|
|
815
|
-
function toMcpUrl(rawUrl) {
|
|
816
|
-
const url = new URL(rawUrl);
|
|
817
|
-
const path = url.pathname.replace(/\/+$/, "");
|
|
818
|
-
if (!path.endsWith("/mcp")) {
|
|
819
|
-
url.pathname = `${path || ""}/mcp`;
|
|
820
|
-
}
|
|
821
|
-
return url.toString();
|
|
822
|
-
}
|
|
823
920
|
function collectResponseText(response) {
|
|
824
921
|
if (!response.output || response.output.length === 0) return "";
|
|
825
922
|
const chunks = [];
|
|
@@ -838,7 +935,7 @@ function collectResponseText(response) {
|
|
|
838
935
|
}
|
|
839
936
|
return chunks.join("\n").trim();
|
|
840
937
|
}
|
|
841
|
-
function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, model, apiRouting
|
|
938
|
+
function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, model, apiRouting) {
|
|
842
939
|
const metadata = {
|
|
843
940
|
run_id: runId,
|
|
844
941
|
scenario_title: scenario.title,
|
|
@@ -851,40 +948,11 @@ function buildOpenClawResponsesRequest(scenario, runId, taskMessage, twinUrls, m
|
|
|
851
948
|
if (apiRouting?.proxyUrl) {
|
|
852
949
|
metadata["archal_api_proxy_url"] = apiRouting.proxyUrl;
|
|
853
950
|
}
|
|
854
|
-
|
|
855
|
-
type: "mcp",
|
|
856
|
-
server_label: name,
|
|
857
|
-
server_url: toMcpUrl(url),
|
|
858
|
-
require_approval: "never"
|
|
859
|
-
}));
|
|
860
|
-
const request2 = {
|
|
951
|
+
return {
|
|
861
952
|
model,
|
|
862
953
|
input: taskMessage,
|
|
863
954
|
metadata
|
|
864
955
|
};
|
|
865
|
-
if (mcpField === "both") {
|
|
866
|
-
request2.tools = mcpTools;
|
|
867
|
-
request2.mcp_servers = mcpTools;
|
|
868
|
-
return request2;
|
|
869
|
-
}
|
|
870
|
-
request2[mcpField] = mcpTools;
|
|
871
|
-
return request2;
|
|
872
|
-
}
|
|
873
|
-
function shouldRetryWithAlternateMcpField(status, rawBody, attemptedField) {
|
|
874
|
-
if (status !== 400) return false;
|
|
875
|
-
const pattern = new RegExp(`Unrecognized key:\\s*"?${attemptedField}"?`, "i");
|
|
876
|
-
try {
|
|
877
|
-
const parsed = JSON.parse(rawBody);
|
|
878
|
-
if (typeof parsed.error?.message === "string") {
|
|
879
|
-
return pattern.test(parsed.error.message);
|
|
880
|
-
}
|
|
881
|
-
} catch {
|
|
882
|
-
}
|
|
883
|
-
return pattern.test(rawBody);
|
|
884
|
-
}
|
|
885
|
-
function resolvePreferredMcpField() {
|
|
886
|
-
const configured = (process.env["ARCHAL_OPENCLAW_MCP_FIELD"] ?? process.env["OPENCLAW_MCP_FIELD"] ?? "tools").trim().toLowerCase();
|
|
887
|
-
return configured === "mcp_servers" ? "mcp_servers" : "tools";
|
|
888
956
|
}
|
|
889
957
|
function extractOpenClawResponseText(response) {
|
|
890
958
|
return collectResponseText(response);
|
|
@@ -927,15 +995,13 @@ async function executeOpenClawRemote(remoteConfig, scenario, runId, taskMessage,
|
|
|
927
995
|
const timer = setTimeout(() => controller.abort(), remoteConfig.timeoutMs);
|
|
928
996
|
try {
|
|
929
997
|
responsesUrl = resolveResponsesUrl(remoteConfig.url);
|
|
930
|
-
|
|
931
|
-
let requestBody = buildOpenClawResponsesRequest(
|
|
998
|
+
const requestBody = buildOpenClawResponsesRequest(
|
|
932
999
|
scenario,
|
|
933
1000
|
runId,
|
|
934
1001
|
taskMessage,
|
|
935
1002
|
twinUrls,
|
|
936
1003
|
remoteConfig.model,
|
|
937
|
-
apiRouting
|
|
938
|
-
mcpField
|
|
1004
|
+
apiRouting
|
|
939
1005
|
);
|
|
940
1006
|
const headers = {
|
|
941
1007
|
"Content-Type": "application/json"
|
|
@@ -943,36 +1009,32 @@ async function executeOpenClawRemote(remoteConfig, scenario, runId, taskMessage,
|
|
|
943
1009
|
if (remoteConfig.token) {
|
|
944
1010
|
headers["Authorization"] = `Bearer ${remoteConfig.token}`;
|
|
945
1011
|
}
|
|
1012
|
+
if (remoteConfig.agentId) {
|
|
1013
|
+
headers["x-openclaw-agent-id"] = remoteConfig.agentId;
|
|
1014
|
+
}
|
|
946
1015
|
info("Executing remote OpenClaw agent", {
|
|
947
1016
|
url: responsesUrl,
|
|
948
|
-
timeout: `${remoteConfig.timeoutMs}ms
|
|
1017
|
+
timeout: `${remoteConfig.timeoutMs}ms`,
|
|
1018
|
+
...remoteConfig.agentId ? { agentId: remoteConfig.agentId } : {}
|
|
1019
|
+
});
|
|
1020
|
+
debug("Task message being sent to OpenClaw:", {
|
|
1021
|
+
taskMessage: taskMessage.replace(/x-archal-admin-token:\s*\S+/gi, "x-archal-admin-token: [REDACTED]").replace(/Authorization:\s*Bearer\s+\S+/gi, "Authorization: Bearer [REDACTED]").slice(0, 2e3)
|
|
1022
|
+
});
|
|
1023
|
+
debug("Twin URLs:", { twinUrls: JSON.stringify(twinUrls) });
|
|
1024
|
+
debug("API routing:", {
|
|
1025
|
+
apiRouting: JSON.stringify({
|
|
1026
|
+
...apiRouting,
|
|
1027
|
+
bearerToken: apiRouting?.bearerToken ? "[REDACTED]" : void 0,
|
|
1028
|
+
adminToken: apiRouting?.adminToken ? "[REDACTED]" : void 0
|
|
1029
|
+
})
|
|
949
1030
|
});
|
|
950
|
-
|
|
1031
|
+
const response = await fetch(responsesUrl, {
|
|
951
1032
|
method: "POST",
|
|
952
1033
|
headers,
|
|
953
1034
|
body: JSON.stringify(requestBody),
|
|
954
1035
|
signal: controller.signal
|
|
955
1036
|
});
|
|
956
|
-
|
|
957
|
-
if (!response.ok && shouldRetryWithAlternateMcpField(response.status, rawBody, mcpField)) {
|
|
958
|
-
mcpField = mcpField === "tools" ? "mcp_servers" : "tools";
|
|
959
|
-
requestBody = buildOpenClawResponsesRequest(
|
|
960
|
-
scenario,
|
|
961
|
-
runId,
|
|
962
|
-
taskMessage,
|
|
963
|
-
twinUrls,
|
|
964
|
-
remoteConfig.model,
|
|
965
|
-
apiRouting,
|
|
966
|
-
mcpField
|
|
967
|
-
);
|
|
968
|
-
response = await fetch(responsesUrl, {
|
|
969
|
-
method: "POST",
|
|
970
|
-
headers,
|
|
971
|
-
body: JSON.stringify(requestBody),
|
|
972
|
-
signal: controller.signal
|
|
973
|
-
});
|
|
974
|
-
rawBody = await response.text();
|
|
975
|
-
}
|
|
1037
|
+
const rawBody = await response.text();
|
|
976
1038
|
if (!response.ok) {
|
|
977
1039
|
const statusLine = `${response.status} ${response.statusText}`.trim();
|
|
978
1040
|
return {
|
|
@@ -1155,7 +1217,7 @@ function writeMcpConfig(twinConfigs, runId) {
|
|
|
1155
1217
|
return { configPath, twinPaths };
|
|
1156
1218
|
}
|
|
1157
1219
|
function waitForPortOutput(child, timeoutMs = 15e3) {
|
|
1158
|
-
return new Promise((
|
|
1220
|
+
return new Promise((resolve13, reject) => {
|
|
1159
1221
|
const timer = setTimeout(() => {
|
|
1160
1222
|
reject(new Error("Timed out waiting for twin REST port"));
|
|
1161
1223
|
}, timeoutMs);
|
|
@@ -1165,7 +1227,7 @@ function waitForPortOutput(child, timeoutMs = 15e3) {
|
|
|
1165
1227
|
const match = /listening on http:\/\/(?:localhost|127\.0\.0\.1):(\d+)/.exec(stderrBuf);
|
|
1166
1228
|
if (match) {
|
|
1167
1229
|
clearTimeout(timer);
|
|
1168
|
-
|
|
1230
|
+
resolve13(parseInt(match[1], 10));
|
|
1169
1231
|
}
|
|
1170
1232
|
});
|
|
1171
1233
|
child.on("exit", (code) => {
|
|
@@ -1323,11 +1385,16 @@ function collectTraceFromFiles(twinPaths) {
|
|
|
1323
1385
|
return allTraces;
|
|
1324
1386
|
}
|
|
1325
1387
|
var HTTP_COLLECT_TIMEOUT_MS = 5e3;
|
|
1326
|
-
|
|
1388
|
+
function twinBasePath(url) {
|
|
1389
|
+
return url.replace(/\/(mcp|api)\/?$/, "");
|
|
1390
|
+
}
|
|
1391
|
+
async function collectStateFromHttp(twinUrls, bearerToken, adminAuth) {
|
|
1327
1392
|
const state = {};
|
|
1393
|
+
const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {} } : bearerToken ? { "Authorization": `Bearer ${bearerToken}` } : {};
|
|
1328
1394
|
for (const [name, baseUrl] of Object.entries(twinUrls)) {
|
|
1329
1395
|
try {
|
|
1330
|
-
const response = await fetch(`${baseUrl
|
|
1396
|
+
const response = await fetch(`${twinBasePath(baseUrl)}/state`, {
|
|
1397
|
+
headers,
|
|
1331
1398
|
signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
|
|
1332
1399
|
});
|
|
1333
1400
|
if (response.ok) {
|
|
@@ -1344,11 +1411,40 @@ async function collectStateFromHttp(twinUrls) {
|
|
|
1344
1411
|
}
|
|
1345
1412
|
return state;
|
|
1346
1413
|
}
|
|
1347
|
-
|
|
1414
|
+
var HTTP_PUSH_TIMEOUT_MS = 1e4;
|
|
1415
|
+
async function pushStateToCloud(twinUrls, seedSelections, bearerToken, adminAuth) {
|
|
1416
|
+
const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {}, "Content-Type": "application/json" } : bearerToken ? { "Authorization": `Bearer ${bearerToken}`, "Content-Type": "application/json" } : { "Content-Type": "application/json" };
|
|
1417
|
+
for (const sel of seedSelections) {
|
|
1418
|
+
if (!sel.seedData) continue;
|
|
1419
|
+
const baseUrl = twinUrls[sel.twinName];
|
|
1420
|
+
if (!baseUrl) {
|
|
1421
|
+
warn(`No cloud URL for twin "${sel.twinName}", skipping state push`);
|
|
1422
|
+
continue;
|
|
1423
|
+
}
|
|
1424
|
+
const url = `${twinBasePath(baseUrl)}/state`;
|
|
1425
|
+
debug(`Pushing dynamic seed to ${sel.twinName}`, { url });
|
|
1426
|
+
const response = await fetch(url, {
|
|
1427
|
+
method: "PUT",
|
|
1428
|
+
headers,
|
|
1429
|
+
body: JSON.stringify(sel.seedData),
|
|
1430
|
+
signal: AbortSignal.timeout(HTTP_PUSH_TIMEOUT_MS)
|
|
1431
|
+
});
|
|
1432
|
+
if (!response.ok) {
|
|
1433
|
+
const text = await response.text().catch(() => "");
|
|
1434
|
+
throw new Error(
|
|
1435
|
+
`Failed to push dynamic seed to twin "${sel.twinName}": HTTP ${response.status}${text ? ` \u2014 ${text}` : ""}`
|
|
1436
|
+
);
|
|
1437
|
+
}
|
|
1438
|
+
debug(`Pushed dynamic seed to ${sel.twinName} successfully`);
|
|
1439
|
+
}
|
|
1440
|
+
}
|
|
1441
|
+
async function collectTraceFromHttp(twinUrls, bearerToken, adminAuth) {
|
|
1348
1442
|
const allTraces = [];
|
|
1443
|
+
const headers = adminAuth ? { "x-archal-admin-token": adminAuth.token, ...adminAuth.userId ? { "x-archal-user-id": adminAuth.userId } : {} } : bearerToken ? { "Authorization": `Bearer ${bearerToken}` } : {};
|
|
1349
1444
|
for (const [name, baseUrl] of Object.entries(twinUrls)) {
|
|
1350
1445
|
try {
|
|
1351
|
-
const response = await fetch(`${baseUrl
|
|
1446
|
+
const response = await fetch(`${twinBasePath(baseUrl)}/trace`, {
|
|
1447
|
+
headers,
|
|
1352
1448
|
signal: AbortSignal.timeout(HTTP_COLLECT_TIMEOUT_MS)
|
|
1353
1449
|
});
|
|
1354
1450
|
if (response.ok) {
|
|
@@ -1443,10 +1539,94 @@ function resolveAgentConfig(agentCommand, projectConfigPath) {
|
|
|
1443
1539
|
return null;
|
|
1444
1540
|
}
|
|
1445
1541
|
|
|
1542
|
+
// src/runner/harness.ts
|
|
1543
|
+
import { existsSync as existsSync3, readFileSync as readFileSync4 } from "fs";
|
|
1544
|
+
import { resolve as resolve3 } from "path";
|
|
1545
|
+
import { z } from "zod";
|
|
1546
|
+
var harnessLocalSchema = z.object({
|
|
1547
|
+
command: z.string().min(1, "local.command must be a non-empty string"),
|
|
1548
|
+
args: z.array(z.string()).default([]),
|
|
1549
|
+
env: z.record(z.string()).optional()
|
|
1550
|
+
});
|
|
1551
|
+
var harnessManifestSchema = z.object({
|
|
1552
|
+
version: z.literal(1),
|
|
1553
|
+
defaultModel: z.string().optional(),
|
|
1554
|
+
promptFiles: z.array(z.string()).default([]),
|
|
1555
|
+
local: harnessLocalSchema.optional()
|
|
1556
|
+
});
|
|
1557
|
+
var MANIFEST_FILE = "archal-harness.json";
|
|
1558
|
+
function resolveHarnessDir(rawDir) {
|
|
1559
|
+
const harnessDir = resolve3(rawDir);
|
|
1560
|
+
if (!existsSync3(harnessDir)) {
|
|
1561
|
+
throw new Error(`Harness directory not found: ${harnessDir}`);
|
|
1562
|
+
}
|
|
1563
|
+
return harnessDir;
|
|
1564
|
+
}
|
|
1565
|
+
function parseHarnessManifest(manifestPath) {
|
|
1566
|
+
try {
|
|
1567
|
+
const raw = readFileSync4(manifestPath, "utf-8");
|
|
1568
|
+
return harnessManifestSchema.parse(JSON.parse(raw));
|
|
1569
|
+
} catch (err) {
|
|
1570
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
1571
|
+
throw new Error(`Invalid harness manifest at ${manifestPath}: ${message}`);
|
|
1572
|
+
}
|
|
1573
|
+
}
|
|
1574
|
+
function trimToUndefined(value) {
|
|
1575
|
+
const trimmed = value?.trim();
|
|
1576
|
+
return trimmed ? trimmed : void 0;
|
|
1577
|
+
}
|
|
1578
|
+
function resolveLocalHarness(harnessDirInput, explicitModel) {
|
|
1579
|
+
const harnessDir = resolveHarnessDir(harnessDirInput);
|
|
1580
|
+
const manifestPath = resolve3(harnessDir, MANIFEST_FILE);
|
|
1581
|
+
const explicit = trimToUndefined(explicitModel);
|
|
1582
|
+
if (!existsSync3(manifestPath)) {
|
|
1583
|
+
return {
|
|
1584
|
+
harnessDir,
|
|
1585
|
+
manifestPath,
|
|
1586
|
+
model: explicit
|
|
1587
|
+
};
|
|
1588
|
+
}
|
|
1589
|
+
const manifest = parseHarnessManifest(manifestPath);
|
|
1590
|
+
const promptContext = loadPromptContext(harnessDir, manifest.promptFiles);
|
|
1591
|
+
const localCommand = manifest.local ? {
|
|
1592
|
+
command: manifest.local.command,
|
|
1593
|
+
args: manifest.local.args,
|
|
1594
|
+
env: manifest.local.env
|
|
1595
|
+
} : void 0;
|
|
1596
|
+
const model = explicit ?? trimToUndefined(manifest.defaultModel);
|
|
1597
|
+
return { harnessDir, manifestPath, manifest, model, promptContext, localCommand };
|
|
1598
|
+
}
|
|
1599
|
+
function loadPromptContext(harnessDir, promptFiles) {
|
|
1600
|
+
if (promptFiles.length === 0) {
|
|
1601
|
+
return void 0;
|
|
1602
|
+
}
|
|
1603
|
+
const sections = [];
|
|
1604
|
+
for (const promptFile of promptFiles) {
|
|
1605
|
+
const relativePath = promptFile.trim();
|
|
1606
|
+
if (!relativePath) {
|
|
1607
|
+
throw new Error("Harness promptFiles entries must be non-empty strings");
|
|
1608
|
+
}
|
|
1609
|
+
const absolutePath = resolve3(harnessDir, relativePath);
|
|
1610
|
+
if (!existsSync3(absolutePath)) {
|
|
1611
|
+
throw new Error(`Harness prompt file not found: ${absolutePath}`);
|
|
1612
|
+
}
|
|
1613
|
+
const content = readFileSync4(absolutePath, "utf-8").trim();
|
|
1614
|
+
if (!content) {
|
|
1615
|
+
warn(`Harness prompt file is empty and will be skipped: ${absolutePath}`);
|
|
1616
|
+
continue;
|
|
1617
|
+
}
|
|
1618
|
+
sections.push(content);
|
|
1619
|
+
}
|
|
1620
|
+
if (sections.length === 0) {
|
|
1621
|
+
return void 0;
|
|
1622
|
+
}
|
|
1623
|
+
return sections.join("\n\n");
|
|
1624
|
+
}
|
|
1625
|
+
|
|
1446
1626
|
// src/runner/reporter.ts
|
|
1447
|
-
import { readFileSync as
|
|
1627
|
+
import { readFileSync as readFileSync5, existsSync as existsSync4 } from "fs";
|
|
1448
1628
|
import { createRequire as createRequire2 } from "module";
|
|
1449
|
-
import { dirname, resolve as
|
|
1629
|
+
import { dirname, resolve as resolve4 } from "path";
|
|
1450
1630
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
1451
1631
|
var __dirname2 = fileURLToPath2(new URL(".", import.meta.url));
|
|
1452
1632
|
function printHeader(scenarioTitle, seedSelections) {
|
|
@@ -1530,23 +1710,26 @@ function loadTwinFidelity(twinNames) {
|
|
|
1530
1710
|
for (const name of twinNames) {
|
|
1531
1711
|
try {
|
|
1532
1712
|
let fidelityPath = null;
|
|
1533
|
-
const monorepoPath =
|
|
1534
|
-
if (
|
|
1713
|
+
const monorepoPath = resolve4(__dirname2, "..", "..", "twins", name, "fidelity.json");
|
|
1714
|
+
if (existsSync4(monorepoPath)) {
|
|
1535
1715
|
fidelityPath = monorepoPath;
|
|
1536
1716
|
}
|
|
1537
1717
|
if (!fidelityPath) {
|
|
1538
1718
|
try {
|
|
1539
1719
|
const require2 = createRequire2(import.meta.url);
|
|
1540
1720
|
const twinMain = require2.resolve(`@archal/twin-${name}`);
|
|
1541
|
-
const candidate =
|
|
1542
|
-
if (
|
|
1721
|
+
const candidate = resolve4(dirname(twinMain), "..", "fidelity.json");
|
|
1722
|
+
if (existsSync4(candidate)) {
|
|
1543
1723
|
fidelityPath = candidate;
|
|
1544
1724
|
}
|
|
1545
1725
|
} catch {
|
|
1546
1726
|
}
|
|
1547
1727
|
}
|
|
1548
|
-
if (!fidelityPath)
|
|
1549
|
-
|
|
1728
|
+
if (!fidelityPath) {
|
|
1729
|
+
debug(`Fidelity data not found for twin "${name}" \u2014 skipping badge`);
|
|
1730
|
+
continue;
|
|
1731
|
+
}
|
|
1732
|
+
const raw = readFileSync5(fidelityPath, "utf-8");
|
|
1550
1733
|
const data = JSON.parse(raw);
|
|
1551
1734
|
lines.push(` ${DIM}twin fidelity:${RESET} ${data.twin} v${data.version}`);
|
|
1552
1735
|
for (const cap of data.capabilities) {
|
|
@@ -1701,6 +1884,7 @@ function cleanPredicate(pred) {
|
|
|
1701
1884
|
return cleaned.trim();
|
|
1702
1885
|
}
|
|
1703
1886
|
function parseAssertion(description) {
|
|
1887
|
+
const lowerOriginal = description.toLowerCase().trim();
|
|
1704
1888
|
const lower = stripParenthetical(description).toLowerCase().trim();
|
|
1705
1889
|
const noLabeledMatch = lower.match(/^no\s+(.+?)\s+labeled\s+["']?([^"']+?)["']?\s+(?:are|were|is|was|should be)\s+(.+)$/);
|
|
1706
1890
|
if (noLabeledMatch) {
|
|
@@ -1711,7 +1895,63 @@ function parseAssertion(description) {
|
|
|
1711
1895
|
labelFilter: noLabeledMatch[2]?.trim()
|
|
1712
1896
|
};
|
|
1713
1897
|
}
|
|
1714
|
-
const
|
|
1898
|
+
const withLabelRemainMatch = lower.match(/^(.+?)\s+with\s+(?:the\s+)?["']?([^"']+?)["']?\s+label\s+remain\s+(.+)$/);
|
|
1899
|
+
if (withLabelRemainMatch) {
|
|
1900
|
+
const remainState = withLabelRemainMatch[3]?.trim() ?? "";
|
|
1901
|
+
const STATE_OPPOSITES = {
|
|
1902
|
+
open: "closed",
|
|
1903
|
+
closed: "open",
|
|
1904
|
+
active: "inactive",
|
|
1905
|
+
inactive: "active",
|
|
1906
|
+
pending: "completed",
|
|
1907
|
+
completed: "pending",
|
|
1908
|
+
enabled: "disabled",
|
|
1909
|
+
disabled: "enabled"
|
|
1910
|
+
};
|
|
1911
|
+
const oppositeState = STATE_OPPOSITES[remainState] ?? `not_${remainState}`;
|
|
1912
|
+
return {
|
|
1913
|
+
type: "no_matching",
|
|
1914
|
+
subject: withLabelRemainMatch[1]?.trim() ?? "",
|
|
1915
|
+
predicate: oppositeState,
|
|
1916
|
+
labelFilter: withLabelRemainMatch[2]?.trim()
|
|
1917
|
+
};
|
|
1918
|
+
}
|
|
1919
|
+
const remainMatch = lower.match(/^(?:recently\s+active\s+)?(.+?)\s+remain\s+(open|closed)$/);
|
|
1920
|
+
if (remainMatch) {
|
|
1921
|
+
return {
|
|
1922
|
+
type: "state_check",
|
|
1923
|
+
subject: remainMatch[1]?.trim() ?? "",
|
|
1924
|
+
predicate: remainMatch[2]?.trim()
|
|
1925
|
+
};
|
|
1926
|
+
}
|
|
1927
|
+
const exactLabelMatch = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+have\s+(?:the\s+)?["']?([^"']+?)["']?\s+label$/);
|
|
1928
|
+
if (exactLabelMatch) {
|
|
1929
|
+
return {
|
|
1930
|
+
type: "exact_count",
|
|
1931
|
+
subject: exactLabelMatch[2]?.trim() ?? "",
|
|
1932
|
+
value: parseInt(exactLabelMatch[1] ?? "0", 10),
|
|
1933
|
+
labelFilter: exactLabelMatch[3]?.trim()
|
|
1934
|
+
};
|
|
1935
|
+
}
|
|
1936
|
+
const allHaveAtLeastMatch = lower.match(/^all\s+(\d+)\s+(.+?)\s+have\s+at\s+least\s+one\s+(.+)$/);
|
|
1937
|
+
if (allHaveAtLeastMatch) {
|
|
1938
|
+
return {
|
|
1939
|
+
type: "min_count",
|
|
1940
|
+
subject: allHaveAtLeastMatch[2]?.trim() ?? "",
|
|
1941
|
+
value: parseInt(allHaveAtLeastMatch[1] ?? "0", 10),
|
|
1942
|
+
predicate: cleanPredicate(allHaveAtLeastMatch[3]?.trim() ?? "")
|
|
1943
|
+
};
|
|
1944
|
+
}
|
|
1945
|
+
const allHaveMatch = lower.match(/^all\s+(\d+)\s+(.+?)\s+have\s+(.+)$/);
|
|
1946
|
+
if (allHaveMatch) {
|
|
1947
|
+
return {
|
|
1948
|
+
type: "min_count",
|
|
1949
|
+
subject: allHaveMatch[2]?.trim() ?? "",
|
|
1950
|
+
value: parseInt(allHaveMatch[1] ?? "0", 10),
|
|
1951
|
+
predicate: cleanPredicate(allHaveMatch[3]?.trim() ?? "")
|
|
1952
|
+
};
|
|
1953
|
+
}
|
|
1954
|
+
const exactWithVerb = lower.match(/^exactly\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be|have)\s+(.+)$/);
|
|
1715
1955
|
if (exactWithVerb) {
|
|
1716
1956
|
return {
|
|
1717
1957
|
type: "exact_count",
|
|
@@ -1728,7 +1968,7 @@ function parseAssertion(description) {
|
|
|
1728
1968
|
value: parseInt(exactWithoutVerb[1] ?? "0", 10)
|
|
1729
1969
|
};
|
|
1730
1970
|
}
|
|
1731
|
-
const minWithVerb = lower.match(/^at\s+least\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be)\s+(.+)$/);
|
|
1971
|
+
const minWithVerb = lower.match(/^at\s+least\s+(\d+)\s+(.+?)\s+(?:are|were|is|was|should be|have)\s+(.+)$/);
|
|
1732
1972
|
if (minWithVerb) {
|
|
1733
1973
|
return {
|
|
1734
1974
|
type: "min_count",
|
|
@@ -1790,6 +2030,95 @@ function parseAssertion(description) {
|
|
|
1790
2030
|
if (/^no\s+errors?\s+(in\s+)?(trace|log|output)/i.test(lower)) {
|
|
1791
2031
|
return { type: "no_errors", subject: "trace" };
|
|
1792
2032
|
}
|
|
2033
|
+
const agentFewerMatch = lower.match(/^the\s+agent\s+completed\s+in\s+fewer\s+than\s+(\d+)\s+tool\s+calls?$/);
|
|
2034
|
+
if (agentFewerMatch) {
|
|
2035
|
+
return {
|
|
2036
|
+
type: "trace_count",
|
|
2037
|
+
subject: "tool calls",
|
|
2038
|
+
value: parseInt(agentFewerMatch[1] ?? "1", 10) - 1
|
|
2039
|
+
};
|
|
2040
|
+
}
|
|
2041
|
+
const postedInChannelMatch = lower.match(/^a\s+(.+?)\s+was\s+(?:posted|created|sent)\s+in\s+#(\w[\w-]*)(?:\s+.+)?$/);
|
|
2042
|
+
if (postedInChannelMatch) {
|
|
2043
|
+
return {
|
|
2044
|
+
type: "channel_check",
|
|
2045
|
+
subject: postedInChannelMatch[1]?.trim() ?? "",
|
|
2046
|
+
channel: postedInChannelMatch[2]?.trim()
|
|
2047
|
+
};
|
|
2048
|
+
}
|
|
2049
|
+
const replyInChannelMatch = lower.match(/^a\s+reply\s+was\s+posted\s+in\s+#(\w[\w-]*)$/);
|
|
2050
|
+
if (replyInChannelMatch) {
|
|
2051
|
+
return {
|
|
2052
|
+
type: "channel_check",
|
|
2053
|
+
subject: "message",
|
|
2054
|
+
channel: replyInChannelMatch[1]?.trim()
|
|
2055
|
+
};
|
|
2056
|
+
}
|
|
2057
|
+
const noMessagesInMatch = lower.match(/^no\s+messages?\s+(?:about\s+.+?\s+)?(?:were|was)\s+(?:posted|created|sent)\s+in\s+(.+)$/);
|
|
2058
|
+
if (noMessagesInMatch) {
|
|
2059
|
+
const channelStr = noMessagesInMatch[1]?.trim() ?? "";
|
|
2060
|
+
const channels = channelStr.match(/#(\w[\w-]*)/g)?.map((c) => c.slice(1)) ?? [];
|
|
2061
|
+
if (channels.length === 0) {
|
|
2062
|
+
const bareChannels = channelStr.split(/\s+(?:or|and|,)\s+/).map((s) => s.trim()).filter(Boolean);
|
|
2063
|
+
channels.push(...bareChannels);
|
|
2064
|
+
}
|
|
2065
|
+
if (channels.length === 0 || channels.length === 1 && channels[0] === "") {
|
|
2066
|
+
return null;
|
|
2067
|
+
}
|
|
2068
|
+
return {
|
|
2069
|
+
type: "channel_check",
|
|
2070
|
+
subject: "message",
|
|
2071
|
+
channel: channels.join(","),
|
|
2072
|
+
negated: true
|
|
2073
|
+
};
|
|
2074
|
+
}
|
|
2075
|
+
const noCreatedInMatch = lower.match(/^no\s+(.+?)\s+(?:were|was|have been|had been)\s+(?:created|processed|charged|posted|sent|made|transferred)\s+(?:in|on|to|from|with|for|via)\s+(.+)$/);
|
|
2076
|
+
if (noCreatedInMatch) {
|
|
2077
|
+
return {
|
|
2078
|
+
type: "exact_count",
|
|
2079
|
+
subject: noCreatedInMatch[1]?.trim() ?? "",
|
|
2080
|
+
value: 0,
|
|
2081
|
+
targetService: noCreatedInMatch[2]?.trim()
|
|
2082
|
+
};
|
|
2083
|
+
}
|
|
2084
|
+
const totalAmountMatch = lower.match(/^the\s+total\s+amount\s+(?:paid|charged|spent|transferred)\s*(?:out\s+)?is\s+\$?([\d,]+(?:\.\d+)?)$/);
|
|
2085
|
+
if (totalAmountMatch) {
|
|
2086
|
+
return {
|
|
2087
|
+
type: "comparison",
|
|
2088
|
+
subject: "total amount",
|
|
2089
|
+
value: parseFloat((totalAmountMatch[1] ?? "0").replace(/,/g, ""))
|
|
2090
|
+
};
|
|
2091
|
+
}
|
|
2092
|
+
const doesNotContainMatch = lowerOriginal.match(/^the\s+(.+?)\s+(?:body|content)\s+does\s+not\s+(?:contain|include)\s+(.+)$/);
|
|
2093
|
+
if (doesNotContainMatch) {
|
|
2094
|
+
const patternsRaw = doesNotContainMatch[2]?.trim() ?? "";
|
|
2095
|
+
const patterns = [];
|
|
2096
|
+
const quotedMatches = patternsRaw.matchAll(/["']([^"']+)["']/g);
|
|
2097
|
+
for (const qm of quotedMatches) {
|
|
2098
|
+
patterns.push(qm[1] ?? "");
|
|
2099
|
+
}
|
|
2100
|
+
const dollarMatches = patternsRaw.matchAll(/\$[\d,]+/g);
|
|
2101
|
+
for (const dm of dollarMatches) {
|
|
2102
|
+
patterns.push(dm[0] ?? "");
|
|
2103
|
+
}
|
|
2104
|
+
if (patterns.length === 0) {
|
|
2105
|
+
patterns.push(patternsRaw);
|
|
2106
|
+
}
|
|
2107
|
+
return {
|
|
2108
|
+
type: "content_check",
|
|
2109
|
+
subject: doesNotContainMatch[1]?.trim() ?? "",
|
|
2110
|
+
contentPatterns: patterns,
|
|
2111
|
+
negated: true
|
|
2112
|
+
};
|
|
2113
|
+
}
|
|
2114
|
+
const wasNotCreatedMatch = lower.match(/^the\s+(.+?)\s+was\s+not\s+created\s+in\s+(?:the\s+)?(?:public\s+)?(?:repository\s+)?["']?(.+?)["']?$/);
|
|
2115
|
+
if (wasNotCreatedMatch) {
|
|
2116
|
+
return {
|
|
2117
|
+
type: "not_exists",
|
|
2118
|
+
subject: wasNotCreatedMatch[1]?.trim() ?? "",
|
|
2119
|
+
targetService: wasNotCreatedMatch[2]?.trim()
|
|
2120
|
+
};
|
|
2121
|
+
}
|
|
1793
2122
|
const stateMatch = lower.match(/^(?:the\s+)?(.+?)\s+(?:is|was|has been|should be)\s+(created|merged|closed|open|deleted|removed|resolved|approved|rejected)/);
|
|
1794
2123
|
if (stateMatch) {
|
|
1795
2124
|
return {
|
|
@@ -1798,6 +2127,10 @@ function parseAssertion(description) {
|
|
|
1798
2127
|
predicate: stateMatch[2]?.trim()
|
|
1799
2128
|
};
|
|
1800
2129
|
}
|
|
2130
|
+
const wasCreatedMatch = lower.match(/^a\s+(.+?)\s+was\s+created\s+in\s+(?:a|the)\s+(.+)$/);
|
|
2131
|
+
if (wasCreatedMatch) {
|
|
2132
|
+
return { type: "exists", subject: wasCreatedMatch[1]?.trim() ?? "" };
|
|
2133
|
+
}
|
|
1801
2134
|
const existsMatch = lower.match(/^(?:the\s+)?(.+?)\s+(?:exists?|is present|was created|has been created)/);
|
|
1802
2135
|
if (existsMatch) {
|
|
1803
2136
|
return { type: "exists", subject: existsMatch[1]?.trim() ?? "" };
|
|
@@ -1930,6 +2263,14 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
1930
2263
|
assertion.predicate
|
|
1931
2264
|
);
|
|
1932
2265
|
}
|
|
2266
|
+
if (assertion.value === 0 && assertion.type === "exact_count") {
|
|
2267
|
+
return {
|
|
2268
|
+
criterionId: criterion.id,
|
|
2269
|
+
status: "pass",
|
|
2270
|
+
confidence: 0.9,
|
|
2271
|
+
explanation: `No "${assertion.subject}" found in twin state (0 = 0)`
|
|
2272
|
+
};
|
|
2273
|
+
}
|
|
1933
2274
|
return {
|
|
1934
2275
|
criterionId: criterion.id,
|
|
1935
2276
|
status: "fail",
|
|
@@ -1937,9 +2278,44 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
1937
2278
|
explanation: `Could not find "${assertion.subject}" in twin state`
|
|
1938
2279
|
};
|
|
1939
2280
|
}
|
|
2281
|
+
if (assertion.value === 0 && assertion.type === "exact_count" && assertion.targetService) {
|
|
2282
|
+
const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
|
|
2283
|
+
const newCount = afterItems.length - (beforeItems?.length ?? 0);
|
|
2284
|
+
return evaluateCount(
|
|
2285
|
+
criterion.id,
|
|
2286
|
+
assertion.type,
|
|
2287
|
+
0,
|
|
2288
|
+
Math.max(0, newCount),
|
|
2289
|
+
assertion.subject,
|
|
2290
|
+
`newly created in ${assertion.targetService}`
|
|
2291
|
+
);
|
|
2292
|
+
}
|
|
2293
|
+
let filteredItems = afterItems;
|
|
2294
|
+
if (assertion.labelFilter) {
|
|
2295
|
+
filteredItems = afterItems.filter((item) => {
|
|
2296
|
+
if (typeof item !== "object" || item === null) return false;
|
|
2297
|
+
const obj = item;
|
|
2298
|
+
const labels = obj["labels"];
|
|
2299
|
+
if (Array.isArray(labels)) {
|
|
2300
|
+
return labels.some((l) => {
|
|
2301
|
+
const labelName = typeof l === "string" ? l : l?.["name"];
|
|
2302
|
+
return String(labelName).toLowerCase() === assertion.labelFilter?.toLowerCase();
|
|
2303
|
+
});
|
|
2304
|
+
}
|
|
2305
|
+
return false;
|
|
2306
|
+
});
|
|
2307
|
+
return evaluateCount(
|
|
2308
|
+
criterion.id,
|
|
2309
|
+
assertion.type,
|
|
2310
|
+
assertion.value ?? 0,
|
|
2311
|
+
filteredItems.length,
|
|
2312
|
+
assertion.subject,
|
|
2313
|
+
`labeled "${assertion.labelFilter}"`
|
|
2314
|
+
);
|
|
2315
|
+
}
|
|
1940
2316
|
if (assertion.predicate) {
|
|
1941
2317
|
const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
|
|
1942
|
-
const afterFiltered = filterByPredicate(
|
|
2318
|
+
const afterFiltered = filterByPredicate(filteredItems, assertion.predicate);
|
|
1943
2319
|
if (beforeItems) {
|
|
1944
2320
|
const beforeFiltered = filterByPredicate(beforeItems, assertion.predicate);
|
|
1945
2321
|
const newlyMatching = afterFiltered.length - beforeFiltered.length;
|
|
@@ -1965,7 +2341,7 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
1965
2341
|
criterion.id,
|
|
1966
2342
|
assertion.type,
|
|
1967
2343
|
assertion.value ?? 0,
|
|
1968
|
-
|
|
2344
|
+
filteredItems.length,
|
|
1969
2345
|
assertion.subject,
|
|
1970
2346
|
assertion.predicate
|
|
1971
2347
|
);
|
|
@@ -2013,12 +2389,27 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
2013
2389
|
}
|
|
2014
2390
|
case "not_exists": {
|
|
2015
2391
|
const items = resolveSubjectInState(assertion.subject, stateView.after);
|
|
2016
|
-
|
|
2392
|
+
let filteredItems = items;
|
|
2393
|
+
if (filteredItems && assertion.targetService) {
|
|
2394
|
+
const target = assertion.targetService.toLowerCase();
|
|
2395
|
+
const beforeItems = resolveSubjectInState(assertion.subject, stateView.before);
|
|
2396
|
+
const beforeCount = beforeItems?.length ?? 0;
|
|
2397
|
+
const newItems = filteredItems.slice(beforeCount);
|
|
2398
|
+
filteredItems = newItems.filter((item) => {
|
|
2399
|
+
if (typeof item !== "object" || item === null) return false;
|
|
2400
|
+
const obj = item;
|
|
2401
|
+
const repo = String(obj["repository"] ?? obj["repo"] ?? obj["fullName"] ?? obj["full_name"] ?? "").toLowerCase();
|
|
2402
|
+
const repoName = String(obj["repository_name"] ?? obj["repo_name"] ?? "").toLowerCase();
|
|
2403
|
+
return repo.includes(target) || repoName.includes(target) || target.includes(repo) || target.includes(repoName);
|
|
2404
|
+
});
|
|
2405
|
+
}
|
|
2406
|
+
const absent = filteredItems === null || filteredItems.length === 0;
|
|
2407
|
+
const targetDesc = assertion.targetService ? ` in "${assertion.targetService}"` : "";
|
|
2017
2408
|
return {
|
|
2018
2409
|
criterionId: criterion.id,
|
|
2019
2410
|
status: absent ? "pass" : "fail",
|
|
2020
2411
|
confidence: 1,
|
|
2021
|
-
explanation: absent ? `"${assertion.subject}" does not exist in twin state` : `"${assertion.subject}" still exists in twin state`
|
|
2412
|
+
explanation: absent ? `"${assertion.subject}" does not exist${targetDesc} in twin state` : `"${assertion.subject}" still exists${targetDesc} in twin state (found ${filteredItems?.length ?? 0})`
|
|
2022
2413
|
};
|
|
2023
2414
|
}
|
|
2024
2415
|
case "state_check": {
|
|
@@ -2041,6 +2432,51 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
2041
2432
|
};
|
|
2042
2433
|
}
|
|
2043
2434
|
case "comparison": {
|
|
2435
|
+
if (assertion.subject === "total amount") {
|
|
2436
|
+
const flat = flattenTwinState(stateView.after);
|
|
2437
|
+
let totalAmount = 0;
|
|
2438
|
+
for (const key of ["paymentIntents", "payment_intents", "charges", "payouts", "transfers"]) {
|
|
2439
|
+
const items = flat[key];
|
|
2440
|
+
if (Array.isArray(items)) {
|
|
2441
|
+
for (const item of items) {
|
|
2442
|
+
if (typeof item === "object" && item !== null) {
|
|
2443
|
+
const obj = item;
|
|
2444
|
+
const amount = Number(obj["amount"] ?? obj["amount_paid"] ?? 0);
|
|
2445
|
+
const status = String(obj["status"] ?? "");
|
|
2446
|
+
if (status === "succeeded" || status === "paid" || status === "complete") {
|
|
2447
|
+
totalAmount += amount;
|
|
2448
|
+
}
|
|
2449
|
+
}
|
|
2450
|
+
}
|
|
2451
|
+
}
|
|
2452
|
+
}
|
|
2453
|
+
const flatBefore = flattenTwinState(stateView.before);
|
|
2454
|
+
let beforeAmount = 0;
|
|
2455
|
+
for (const key of ["paymentIntents", "payment_intents", "charges", "payouts", "transfers"]) {
|
|
2456
|
+
const items = flatBefore[key];
|
|
2457
|
+
if (Array.isArray(items)) {
|
|
2458
|
+
for (const item of items) {
|
|
2459
|
+
if (typeof item === "object" && item !== null) {
|
|
2460
|
+
const obj = item;
|
|
2461
|
+
const amount = Number(obj["amount"] ?? obj["amount_paid"] ?? 0);
|
|
2462
|
+
const status = String(obj["status"] ?? "");
|
|
2463
|
+
if (status === "succeeded" || status === "paid" || status === "complete") {
|
|
2464
|
+
beforeAmount += amount;
|
|
2465
|
+
}
|
|
2466
|
+
}
|
|
2467
|
+
}
|
|
2468
|
+
}
|
|
2469
|
+
}
|
|
2470
|
+
const netAmount = totalAmount - beforeAmount;
|
|
2471
|
+
const expectedCents = (assertion.value ?? 0) * 100;
|
|
2472
|
+
const passed = netAmount <= expectedCents;
|
|
2473
|
+
return {
|
|
2474
|
+
criterionId: criterion.id,
|
|
2475
|
+
status: passed ? "pass" : "fail",
|
|
2476
|
+
confidence: 1,
|
|
2477
|
+
explanation: passed ? `Total new amount paid out is $${netAmount / 100} (expected $${assertion.value ?? 0})` : `Total new amount paid out is $${netAmount / 100}, expected $${assertion.value ?? 0}`
|
|
2478
|
+
};
|
|
2479
|
+
}
|
|
2044
2480
|
return {
|
|
2045
2481
|
criterionId: criterion.id,
|
|
2046
2482
|
status: "fail",
|
|
@@ -2048,6 +2484,123 @@ function evaluateDeterministic(criterion, stateView) {
|
|
|
2048
2484
|
explanation: `Comparison assertion type not fully implemented for: "${criterion.description}"`
|
|
2049
2485
|
};
|
|
2050
2486
|
}
|
|
2487
|
+
case "trace_count": {
|
|
2488
|
+
const traceCount = stateView.trace.length;
|
|
2489
|
+
const maxAllowed = assertion.value ?? 0;
|
|
2490
|
+
const passed = traceCount <= maxAllowed;
|
|
2491
|
+
return {
|
|
2492
|
+
criterionId: criterion.id,
|
|
2493
|
+
status: passed ? "pass" : "fail",
|
|
2494
|
+
confidence: 1,
|
|
2495
|
+
explanation: passed ? `Agent made ${traceCount} tool calls (<= ${maxAllowed})` : `Agent made ${traceCount} tool calls, expected at most ${maxAllowed}`
|
|
2496
|
+
};
|
|
2497
|
+
}
|
|
2498
|
+
case "channel_check": {
|
|
2499
|
+
const flat = flattenTwinState(stateView.after);
|
|
2500
|
+
const flatBefore = flattenTwinState(stateView.before);
|
|
2501
|
+
const channels = assertion.channel?.split(",") ?? [];
|
|
2502
|
+
const negated = assertion.negated ?? false;
|
|
2503
|
+
const messages = flat["messages"] ?? [];
|
|
2504
|
+
const messagesBefore = flatBefore["messages"] ?? [];
|
|
2505
|
+
const beforeIds = new Set(messagesBefore.map((m) => {
|
|
2506
|
+
if (typeof m === "object" && m !== null) {
|
|
2507
|
+
return m["ts"] ?? m["id"];
|
|
2508
|
+
}
|
|
2509
|
+
return void 0;
|
|
2510
|
+
}));
|
|
2511
|
+
const newMessages = messages.filter((m) => {
|
|
2512
|
+
if (typeof m !== "object" || m === null) return false;
|
|
2513
|
+
const obj = m;
|
|
2514
|
+
const id = obj["ts"] ?? obj["id"];
|
|
2515
|
+
return !beforeIds.has(id);
|
|
2516
|
+
});
|
|
2517
|
+
const channelNames = flat["channels"] ?? [];
|
|
2518
|
+
const channelIdMap = {};
|
|
2519
|
+
for (const ch of channelNames) {
|
|
2520
|
+
if (typeof ch === "object" && ch !== null) {
|
|
2521
|
+
const obj = ch;
|
|
2522
|
+
const name = String(obj["name"] ?? "");
|
|
2523
|
+
const id = String(obj["id"] ?? "");
|
|
2524
|
+
channelIdMap[id] = name;
|
|
2525
|
+
}
|
|
2526
|
+
}
|
|
2527
|
+
const matchingMessages = newMessages.filter((m) => {
|
|
2528
|
+
if (typeof m !== "object" || m === null) return false;
|
|
2529
|
+
const obj = m;
|
|
2530
|
+
const channelId = String(obj["channel"] ?? "");
|
|
2531
|
+
const channelName = channelIdMap[channelId] ?? channelId;
|
|
2532
|
+
return channels.some((c) => channelName === c || channelId === c);
|
|
2533
|
+
});
|
|
2534
|
+
if (negated) {
|
|
2535
|
+
const passed = matchingMessages.length === 0;
|
|
2536
|
+
return {
|
|
2537
|
+
criterionId: criterion.id,
|
|
2538
|
+
status: passed ? "pass" : "fail",
|
|
2539
|
+
confidence: 1,
|
|
2540
|
+
explanation: passed ? `No new messages were posted in #${channels.join(", #")}` : `Found ${matchingMessages.length} new message(s) in #${channels.join(", #")}`
|
|
2541
|
+
};
|
|
2542
|
+
} else {
|
|
2543
|
+
const passed = matchingMessages.length > 0;
|
|
2544
|
+
return {
|
|
2545
|
+
criterionId: criterion.id,
|
|
2546
|
+
status: passed ? "pass" : "fail",
|
|
2547
|
+
confidence: 1,
|
|
2548
|
+
explanation: passed ? `Found ${matchingMessages.length} new message(s) in #${channels.join(", #")}` : `No new messages found in #${channels.join(", #")}`
|
|
2549
|
+
};
|
|
2550
|
+
}
|
|
2551
|
+
}
|
|
2552
|
+
case "content_check": {
|
|
2553
|
+
const flat = flattenTwinState(stateView.after);
|
|
2554
|
+
const negated = assertion.negated ?? false;
|
|
2555
|
+
const patterns = assertion.contentPatterns ?? [];
|
|
2556
|
+
const subjectWords = assertion.subject.toLowerCase().split(/\s+/);
|
|
2557
|
+
let contentToCheck = "";
|
|
2558
|
+
const issues = flat["issues"] ?? [];
|
|
2559
|
+
if (subjectWords.includes("issue")) {
|
|
2560
|
+
for (const issue of issues) {
|
|
2561
|
+
if (typeof issue === "object" && issue !== null) {
|
|
2562
|
+
const obj = issue;
|
|
2563
|
+
contentToCheck += String(obj["body"] ?? "") + " " + String(obj["title"] ?? "") + " ";
|
|
2564
|
+
}
|
|
2565
|
+
}
|
|
2566
|
+
}
|
|
2567
|
+
const messages = flat["messages"] ?? [];
|
|
2568
|
+
if (subjectWords.includes("message") || subjectWords.includes("reply")) {
|
|
2569
|
+
for (const msg of messages) {
|
|
2570
|
+
if (typeof msg === "object" && msg !== null) {
|
|
2571
|
+
const obj = msg;
|
|
2572
|
+
contentToCheck += String(obj["text"] ?? "") + " ";
|
|
2573
|
+
}
|
|
2574
|
+
}
|
|
2575
|
+
}
|
|
2576
|
+
if (!contentToCheck.trim()) {
|
|
2577
|
+
return {
|
|
2578
|
+
criterionId: criterion.id,
|
|
2579
|
+
status: negated ? "pass" : "fail",
|
|
2580
|
+
confidence: 0.7,
|
|
2581
|
+
explanation: negated ? `No ${assertion.subject} content found to check \u2014 passes by default` : `No ${assertion.subject} content found in twin state`
|
|
2582
|
+
};
|
|
2583
|
+
}
|
|
2584
|
+
const lowerContent = contentToCheck.toLowerCase();
|
|
2585
|
+
const foundPatterns = patterns.filter((p) => lowerContent.includes(p.toLowerCase()));
|
|
2586
|
+
if (negated) {
|
|
2587
|
+
const passed = foundPatterns.length === 0;
|
|
2588
|
+
return {
|
|
2589
|
+
criterionId: criterion.id,
|
|
2590
|
+
status: passed ? "pass" : "fail",
|
|
2591
|
+
confidence: 1,
|
|
2592
|
+
explanation: passed ? `Content does not contain any of the checked patterns` : `Content contains: ${foundPatterns.map((p) => `"${p}"`).join(", ")}`
|
|
2593
|
+
};
|
|
2594
|
+
} else {
|
|
2595
|
+
const passed = foundPatterns.length > 0;
|
|
2596
|
+
return {
|
|
2597
|
+
criterionId: criterion.id,
|
|
2598
|
+
status: passed ? "pass" : "fail",
|
|
2599
|
+
confidence: 1,
|
|
2600
|
+
explanation: passed ? `Content contains: ${foundPatterns.map((p) => `"${p}"`).join(", ")}` : `Content does not contain any of: ${patterns.map((p) => `"${p}"`).join(", ")}`
|
|
2601
|
+
};
|
|
2602
|
+
}
|
|
2603
|
+
}
|
|
2051
2604
|
}
|
|
2052
2605
|
}
|
|
2053
2606
|
function evaluateCount(criterionId, type, expected, actual, subject, predicate) {
|
|
@@ -2083,8 +2636,154 @@ function evaluateCount(criterionId, type, expected, actual, subject, predicate)
|
|
|
2083
2636
|
}
|
|
2084
2637
|
}
|
|
2085
2638
|
|
|
2639
|
+
// src/evaluator/llm-provider.ts
|
|
2640
|
+
/**
 * Infer which LLM provider serves a given model identifier.
 *
 * @param {string} model - Model id, e.g. "gemini-2.0-flash", "claude-...", "gpt-4o", "o3-mini".
 * @returns {"gemini"|"anthropic"|"openai"|"openai-compatible"} Provider key.
 *   Any id that is not recognised is treated as "openai-compatible".
 */
function detectProvider(model) {
  if (model.startsWith("gemini-")) return "gemini";
  if (model.startsWith("claude-")) return "anthropic";
  // OpenAI o-series ids exist both bare ("o1", "o3") and suffixed ("o1-mini",
  // "o4-mini"). Accept either form, but not longer lookalikes such as "o1x".
  if (model.startsWith("gpt-") || /^o[134](?:-|$)/.test(model)) return "openai";
  // Everything else (llama*, mixtral*, mistral*, deepseek*, qwen*, codestral*,
  // command*, or any unknown id) falls through to the OpenAI-compatible caller.
  return "openai-compatible";
}
|
|
2647
|
+
// Maps each provider key to the name of the environment variable that is read
// for its API key when no explicit key is supplied.
var PROVIDER_ENV_VARS = Object.fromEntries([
  ["gemini", "GEMINI_API_KEY"],
  ["anthropic", "ANTHROPIC_API_KEY"],
  ["openai", "OPENAI_API_KEY"],
  ["openai-compatible", "LLM_API_KEY"]
]);
|
|
2653
|
+
/**
 * Look up the API-key environment variable name for a provider.
 *
 * @param {string} provider - Provider key used to index PROVIDER_ENV_VARS.
 * @returns {string|undefined} The variable name, or undefined when the
 *   provider has no entry in the table.
 */
function getProviderEnvVar(provider) {
  const { [provider]: envVarName } = PROVIDER_ENV_VARS;
  return envVarName;
}
|
|
2656
|
+
/**
 * Choose the API key to use for a provider.
 *
 * @param {string|undefined} explicitKey - Key supplied by the caller; used as-is when truthy.
 * @param {string} provider - Provider key used to find the fallback environment variable.
 * @returns {string} The explicit key, else the value of the provider's
 *   environment variable, else an empty string.
 */
function resolveProviderApiKey(explicitKey, provider) {
  if (!explicitKey) {
    const fromEnvironment = process.env[PROVIDER_ENV_VARS[provider]];
    return fromEnvironment ?? "";
  }
  return explicitKey;
}
|
|
2660
|
+
// Per-request timeout in milliseconds (60 s), passed to AbortSignal.timeout()
// by the provider callers in this module.
var REQUEST_TIMEOUT_MS = 60 * 1e3;
|
|
2661
|
+
/**
 * Dispatch one prompt to the backend selected by `options.provider`.
 *
 * @param {object} options - Forwarded unchanged to the provider-specific
 *   caller. This function itself only reads `options.provider` and
 *   `options.model` (the latter for the debug log).
 * @returns {Promise<string>} Whatever the provider-specific caller resolves to.
 * @throws {Error} When `options.provider` is not one of "gemini", "anthropic",
 *   "openai" or "openai-compatible". Without this the switch would fall
 *   through and the promise would silently resolve to undefined.
 */
async function callLlm(options) {
  debug("Calling LLM provider", { provider: options.provider, model: options.model });
  switch (options.provider) {
    case "gemini":
      return callGemini(options);
    case "anthropic":
      return callAnthropic(options);
    case "openai":
      return callOpenAi(options);
    case "openai-compatible":
      return callOpenAiCompatible(options);
    default:
      throw new Error(`Unsupported LLM provider: ${String(options.provider)}`);
  }
}
|
|
2674
|
+
/**
 * Send a system + user prompt to the Gemini `generateContent` endpoint and
 * return the generated text.
 *
 * @param {object} options
 * @param {string} options.model - Model id placed in the request path.
 * @param {string} options.apiKey - Sent in the `x-goog-api-key` header.
 * @param {string} options.systemPrompt - Sent as `systemInstruction`.
 * @param {string} options.userPrompt - Sent as the single `contents` entry.
 * @param {number} options.maxTokens - Sent as `generationConfig.maxOutputTokens`.
 * @returns {Promise<string>} Text of the first candidate.
 * @throws {Error} On a non-2xx response (status plus up to 200 chars of the
 *   body) or when the first candidate carries no text.
 */
async function callGemini(options) {
  // Encode the model id so an unusual id cannot alter the request path.
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(options.model)}:generateContent`;
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": options.apiKey
    },
    body: JSON.stringify({
      systemInstruction: { parts: [{ text: options.systemPrompt }] },
      contents: [{ parts: [{ text: options.userPrompt }] }],
      generationConfig: { maxOutputTokens: options.maxTokens }
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // The body is best-effort context only; a failed read must not mask the status.
    const errorText = await response.text().catch(() => "");
    throw new Error(`Gemini API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const candidate = data.candidates?.[0];
  const parts = Array.isArray(candidate?.content?.parts) ? candidate.content.parts : [];
  // A candidate may spread its answer over several parts; concatenate every
  // text part (skipping parts flagged as model "thought") instead of keeping
  // only the first one.
  const text = parts
    .filter((part) => typeof part?.text === "string" && part.thought !== true)
    .map((part) => part.text)
    .join("");
  const finishReason = candidate?.finishReason;
  if (!text) {
    // Surface the finish reason (e.g. MAX_TOKENS, SAFETY) so an empty reply is diagnosable.
    throw new Error(
      finishReason ? `Gemini returned no text content (finishReason: ${finishReason})` : "Gemini returned no text content"
    );
  }
  if (finishReason === "MAX_TOKENS") {
    warn("Gemini response was truncated (hit max output tokens)");
  }
  return text;
}
|
|
2701
|
+
/**
 * Send a system + user prompt to the Anthropic Messages API and return the
 * generated text.
 *
 * @param {object} options
 * @param {string} options.model - Model id sent in the request body.
 * @param {string} options.apiKey - Sent in the `x-api-key` header.
 * @param {string} options.systemPrompt - Sent as `system`.
 * @param {string} options.userPrompt - Sent as the single user message.
 * @param {number} options.maxTokens - Sent as `max_tokens`.
 * @returns {Promise<string>} Concatenated text of every `text` content block.
 * @throws {Error} On a non-2xx response (status plus up to 200 chars of the
 *   body) or when the response contains no text.
 */
async function callAnthropic(options) {
  const response = await fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: {
      "content-type": "application/json",
      "x-api-key": options.apiKey,
      "anthropic-version": "2023-06-01"
    },
    body: JSON.stringify({
      model: options.model,
      max_tokens: options.maxTokens,
      system: options.systemPrompt,
      messages: [{ role: "user", content: options.userPrompt }]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // The body is best-effort context only; a failed read must not mask the status.
    const errorText = await response.text().catch(() => "");
    throw new Error(`Anthropic API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const blocks = Array.isArray(data.content) ? data.content : [];
  // `content` is a list of blocks; join every text block rather than
  // returning only the first, so multi-block replies are not cut short.
  const text = blocks
    .filter((block) => block?.type === "text" && typeof block.text === "string")
    .map((block) => block.text)
    .join("");
  if (!text) throw new Error("Anthropic returned no text content");
  // Mirror the Gemini caller: make truncated output visible to the user.
  if (data.stop_reason === "max_tokens") {
    warn("Anthropic response was truncated (hit max output tokens)");
  }
  return text;
}
|
|
2726
|
+
/**
 * Send a system + user prompt to the OpenAI Chat Completions API and return
 * the first choice's message content.
 *
 * @param {object} options
 * @param {string} options.model - Model id sent in the request body.
 * @param {string} options.apiKey - Sent as a Bearer token.
 * @param {string} options.systemPrompt - Sent as the system message.
 * @param {string} options.userPrompt - Sent as the user message.
 * @param {number} options.maxTokens - Output token cap (see below for the field used).
 * @returns {Promise<string>} Content of the first choice.
 * @throws {Error} On a non-2xx response (status plus up to 200 chars of the
 *   body) or when the first choice has no content.
 */
async function callOpenAi(options) {
  // Reasoning model families (o-series, gpt-5) reject `max_tokens` and expect
  // `max_completion_tokens`; all other models keep the original field.
  const tokenLimitField = /^(?:o\d|gpt-5)/.test(String(options.model)) ? "max_completion_tokens" : "max_tokens";
  const response = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${options.apiKey}`
    },
    body: JSON.stringify({
      model: options.model,
      [tokenLimitField]: options.maxTokens,
      messages: [
        { role: "system", content: options.systemPrompt },
        { role: "user", content: options.userPrompt }
      ]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // The body is best-effort context only; a failed read must not mask the status.
    const errorText = await response.text().catch(() => "");
    throw new Error(`OpenAI API error: ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) throw new Error("OpenAI returned no content");
  return content;
}
|
|
2752
|
+
/**
 * Send a system + user prompt to a user-configured OpenAI-compatible
 * chat-completions endpoint and return the first choice's message content.
 *
 * @param {object} options
 * @param {string} options.baseUrl - Server root. May be given with or without
 *   a trailing `/v1` and with or without trailing slashes.
 * @param {string} options.model - Model id sent in the request body.
 * @param {string} options.apiKey - Sent as a Bearer token.
 * @param {string} options.systemPrompt - Sent as the system message.
 * @param {string} options.userPrompt - Sent as the user message.
 * @param {number} options.maxTokens - Sent as `max_tokens`.
 * @returns {Promise<string>} Content of the first choice.
 * @throws {Error} When `baseUrl` is missing, on a non-2xx response (status
 *   plus up to 200 chars of the body), or when the first choice has no content.
 */
async function callOpenAiCompatible(options) {
  if (!options.baseUrl) {
    throw new Error(
      "baseUrl is required for openai-compatible provider. Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
    );
  }
  const trimmedBase = options.baseUrl.replace(/\/+$/, "");
  // Base URLs are often configured already ending in "/v1"; do not append a
  // second version segment in that case.
  const url = trimmedBase.endsWith("/v1") ? `${trimmedBase}/chat/completions` : `${trimmedBase}/v1/chat/completions`;
  debug("Calling OpenAI-compatible endpoint", { url, model: options.model });
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${options.apiKey}`
    },
    body: JSON.stringify({
      model: options.model,
      max_tokens: options.maxTokens,
      messages: [
        { role: "system", content: options.systemPrompt },
        { role: "user", content: options.userPrompt }
      ]
    }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
  });
  if (!response.ok) {
    // The body is best-effort context only; a failed read must not mask the status.
    const errorText = await response.text().catch(() => "");
    throw new Error(`OpenAI-compatible API error (${options.baseUrl}): ${response.status} ${errorText.slice(0, 200)}`);
  }
  const data = await response.json();
  const content = data.choices?.[0]?.message?.content;
  if (!content) throw new Error("OpenAI-compatible API returned no content");
  return content;
}
|
|
2785
|
+
|
|
2086
2786
|
// src/evaluator/llm-judge.ts
|
|
2087
|
-
import Anthropic from "@anthropic-ai/sdk";
|
|
2088
2787
|
var SYSTEM_PROMPT = `You are an evaluator for AI agent testing. You assess whether an agent successfully met a specific success criterion during a scenario run.
|
|
2089
2788
|
|
|
2090
2789
|
You will receive:
|
|
@@ -2192,13 +2891,6 @@ function parseJudgeResponse(text) {
|
|
|
2192
2891
|
};
|
|
2193
2892
|
}
|
|
2194
2893
|
}
|
|
2195
|
-
var clientInstance = null;
|
|
2196
|
-
function getClient(apiKey) {
|
|
2197
|
-
if (!clientInstance) {
|
|
2198
|
-
clientInstance = new Anthropic({ apiKey });
|
|
2199
|
-
}
|
|
2200
|
-
return clientInstance;
|
|
2201
|
-
}
|
|
2202
2894
|
async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAfter, stateDiff, trace, options) {
|
|
2203
2895
|
const context = {
|
|
2204
2896
|
criterion,
|
|
@@ -2208,43 +2900,35 @@ async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAf
|
|
|
2208
2900
|
stateDiff,
|
|
2209
2901
|
trace
|
|
2210
2902
|
};
|
|
2211
|
-
|
|
2212
|
-
|
|
2903
|
+
const provider = detectProvider(options.model);
|
|
2904
|
+
const apiKey = resolveProviderApiKey(options.apiKey, provider);
|
|
2905
|
+
if (!apiKey) {
|
|
2906
|
+
const envVar = getProviderEnvVar(provider);
|
|
2907
|
+
error(`No API key for ${provider} evaluation`);
|
|
2213
2908
|
return {
|
|
2214
2909
|
criterionId: criterion.id,
|
|
2215
2910
|
status: "fail",
|
|
2216
2911
|
confidence: 0,
|
|
2217
|
-
explanation:
|
|
2912
|
+
explanation: `No ${envVar} configured for probabilistic evaluation`
|
|
2218
2913
|
};
|
|
2219
2914
|
}
|
|
2220
|
-
const client = getClient(options.apiKey);
|
|
2221
2915
|
debug("Calling LLM judge", {
|
|
2222
2916
|
criterion: criterion.id,
|
|
2223
2917
|
model: options.model,
|
|
2918
|
+
provider,
|
|
2224
2919
|
traceLength: String(trace.length)
|
|
2225
2920
|
});
|
|
2226
2921
|
try {
|
|
2227
|
-
const
|
|
2922
|
+
const text = await callLlm({
|
|
2923
|
+
provider,
|
|
2228
2924
|
model: options.model,
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
|
|
2232
|
-
|
|
2233
|
-
|
|
2234
|
-
content: buildUserPrompt(context)
|
|
2235
|
-
}
|
|
2236
|
-
]
|
|
2925
|
+
apiKey,
|
|
2926
|
+
systemPrompt: SYSTEM_PROMPT,
|
|
2927
|
+
userPrompt: buildUserPrompt(context),
|
|
2928
|
+
maxTokens: 512,
|
|
2929
|
+
baseUrl: options.baseUrl
|
|
2237
2930
|
});
|
|
2238
|
-
const
|
|
2239
|
-
if (!textBlock || textBlock.type !== "text") {
|
|
2240
|
-
return {
|
|
2241
|
-
criterionId: criterion.id,
|
|
2242
|
-
status: "fail",
|
|
2243
|
-
confidence: 0.3,
|
|
2244
|
-
explanation: "LLM returned no text content"
|
|
2245
|
-
};
|
|
2246
|
-
}
|
|
2247
|
-
const judgeResult = parseJudgeResponse(textBlock.text);
|
|
2931
|
+
const judgeResult = parseJudgeResponse(text);
|
|
2248
2932
|
debug("LLM judge result", {
|
|
2249
2933
|
criterion: criterion.id,
|
|
2250
2934
|
status: judgeResult.status,
|
|
@@ -2310,7 +2994,18 @@ async function evaluateRun(criteria, context, config) {
|
|
|
2310
2994
|
status: result.status
|
|
2311
2995
|
});
|
|
2312
2996
|
}
|
|
2997
|
+
const apiKeyPresent = config.apiKey.trim().length > 0 && config.apiKey !== "missing";
|
|
2313
2998
|
for (const criterion of probabilisticCriteria) {
|
|
2999
|
+
if (!apiKeyPresent) {
|
|
3000
|
+
progress(`Skipping [P] ${criterion.description} (no API key)`);
|
|
3001
|
+
evaluations.push({
|
|
3002
|
+
criterionId: criterion.id,
|
|
3003
|
+
status: "fail",
|
|
3004
|
+
confidence: 0,
|
|
3005
|
+
explanation: "Skipped: no ANTHROPIC_API_KEY configured for LLM evaluation"
|
|
3006
|
+
});
|
|
3007
|
+
continue;
|
|
3008
|
+
}
|
|
2314
3009
|
progress(`Evaluating [P] ${criterion.description}`);
|
|
2315
3010
|
const result = await evaluateWithLlm(
|
|
2316
3011
|
criterion,
|
|
@@ -2319,7 +3014,7 @@ async function evaluateRun(criteria, context, config) {
|
|
|
2319
3014
|
context.stateAfter,
|
|
2320
3015
|
context.stateDiff,
|
|
2321
3016
|
context.trace,
|
|
2322
|
-
{ apiKey: config.apiKey, model: config.model }
|
|
3017
|
+
{ apiKey: config.apiKey, model: config.model, baseUrl: config.baseUrl }
|
|
2323
3018
|
);
|
|
2324
3019
|
evaluations.push(result);
|
|
2325
3020
|
debug("Probabilistic evaluation", {
|
|
@@ -2386,28 +3081,34 @@ function generateSummary(evaluations, satisfactionScore) {
|
|
|
2386
3081
|
}
|
|
2387
3082
|
|
|
2388
3083
|
// src/telemetry/recorder.ts
|
|
2389
|
-
import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as
|
|
3084
|
+
import { mkdirSync as mkdirSync3, writeFileSync as writeFileSync4, readFileSync as readFileSync7, readdirSync, existsSync as existsSync6, unlinkSync as unlinkSync2, statSync } from "fs";
|
|
2390
3085
|
import { join as join4 } from "path";
|
|
2391
3086
|
import { randomUUID } from "crypto";
|
|
2392
3087
|
|
|
2393
3088
|
// src/config/config.ts
|
|
2394
|
-
import { readFileSync as
|
|
3089
|
+
import { readFileSync as readFileSync6, writeFileSync as writeFileSync3, mkdirSync as mkdirSync2, existsSync as existsSync5 } from "fs";
|
|
2395
3090
|
import { join as join3 } from "path";
|
|
2396
3091
|
import { homedir } from "os";
|
|
2397
|
-
import { z } from "zod";
|
|
3092
|
+
import { z as z2 } from "zod";
|
|
2398
3093
|
var ARCHAL_DIR_NAME = ".archal";
|
|
2399
3094
|
var CONFIG_FILE_NAME = "config.json";
|
|
2400
|
-
var evaluatorConfigSchema =
|
|
2401
|
-
model:
|
|
2402
|
-
apiKey:
|
|
3095
|
+
// Shape of the `evaluator` section of the config file.
var evaluatorConfigSchema = z2.object({
  // Model id used for evaluation; defaults to "gemini-2.0-flash".
  model: z2.string().default("gemini-2.0-flash"),
  // Key reference; the "env:" prefix presumably names an environment variable
  // that is resolved later (resolution is not shown here; confirm in resolveApiKey).
  apiKey: z2.string().default("env:GEMINI_API_KEY"),
  // Optional endpoint override; no default.
  baseUrl: z2.string().optional()
});
|
|
3100
|
+
// Shape of the `seedGeneration` section of the config file.
var seedGenerationConfigSchema = z2.object({
  // Model id used for seed generation; defaults to "gemini-3-flash-preview".
  model: z2.string().default("gemini-3-flash-preview"),
  // Key reference with the same "env:" form as the evaluator's apiKey.
  geminiApiKey: z2.string().default("env:GEMINI_API_KEY")
});
|
|
2404
|
-
var defaultsConfigSchema =
|
|
2405
|
-
runs:
|
|
2406
|
-
timeout:
|
|
3104
|
+
// Shape of the `defaults` section of the config file. Both values must be
// positive integers.
var defaultsConfigSchema = z2.object({
  // Number of runs; defaults to 5.
  runs: z2.number().int().positive().default(5),
  // Timeout; defaults to 120 (unit is not stated here; presumably seconds, confirm at the use site).
  timeout: z2.number().int().positive().default(120)
});
|
|
2408
|
-
var configFileSchema =
|
|
2409
|
-
telemetry:
|
|
3108
|
+
// Top-level shape of the config file. Each nested section falls back to an
// empty object when it is absent from the file.
var configFileSchema = z2.object({
  // Telemetry is off unless explicitly enabled.
  telemetry: z2.boolean().default(false),
  evaluator: evaluatorConfigSchema.default({}),
  seedGeneration: seedGenerationConfigSchema.default({}),
  defaults: defaultsConfigSchema.default({})
});
|
|
2413
3114
|
function getArchalDir() {
|
|
@@ -2418,7 +3119,7 @@ function getConfigPath() {
|
|
|
2418
3119
|
}
|
|
2419
3120
|
function ensureArchalDir() {
|
|
2420
3121
|
const dir = getArchalDir();
|
|
2421
|
-
if (!
|
|
3122
|
+
if (!existsSync5(dir)) {
|
|
2422
3123
|
mkdirSync2(dir, { recursive: true });
|
|
2423
3124
|
debug("Created archal directory", { path: dir });
|
|
2424
3125
|
}
|
|
@@ -2426,19 +3127,19 @@ function ensureArchalDir() {
|
|
|
2426
3127
|
}
|
|
2427
3128
|
function loadConfigFile() {
|
|
2428
3129
|
const configPath = getConfigPath();
|
|
2429
|
-
if (!
|
|
3130
|
+
if (!existsSync5(configPath)) {
|
|
2430
3131
|
debug("No config file found, using defaults", { path: configPath });
|
|
2431
3132
|
return configFileSchema.parse({});
|
|
2432
3133
|
}
|
|
2433
3134
|
try {
|
|
2434
|
-
const raw =
|
|
3135
|
+
const raw = readFileSync6(configPath, "utf-8");
|
|
2435
3136
|
const parsed = JSON.parse(raw);
|
|
2436
3137
|
const config = configFileSchema.parse(parsed);
|
|
2437
3138
|
debug("Loaded config file", { path: configPath });
|
|
2438
3139
|
return config;
|
|
2439
3140
|
} catch (err) {
|
|
2440
3141
|
const message = err instanceof Error ? err.message : String(err);
|
|
2441
|
-
|
|
3142
|
+
error(`Failed to parse config file at ${configPath}: ${message}. Using defaults.`);
|
|
2442
3143
|
return configFileSchema.parse({});
|
|
2443
3144
|
}
|
|
2444
3145
|
}
|
|
@@ -2455,16 +3156,24 @@ function loadConfig() {
|
|
|
2455
3156
|
const envModel = process.env["ARCHAL_MODEL"];
|
|
2456
3157
|
const envRuns = process.env["ARCHAL_RUNS"];
|
|
2457
3158
|
const envTimeout = process.env["ARCHAL_TIMEOUT"];
|
|
2458
|
-
const
|
|
3159
|
+
const envBaseUrl = process.env["ARCHAL_EVALUATOR_BASE_URL"];
|
|
3160
|
+
const envGeminiApiKey = process.env["GEMINI_API_KEY"];
|
|
3161
|
+
const envSeedModel = process.env["ARCHAL_SEED_MODEL"];
|
|
2459
3162
|
const telemetry = envTelemetry !== void 0 ? envTelemetry === "true" : file.telemetry;
|
|
2460
3163
|
const model = envModel ?? file.evaluator.model;
|
|
2461
3164
|
const runs = envRuns !== void 0 ? parseInt(envRuns, 10) : file.defaults.runs;
|
|
2462
3165
|
const timeout = envTimeout !== void 0 ? parseInt(envTimeout, 10) : file.defaults.timeout;
|
|
2463
|
-
const apiKey =
|
|
3166
|
+
const apiKey = resolveApiKey(file.evaluator.apiKey);
|
|
3167
|
+
const geminiApiKey = envGeminiApiKey ?? resolveApiKey(file.seedGeneration.geminiApiKey);
|
|
3168
|
+
const seedModel = envSeedModel ?? file.seedGeneration.model;
|
|
3169
|
+
const baseUrl = envBaseUrl ?? file.evaluator.baseUrl;
|
|
2464
3170
|
return {
|
|
2465
3171
|
telemetry,
|
|
2466
3172
|
apiKey,
|
|
2467
3173
|
model,
|
|
3174
|
+
baseUrl,
|
|
3175
|
+
geminiApiKey,
|
|
3176
|
+
seedModel,
|
|
2468
3177
|
runs: Number.isNaN(runs) ? 5 : runs,
|
|
2469
3178
|
timeout: Number.isNaN(timeout) ? 120 : timeout,
|
|
2470
3179
|
archalDir: getArchalDir(),
|
|
@@ -2475,9 +3184,9 @@ function saveConfig(config) {
|
|
|
2475
3184
|
const dir = ensureArchalDir();
|
|
2476
3185
|
const configPath = join3(dir, CONFIG_FILE_NAME);
|
|
2477
3186
|
let existing;
|
|
2478
|
-
if (
|
|
3187
|
+
if (existsSync5(configPath)) {
|
|
2479
3188
|
try {
|
|
2480
|
-
const raw =
|
|
3189
|
+
const raw = readFileSync6(configPath, "utf-8");
|
|
2481
3190
|
existing = configFileSchema.parse(JSON.parse(raw));
|
|
2482
3191
|
} catch {
|
|
2483
3192
|
existing = configFileSchema.parse({});
|
|
@@ -2491,31 +3200,27 @@ function saveConfig(config) {
|
|
|
2491
3200
|
...existing.evaluator,
|
|
2492
3201
|
...config.evaluator
|
|
2493
3202
|
},
|
|
3203
|
+
seedGeneration: {
|
|
3204
|
+
...existing.seedGeneration,
|
|
3205
|
+
...config.seedGeneration
|
|
3206
|
+
},
|
|
2494
3207
|
defaults: {
|
|
2495
3208
|
...existing.defaults,
|
|
2496
3209
|
...config.defaults
|
|
2497
3210
|
}
|
|
2498
3211
|
};
|
|
2499
|
-
writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", "utf-8");
|
|
2500
|
-
try {
|
|
2501
|
-
chmodSync(configPath, 384);
|
|
2502
|
-
} catch {
|
|
2503
|
-
}
|
|
3212
|
+
writeFileSync3(configPath, JSON.stringify(merged, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
|
|
2504
3213
|
debug("Saved config file", { path: configPath });
|
|
2505
3214
|
}
|
|
2506
3215
|
function initConfig() {
|
|
2507
3216
|
const configPath = getConfigPath();
|
|
2508
|
-
if (
|
|
3217
|
+
if (existsSync5(configPath)) {
|
|
2509
3218
|
warn(`Config file already exists at ${configPath}`);
|
|
2510
3219
|
return configPath;
|
|
2511
3220
|
}
|
|
2512
3221
|
const defaultConfig = configFileSchema.parse({});
|
|
2513
3222
|
ensureArchalDir();
|
|
2514
|
-
writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", "utf-8");
|
|
2515
|
-
try {
|
|
2516
|
-
chmodSync(configPath, 384);
|
|
2517
|
-
} catch {
|
|
2518
|
-
}
|
|
3223
|
+
writeFileSync3(configPath, JSON.stringify(defaultConfig, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
|
|
2519
3224
|
return configPath;
|
|
2520
3225
|
}
|
|
2521
3226
|
function setConfigValue(key, value) {
|
|
@@ -2530,13 +3235,20 @@ function setConfigValue(key, value) {
|
|
|
2530
3235
|
}
|
|
2531
3236
|
if (parts.length === 2) {
|
|
2532
3237
|
const [section, prop] = parts;
|
|
2533
|
-
if (section === "evaluator" && (prop === "model" || prop === "apiKey")) {
|
|
3238
|
+
if (section === "evaluator" && (prop === "model" || prop === "apiKey" || prop === "baseUrl")) {
|
|
2534
3239
|
saveConfig({
|
|
2535
3240
|
...file,
|
|
2536
3241
|
evaluator: { ...file.evaluator, [prop]: value }
|
|
2537
3242
|
});
|
|
2538
3243
|
return;
|
|
2539
3244
|
}
|
|
3245
|
+
if (section === "seedGeneration" && (prop === "model" || prop === "geminiApiKey")) {
|
|
3246
|
+
saveConfig({
|
|
3247
|
+
...file,
|
|
3248
|
+
seedGeneration: { ...file.seedGeneration, [prop]: value }
|
|
3249
|
+
});
|
|
3250
|
+
return;
|
|
3251
|
+
}
|
|
2540
3252
|
if (section === "defaults" && (prop === "runs" || prop === "timeout")) {
|
|
2541
3253
|
const numValue = parseInt(value, 10);
|
|
2542
3254
|
if (Number.isNaN(numValue) || numValue <= 0) {
|
|
@@ -2550,7 +3262,7 @@ function setConfigValue(key, value) {
|
|
|
2550
3262
|
}
|
|
2551
3263
|
}
|
|
2552
3264
|
throw new Error(
|
|
2553
|
-
`Unknown config key: "${key}". Valid keys: telemetry, evaluator.model, evaluator.apiKey, defaults.runs, defaults.timeout`
|
|
3265
|
+
`Unknown config key: "${key}". Valid keys: telemetry, evaluator.model, evaluator.apiKey, evaluator.baseUrl, seedGeneration.model, seedGeneration.geminiApiKey, defaults.runs, defaults.timeout`
|
|
2554
3266
|
);
|
|
2555
3267
|
}
|
|
2556
3268
|
function getConfigDisplay() {
|
|
@@ -2559,7 +3271,12 @@ function getConfigDisplay() {
|
|
|
2559
3271
|
telemetry: resolved.telemetry,
|
|
2560
3272
|
evaluator: {
|
|
2561
3273
|
model: resolved.model,
|
|
2562
|
-
apiKey: resolved.apiKey ? "***" + resolved.apiKey.slice(-4) : "(not set)"
|
|
3274
|
+
apiKey: resolved.apiKey ? "***" + resolved.apiKey.slice(-4) : "(not set)",
|
|
3275
|
+
...resolved.baseUrl ? { baseUrl: resolved.baseUrl } : {}
|
|
3276
|
+
},
|
|
3277
|
+
seedGeneration: {
|
|
3278
|
+
model: resolved.seedModel,
|
|
3279
|
+
geminiApiKey: resolved.geminiApiKey ? "***" + resolved.geminiApiKey.slice(-4) : "(not set)"
|
|
2563
3280
|
},
|
|
2564
3281
|
defaults: {
|
|
2565
3282
|
runs: resolved.runs,
|
|
@@ -2580,7 +3297,7 @@ function getTracesDir() {
|
|
|
2580
3297
|
}
|
|
2581
3298
|
function ensureTracesDir() {
|
|
2582
3299
|
const dir = getTracesDir();
|
|
2583
|
-
if (!
|
|
3300
|
+
if (!existsSync6(dir)) {
|
|
2584
3301
|
ensureArchalDir();
|
|
2585
3302
|
mkdirSync3(dir, { recursive: true });
|
|
2586
3303
|
}
|
|
@@ -2590,14 +3307,14 @@ function traceFilePath(id) {
|
|
|
2590
3307
|
return join4(getTracesDir(), `${id}.json`);
|
|
2591
3308
|
}
|
|
2592
3309
|
function traceJsonFiles(dir) {
|
|
2593
|
-
return
|
|
3310
|
+
return existsSync6(dir) ? readdirSync(dir).filter((f) => f.endsWith(".json")).sort().reverse() : [];
|
|
2594
3311
|
}
|
|
2595
3312
|
function toMetadata(s) {
|
|
2596
3313
|
return { id: s.id, scenarioTitle: s.scenarioTitle, timestamp: s.timestamp, satisfactionScore: s.satisfactionScore, runCount: s.runCount, entryCount: s.entries.length };
|
|
2597
3314
|
}
|
|
2598
3315
|
function loadTraceByPath(filePath) {
|
|
2599
3316
|
try {
|
|
2600
|
-
return JSON.parse(
|
|
3317
|
+
return JSON.parse(readFileSync7(filePath, "utf-8"));
|
|
2601
3318
|
} catch (err) {
|
|
2602
3319
|
warn(`Failed to load trace: ${err instanceof Error ? err.message : String(err)}`);
|
|
2603
3320
|
return null;
|
|
@@ -2605,7 +3322,7 @@ function loadTraceByPath(filePath) {
|
|
|
2605
3322
|
}
|
|
2606
3323
|
function findTraceByPrefix(prefix) {
|
|
2607
3324
|
const dir = getTracesDir();
|
|
2608
|
-
if (!
|
|
3325
|
+
if (!existsSync6(dir)) return null;
|
|
2609
3326
|
const file = readdirSync(dir).find((f) => f.endsWith(".json") && f.replace(".json", "").startsWith(prefix));
|
|
2610
3327
|
return file ? file.replace(".json", "") : null;
|
|
2611
3328
|
}
|
|
@@ -2641,7 +3358,7 @@ function recordTrace(report) {
|
|
|
2641
3358
|
}
|
|
2642
3359
|
function loadTrace(traceId) {
|
|
2643
3360
|
const filePath = traceFilePath(traceId);
|
|
2644
|
-
if (
|
|
3361
|
+
if (existsSync6(filePath)) return loadTraceByPath(filePath);
|
|
2645
3362
|
const match = findTraceByPrefix(traceId);
|
|
2646
3363
|
return match ? loadTraceByPath(traceFilePath(match)) : null;
|
|
2647
3364
|
}
|
|
@@ -2650,7 +3367,7 @@ function listTraces(limit = 20) {
|
|
|
2650
3367
|
const results = [];
|
|
2651
3368
|
for (const file of traceJsonFiles(dir).slice(0, limit)) {
|
|
2652
3369
|
try {
|
|
2653
|
-
results.push(toMetadata(JSON.parse(
|
|
3370
|
+
results.push(toMetadata(JSON.parse(readFileSync7(join4(dir, file), "utf-8"))));
|
|
2654
3371
|
} catch {
|
|
2655
3372
|
debug(`Skipping corrupted trace file: ${file}`);
|
|
2656
3373
|
}
|
|
@@ -2664,7 +3381,7 @@ function searchTraces(options) {
|
|
|
2664
3381
|
for (const file of traceJsonFiles(dir)) {
|
|
2665
3382
|
if (results.length >= limit) break;
|
|
2666
3383
|
try {
|
|
2667
|
-
const stored = JSON.parse(
|
|
3384
|
+
const stored = JSON.parse(readFileSync7(join4(dir, file), "utf-8"));
|
|
2668
3385
|
if (options.scenario && !stored.scenarioTitle.toLowerCase().includes(options.scenario.toLowerCase())) continue;
|
|
2669
3386
|
if (options.minScore !== void 0 && stored.satisfactionScore < options.minScore) continue;
|
|
2670
3387
|
if (options.maxScore !== void 0 && stored.satisfactionScore > options.maxScore) continue;
|
|
@@ -2679,7 +3396,7 @@ function searchTraces(options) {
|
|
|
2679
3396
|
}
|
|
2680
3397
|
function deleteTrace(traceId) {
|
|
2681
3398
|
let filePath = traceFilePath(traceId);
|
|
2682
|
-
if (!
|
|
3399
|
+
if (!existsSync6(filePath)) {
|
|
2683
3400
|
const match = findTraceByPrefix(traceId);
|
|
2684
3401
|
if (!match) return false;
|
|
2685
3402
|
filePath = traceFilePath(match);
|
|
@@ -2695,7 +3412,7 @@ function deleteTrace(traceId) {
|
|
|
2695
3412
|
}
|
|
2696
3413
|
function deleteAllTraces() {
|
|
2697
3414
|
const dir = getTracesDir();
|
|
2698
|
-
if (!
|
|
3415
|
+
if (!existsSync6(dir)) return 0;
|
|
2699
3416
|
let deleted = 0;
|
|
2700
3417
|
for (const file of readdirSync(dir).filter((f) => f.endsWith(".json"))) {
|
|
2701
3418
|
try {
|
|
@@ -2732,7 +3449,7 @@ function getTraceStats() {
|
|
|
2732
3449
|
const filePath = join4(dir, file);
|
|
2733
3450
|
try {
|
|
2734
3451
|
diskUsageBytes += statSync(filePath).size;
|
|
2735
|
-
const stored = JSON.parse(
|
|
3452
|
+
const stored = JSON.parse(readFileSync7(filePath, "utf-8"));
|
|
2736
3453
|
scores.push(stored.satisfactionScore);
|
|
2737
3454
|
totalRuns += stored.runCount;
|
|
2738
3455
|
totalEntries += stored.entries.length;
|
|
@@ -2979,9 +3696,28 @@ function anonymizeTrace(entries) {
|
|
|
2979
3696
|
}
|
|
2980
3697
|
|
|
2981
3698
|
// src/telemetry/consent.ts
|
|
2982
|
-
import { existsSync as
|
|
3699
|
+
import { existsSync as existsSync7, readFileSync as readFileSync9, writeFileSync as writeFileSync5, unlinkSync as unlinkSync3 } from "fs";
|
|
2983
3700
|
import { join as join5 } from "path";
|
|
2984
3701
|
import { createInterface } from "readline";
|
|
3702
|
+
|
|
3703
|
+
// src/utils/version.ts
|
|
3704
|
+
import { readFileSync as readFileSync8 } from "fs";
|
|
3705
|
+
import { resolve as resolve5 } from "path";
|
|
3706
|
+
import { fileURLToPath as fileURLToPath3 } from "url";
|
|
3707
|
+
var __dirname3 = fileURLToPath3(new URL(".", import.meta.url));
|
|
3708
|
+
function loadVersion() {
|
|
3709
|
+
try {
|
|
3710
|
+
const pkgPath = resolve5(__dirname3, "..", "package.json");
|
|
3711
|
+
const pkg = JSON.parse(readFileSync8(pkgPath, "utf-8"));
|
|
3712
|
+
return typeof pkg.version === "string" ? pkg.version : "0.0.0";
|
|
3713
|
+
} catch {
|
|
3714
|
+
return "0.0.0";
|
|
3715
|
+
}
|
|
3716
|
+
}
|
|
3717
|
+
var CLI_VERSION = loadVersion();
|
|
3718
|
+
var CLI_USER_AGENT = `archal-cli/${CLI_VERSION}`;
|
|
3719
|
+
|
|
3720
|
+
// src/telemetry/consent.ts
|
|
2985
3721
|
var CONSENT_FILE = ".telemetry-consent";
|
|
2986
3722
|
var TELEMETRY_NOTICE = `
|
|
2987
3723
|
Archal collects anonymous usage telemetry to improve the product.
|
|
@@ -3007,7 +3743,7 @@ function getConsentStatus() {
|
|
|
3007
3743
|
const env = process.env["ARCHAL_TELEMETRY"];
|
|
3008
3744
|
if (env !== void 0) return env === "true" ? "granted" : "denied";
|
|
3009
3745
|
try {
|
|
3010
|
-
const record = JSON.parse(
|
|
3746
|
+
const record = JSON.parse(readFileSync9(consentPath(), "utf-8"));
|
|
3011
3747
|
return record.status;
|
|
3012
3748
|
} catch {
|
|
3013
3749
|
return "pending";
|
|
@@ -3015,7 +3751,7 @@ function getConsentStatus() {
|
|
|
3015
3751
|
}
|
|
3016
3752
|
function saveConsent(status) {
|
|
3017
3753
|
const dir = ensureArchalDir();
|
|
3018
|
-
const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version:
|
|
3754
|
+
const record = { status, timestamp: (/* @__PURE__ */ new Date()).toISOString(), version: CLI_VERSION };
|
|
3019
3755
|
writeFileSync5(join5(dir, CONSENT_FILE), JSON.stringify(record, null, 2) + "\n", "utf-8");
|
|
3020
3756
|
debug("Saved telemetry consent", { status });
|
|
3021
3757
|
}
|
|
@@ -3033,7 +3769,7 @@ async function promptForConsent() {
|
|
|
3033
3769
|
}
|
|
3034
3770
|
process.stderr.write(TELEMETRY_NOTICE);
|
|
3035
3771
|
const rl = createInterface({ input: process.stdin, output: process.stderr });
|
|
3036
|
-
return new Promise((
|
|
3772
|
+
return new Promise((resolve13) => {
|
|
3037
3773
|
rl.question("\nEnable anonymous telemetry? [y/N] ", (answer) => {
|
|
3038
3774
|
rl.close();
|
|
3039
3775
|
const enabled = answer.trim().toLowerCase() === "y";
|
|
@@ -3044,7 +3780,7 @@ async function promptForConsent() {
|
|
|
3044
3780
|
denyConsent();
|
|
3045
3781
|
process.stderr.write("\nTelemetry disabled.\n\n");
|
|
3046
3782
|
}
|
|
3047
|
-
|
|
3783
|
+
resolve13(enabled);
|
|
3048
3784
|
});
|
|
3049
3785
|
});
|
|
3050
3786
|
}
|
|
@@ -3053,11 +3789,11 @@ async function ensureConsentResolved() {
|
|
|
3053
3789
|
}
|
|
3054
3790
|
|
|
3055
3791
|
// src/telemetry/uploader.ts
|
|
3056
|
-
var ENDPOINT = "https://api.archal.dev/v1/traces";
|
|
3792
|
+
var ENDPOINT = process.env["ARCHAL_TELEMETRY_URL"] ?? "https://api.archal.dev/v1/traces";
|
|
3057
3793
|
var BATCH_SIZE = 50;
|
|
3058
3794
|
var MAX_RETRIES = 3;
|
|
3059
3795
|
var BASE_RETRY_DELAY_MS = 1e3;
|
|
3060
|
-
var
|
|
3796
|
+
var REQUEST_TIMEOUT_MS2 = 3e4;
|
|
3061
3797
|
var RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([408, 429, 500, 502, 503, 504]);
|
|
3062
3798
|
function isTelemetryEnabled() {
|
|
3063
3799
|
const consent = getConsentStatus();
|
|
@@ -3072,7 +3808,7 @@ function buildMetadata(report, totalEntries) {
|
|
|
3072
3808
|
if (prefix) twinNames.add(prefix);
|
|
3073
3809
|
}
|
|
3074
3810
|
return {
|
|
3075
|
-
cliVersion:
|
|
3811
|
+
cliVersion: CLI_VERSION,
|
|
3076
3812
|
nodeVersion: process.version,
|
|
3077
3813
|
platform: process.platform,
|
|
3078
3814
|
arch: process.arch,
|
|
@@ -3106,7 +3842,7 @@ async function sendBatchWithRetry(payload, batchNum, totalBatches) {
|
|
|
3106
3842
|
alreadySlept = false;
|
|
3107
3843
|
try {
|
|
3108
3844
|
const controller = new AbortController();
|
|
3109
|
-
const timeoutId = setTimeout(() => controller.abort(),
|
|
3845
|
+
const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS2);
|
|
3110
3846
|
const body = JSON.stringify(payload);
|
|
3111
3847
|
debug(`Sending batch ${batchNum}/${totalBatches}`, { entries: String(payload.entries.length), sizeBytes: String(body.length) });
|
|
3112
3848
|
const response = await fetch(ENDPOINT, {
|
|
@@ -3201,8 +3937,7 @@ async function uploadIfEnabled(traceId, report) {
|
|
|
3201
3937
|
}
|
|
3202
3938
|
|
|
3203
3939
|
// src/runner/dynamic-seed-generator.ts
|
|
3204
|
-
import
|
|
3205
|
-
import { z as z2 } from "zod";
|
|
3940
|
+
import { z as z3 } from "zod";
|
|
3206
3941
|
|
|
3207
3942
|
// src/runner/seed-patch.ts
|
|
3208
3943
|
var TWINS_WITHOUT_SEED_FILE_SUPPORT = /* @__PURE__ */ new Set(["supabase"]);
|
|
@@ -3408,7 +4143,7 @@ function getProjectedEntities(baseSeed, patch, collection) {
|
|
|
3408
4143
|
|
|
3409
4144
|
// src/runner/seed-cache.ts
|
|
3410
4145
|
import { createHash as createHash2 } from "crypto";
|
|
3411
|
-
import { existsSync as
|
|
4146
|
+
import { existsSync as existsSync8, mkdirSync as mkdirSync4, readFileSync as readFileSync10, writeFileSync as writeFileSync6, readdirSync as readdirSync2, unlinkSync as unlinkSync4, statSync as statSync2 } from "fs";
|
|
3412
4147
|
import { join as join6 } from "path";
|
|
3413
4148
|
import { homedir as homedir2 } from "os";
|
|
3414
4149
|
var CACHE_VERSION = 1;
|
|
@@ -3419,13 +4154,13 @@ function cacheKey(twinName, baseSeedName, setupText) {
|
|
|
3419
4154
|
return hash.slice(0, 32);
|
|
3420
4155
|
}
|
|
3421
4156
|
function ensureCacheDir() {
|
|
3422
|
-
if (!
|
|
4157
|
+
if (!existsSync8(CACHE_DIR)) {
|
|
3423
4158
|
mkdirSync4(CACHE_DIR, { recursive: true });
|
|
3424
4159
|
}
|
|
3425
4160
|
}
|
|
3426
4161
|
function evictStaleEntries() {
|
|
3427
4162
|
try {
|
|
3428
|
-
if (!
|
|
4163
|
+
if (!existsSync8(CACHE_DIR)) return;
|
|
3429
4164
|
const now = Date.now();
|
|
3430
4165
|
for (const file of readdirSync2(CACHE_DIR)) {
|
|
3431
4166
|
if (!file.endsWith(".json")) continue;
|
|
@@ -3445,7 +4180,7 @@ function getCachedSeed(twinName, baseSeedName, setupText) {
|
|
|
3445
4180
|
const filePath = join6(CACHE_DIR, `${key}.json`);
|
|
3446
4181
|
let raw;
|
|
3447
4182
|
try {
|
|
3448
|
-
raw =
|
|
4183
|
+
raw = readFileSync10(filePath, "utf-8");
|
|
3449
4184
|
} catch {
|
|
3450
4185
|
return null;
|
|
3451
4186
|
}
|
|
@@ -3483,26 +4218,57 @@ function cacheSeed(twinName, baseSeedName, setupText, seed, patch) {
|
|
|
3483
4218
|
}
|
|
3484
4219
|
|
|
3485
4220
|
// src/runner/dynamic-seed-generator.ts
|
|
3486
|
-
var SeedPatchSchema =
|
|
3487
|
-
add:
|
|
3488
|
-
modify:
|
|
3489
|
-
remove:
|
|
4221
|
+
var SeedPatchSchema = z3.object({
|
|
4222
|
+
add: z3.record(z3.array(z3.record(z3.unknown()))).optional(),
|
|
4223
|
+
modify: z3.record(z3.array(z3.record(z3.unknown()))).optional(),
|
|
4224
|
+
remove: z3.record(z3.array(z3.number())).optional()
|
|
3490
4225
|
}).strict();
|
|
3491
|
-
var
|
|
3492
|
-
|
|
3493
|
-
|
|
3494
|
-
|
|
3495
|
-
|
|
3496
|
-
|
|
4226
|
+
var GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";
|
|
4227
|
+
async function callGemini2(apiKey, model, systemPrompt, userPrompt, maxOutputTokens) {
|
|
4228
|
+
const url = `${GEMINI_BASE_URL}/${model}:generateContent`;
|
|
4229
|
+
const controller = new AbortController();
|
|
4230
|
+
const timeout = setTimeout(() => controller.abort(), 6e4);
|
|
4231
|
+
try {
|
|
4232
|
+
const response = await fetch(url, {
|
|
4233
|
+
method: "POST",
|
|
4234
|
+
headers: { "Content-Type": "application/json", "x-goog-api-key": apiKey },
|
|
4235
|
+
body: JSON.stringify({
|
|
4236
|
+
systemInstruction: { parts: [{ text: systemPrompt }] },
|
|
4237
|
+
contents: [{ parts: [{ text: userPrompt }] }],
|
|
4238
|
+
generationConfig: {
|
|
4239
|
+
maxOutputTokens,
|
|
4240
|
+
responseMimeType: "application/json"
|
|
4241
|
+
}
|
|
4242
|
+
}),
|
|
4243
|
+
signal: controller.signal
|
|
4244
|
+
});
|
|
4245
|
+
clearTimeout(timeout);
|
|
4246
|
+
if (response.status === 429 || response.status >= 500) {
|
|
4247
|
+
warn(`Gemini API returned ${response.status}, will retry`);
|
|
4248
|
+
return { text: null, truncated: false };
|
|
4249
|
+
}
|
|
4250
|
+
if (!response.ok) {
|
|
4251
|
+
const errorText = await response.text();
|
|
4252
|
+
warn(`Gemini API error: ${response.status} ${errorText}`);
|
|
4253
|
+
return { text: null, truncated: false };
|
|
4254
|
+
}
|
|
4255
|
+
const data = await response.json();
|
|
4256
|
+
const text = data.candidates?.[0]?.content?.parts?.[0]?.text ?? null;
|
|
4257
|
+
const truncated = data.candidates?.[0]?.finishReason === "MAX_TOKENS";
|
|
4258
|
+
return { text, truncated };
|
|
4259
|
+
} catch (err) {
|
|
4260
|
+
clearTimeout(timeout);
|
|
4261
|
+
throw err;
|
|
3497
4262
|
}
|
|
3498
|
-
return clientInstance2;
|
|
3499
4263
|
}
|
|
3500
4264
|
var SYSTEM_PROMPT2 = `You are a test data generator for Archal, a testing platform for AI agents. Your job is to generate seed data patches that create realistic digital twin states matching a given setup description.
|
|
3501
4265
|
|
|
4266
|
+
CRITICAL CONTEXT: The seed data you generate is what an AI agent will interact with during a test scenario. The agent connects to a digital twin (a behavioral clone of a real service like Slack, GitHub, or Stripe) and uses API calls to read and act on the data. If a message, user, channel, issue, or any other entity described in the setup is NOT present in the seed data, the agent literally cannot find or interact with it, and the test will fail. You must faithfully reproduce EVERY specific detail from the setup description.
|
|
4267
|
+
|
|
3502
4268
|
You will receive:
|
|
3503
|
-
1. The twin type (e.g., "github", "slack")
|
|
3504
|
-
2. A sample of the base seed data showing the
|
|
3505
|
-
3. The current max
|
|
4269
|
+
1. The twin type (e.g., "github", "slack", "stripe")
|
|
4270
|
+
2. A sample of the base seed data showing the exact schema of each entity type
|
|
4271
|
+
3. The current entity counts and max IDs per collection
|
|
3506
4272
|
4. Referential integrity rules
|
|
3507
4273
|
5. A natural language setup description
|
|
3508
4274
|
|
|
@@ -3521,23 +4287,60 @@ Respond with ONLY valid JSON in this exact format:
|
|
|
3521
4287
|
}
|
|
3522
4288
|
}
|
|
3523
4289
|
|
|
3524
|
-
|
|
4290
|
+
## FAITHFULNESS RULES (most important)
|
|
4291
|
+
|
|
4292
|
+
- EVERY specific detail in the setup description MUST be represented in the seed data. This includes:
|
|
4293
|
+
- Exact usernames, display names, and user IDs mentioned
|
|
4294
|
+
- Exact channel names (including whether they are public or private)
|
|
4295
|
+
- Exact message text \u2014 if the setup contains quoted text, it must appear VERBATIM in a message entity's "text" field
|
|
4296
|
+
- Exact dollar amounts, invoice numbers, account numbers
|
|
4297
|
+
- Exact repository names, organization names, issue titles
|
|
4298
|
+
- Exact labels, categories, and statuses
|
|
4299
|
+
- Specific member counts and membership lists
|
|
4300
|
+
- If the setup says a user "mark.wilson" exists and a DIFFERENT user "markwilson-ceo" sent a message, you must create BOTH users with those exact usernames
|
|
4301
|
+
- If the setup quotes a message like "URGENT \u2014 I need you to process...", that exact text must be in a message entity
|
|
4302
|
+
- Company/workspace names in the setup override whatever is in the base seed \u2014 modify the workspace entity accordingly
|
|
4303
|
+
- If the setup mentions a channel has N members, include at least the named users plus enough additional users to reach that count
|
|
4304
|
+
|
|
4305
|
+
## SERVICE-SPECIFIC GUIDANCE
|
|
4306
|
+
|
|
4307
|
+
### Slack
|
|
4308
|
+
- Users need: user_id (format "UXXXX"), name, real_name, display_name, is_bot, is_admin
|
|
4309
|
+
- Channels need: channel_id (format "CXXXX"), name, is_private, members (array of user_ids)
|
|
4310
|
+
- Messages need: ts (unique Slack timestamp like "1706140800.100001"), channel_id, user_id, text, thread_ts (null for top-level, parent's ts for replies), reply_count, reply_users, latest_reply, subtype, edited
|
|
4311
|
+
- For threaded conversations: the parent message has reply_count > 0 and reply_users populated. Reply messages have thread_ts set to the parent's ts
|
|
4312
|
+
- A user must be in a channel's members array to post messages in that channel
|
|
4313
|
+
|
|
4314
|
+
### GitHub
|
|
4315
|
+
- Repos need: owner (the org or user name), name, fullName ("owner/name"), isPrivate
|
|
4316
|
+
- Issues need: repoId, number (sequential), title, body, state ("open"/"closed"), labels (array of label names), user (creator username)
|
|
4317
|
+
- If setup mentions both public and private repos, create both with correct isPrivate values
|
|
4318
|
+
|
|
4319
|
+
### Stripe
|
|
4320
|
+
- Accounts need: accountId, businessName, defaultCurrency, chargesEnabled, payoutsEnabled
|
|
4321
|
+
- Customers need: customerId ("cus_xxx"), name, email, balance (in cents)
|
|
4322
|
+
- PaymentIntents need: paymentIntentId, amount (in cents), currency, status
|
|
4323
|
+
- The account's businessName should match the company name in the setup
|
|
4324
|
+
- Stripe amounts are always in the smallest currency unit (cents for USD \u2014 $24,800 = 2480000)
|
|
4325
|
+
|
|
4326
|
+
## STRUCTURAL RULES
|
|
4327
|
+
|
|
3525
4328
|
- Only include sections (add/modify/remove) and collections that need changes
|
|
3526
4329
|
- Do NOT include id, createdAt, or updatedAt in added entities \u2014 they are auto-assigned
|
|
3527
4330
|
- For modify, include the existing entity's id and only the fields to change
|
|
3528
4331
|
- Maintain referential integrity per the rules provided
|
|
3529
|
-
- Use realistic data (real-looking names, descriptions, timestamps in ISO 8601)
|
|
3530
4332
|
- Match the field types and formats exactly as shown in the base seed example
|
|
3531
4333
|
- If the setup mentions specific counts (e.g., "20 issues"), generate that exact count
|
|
3532
4334
|
- Keep data internally consistent (e.g., issue numbers sequential, branch refs valid)
|
|
4335
|
+
- Use unique ts values for each Slack message (increment by 100+ between messages)
|
|
3533
4336
|
- If the base seed already matches the setup description, respond with {}`;
|
|
3534
|
-
function truncateBaseSeed(baseSeed) {
|
|
4337
|
+
function truncateBaseSeed(baseSeed, maxPerCollection = 2) {
|
|
3535
4338
|
const truncated = {};
|
|
3536
4339
|
for (const [collection, entities] of Object.entries(baseSeed)) {
|
|
3537
4340
|
if (entities.length === 0) {
|
|
3538
4341
|
truncated[collection] = [];
|
|
3539
4342
|
} else {
|
|
3540
|
-
truncated[collection] =
|
|
4343
|
+
truncated[collection] = entities.slice(0, maxPerCollection);
|
|
3541
4344
|
}
|
|
3542
4345
|
}
|
|
3543
4346
|
return truncated;
|
|
@@ -3560,7 +4363,7 @@ function buildSeedGenerationPrompt(twinName, baseSeedData, setupDescription) {
|
|
|
3560
4363
|
let prompt = `## Twin: ${twinName}
|
|
3561
4364
|
|
|
3562
4365
|
`;
|
|
3563
|
-
prompt += `## Base Seed (
|
|
4366
|
+
prompt += `## Base Seed (sample entities per collection, showing exact data shape)
|
|
3564
4367
|
`;
|
|
3565
4368
|
prompt += `\`\`\`json
|
|
3566
4369
|
${JSON.stringify(truncated, null, 2)}
|
|
@@ -3575,6 +4378,10 @@ ${JSON.stringify(truncated, null, 2)}
|
|
|
3575
4378
|
`;
|
|
3576
4379
|
prompt += Object.entries(maxIds).map(([col, id]) => `- ${col}: ${id}`).join("\n");
|
|
3577
4380
|
prompt += "\n\n";
|
|
4381
|
+
prompt += `## Available collections
|
|
4382
|
+
`;
|
|
4383
|
+
prompt += Object.keys(baseSeedData).map((col) => `- ${col}`).join("\n");
|
|
4384
|
+
prompt += "\n\n";
|
|
3578
4385
|
if (relationships.length > 0) {
|
|
3579
4386
|
prompt += `## Referential integrity rules
|
|
3580
4387
|
`;
|
|
@@ -3582,6 +4389,8 @@ ${JSON.stringify(truncated, null, 2)}
|
|
|
3582
4389
|
prompt += "\n\n";
|
|
3583
4390
|
}
|
|
3584
4391
|
prompt += `## Setup Description
|
|
4392
|
+
Generate seed data that faithfully reproduces EVERY detail below. Specific names, messages, amounts, and entities mentioned MUST exist in the generated data.
|
|
4393
|
+
|
|
3585
4394
|
${setupDescription}`;
|
|
3586
4395
|
return prompt;
|
|
3587
4396
|
}
|
|
@@ -3621,11 +4430,10 @@ async function generateDynamicSeed(twinName, baseSeedName, baseSeedData, setupDe
|
|
|
3621
4430
|
return { seed: cached.seed, patch: cached.patch, fromCache: true };
|
|
3622
4431
|
}
|
|
3623
4432
|
}
|
|
3624
|
-
if (!config.
|
|
3625
|
-
warn("No API key for dynamic seed generation, using base seed");
|
|
4433
|
+
if (!config.geminiApiKey) {
|
|
4434
|
+
warn("No Gemini API key for dynamic seed generation, using base seed");
|
|
3626
4435
|
return { seed: baseSeedData, patch: {}, fromCache: false };
|
|
3627
4436
|
}
|
|
3628
|
-
const client = getClient2(config.apiKey);
|
|
3629
4437
|
const userPrompt = buildSeedGenerationPrompt(twinName, baseSeedData, setupDescription);
|
|
3630
4438
|
progress(`Generating dynamic seed for ${twinName}...`);
|
|
3631
4439
|
let patch = null;
|
|
@@ -3641,27 +4449,27 @@ Fix these issues:
|
|
|
3641
4449
|
`;
|
|
3642
4450
|
promptWithFeedback += lastErrors.map((e) => `- ${e}`).join("\n");
|
|
3643
4451
|
}
|
|
3644
|
-
debug("Calling
|
|
4452
|
+
debug("Calling Gemini for dynamic seed", {
|
|
3645
4453
|
twin: twinName,
|
|
3646
4454
|
model: config.model,
|
|
3647
4455
|
attempt: String(attempt + 1)
|
|
3648
4456
|
});
|
|
3649
|
-
const
|
|
3650
|
-
|
|
3651
|
-
|
|
3652
|
-
|
|
3653
|
-
|
|
3654
|
-
|
|
3655
|
-
|
|
3656
|
-
|
|
4457
|
+
const result = await callGemini2(
|
|
4458
|
+
config.geminiApiKey,
|
|
4459
|
+
config.model,
|
|
4460
|
+
SYSTEM_PROMPT2,
|
|
4461
|
+
promptWithFeedback,
|
|
4462
|
+
16384
|
|
4463
|
+
);
|
|
4464
|
+
if (result.truncated) {
|
|
4465
|
+
warn("Gemini response was truncated (hit max output tokens), retrying");
|
|
3657
4466
|
continue;
|
|
3658
4467
|
}
|
|
3659
|
-
|
|
3660
|
-
|
|
3661
|
-
warn("LLM returned no text content for dynamic seed");
|
|
4468
|
+
if (!result.text) {
|
|
4469
|
+
warn("Gemini returned no text content for dynamic seed");
|
|
3662
4470
|
continue;
|
|
3663
4471
|
}
|
|
3664
|
-
patch = parseSeedPatchResponse(
|
|
4472
|
+
patch = parseSeedPatchResponse(result.text);
|
|
3665
4473
|
if (!patch) continue;
|
|
3666
4474
|
const validation = validateSeedPatch(patch, baseSeedData, twinName);
|
|
3667
4475
|
if (!validation.valid) {
|
|
@@ -3693,11 +4501,11 @@ Fix these issues:
|
|
|
3693
4501
|
|
|
3694
4502
|
// src/commands/doctor.ts
|
|
3695
4503
|
import { Command } from "commander";
|
|
3696
|
-
import { existsSync as
|
|
3697
|
-
import { resolve as
|
|
4504
|
+
import { existsSync as existsSync9, readFileSync as readFileSync11 } from "fs";
|
|
4505
|
+
import { resolve as resolve6 } from "path";
|
|
3698
4506
|
import { createRequire as createRequire3 } from "module";
|
|
3699
|
-
import { fileURLToPath as
|
|
3700
|
-
var
|
|
4507
|
+
import { fileURLToPath as fileURLToPath4 } from "url";
|
|
4508
|
+
var __dirname4 = fileURLToPath4(new URL(".", import.meta.url));
|
|
3701
4509
|
var PASS = `${GREEN}${BOLD}pass${RESET}`;
|
|
3702
4510
|
var FAIL = `${RED}${BOLD}FAIL${RESET}`;
|
|
3703
4511
|
var WARN_TAG = `${YELLOW}${BOLD}warn${RESET}`;
|
|
@@ -3712,20 +4520,20 @@ var KNOWN_TWINS = [
|
|
|
3712
4520
|
"google-workspace"
|
|
3713
4521
|
];
|
|
3714
4522
|
function resolveMonorepoRoot2() {
|
|
3715
|
-
let cursor =
|
|
4523
|
+
let cursor = __dirname4;
|
|
3716
4524
|
for (let depth = 0; depth < 8; depth += 1) {
|
|
3717
|
-
const hasTwinsDir =
|
|
3718
|
-
const hasWorkspacePackage =
|
|
4525
|
+
const hasTwinsDir = existsSync9(resolve6(cursor, "twins"));
|
|
4526
|
+
const hasWorkspacePackage = existsSync9(resolve6(cursor, "package.json"));
|
|
3719
4527
|
if (hasTwinsDir && hasWorkspacePackage) {
|
|
3720
4528
|
return cursor;
|
|
3721
4529
|
}
|
|
3722
|
-
const parent =
|
|
4530
|
+
const parent = resolve6(cursor, "..");
|
|
3723
4531
|
if (parent === cursor) {
|
|
3724
4532
|
break;
|
|
3725
4533
|
}
|
|
3726
4534
|
cursor = parent;
|
|
3727
4535
|
}
|
|
3728
|
-
return
|
|
4536
|
+
return resolve6(__dirname4, "..", "..");
|
|
3729
4537
|
}
|
|
3730
4538
|
function statusTag(status) {
|
|
3731
4539
|
switch (status) {
|
|
@@ -3756,7 +4564,7 @@ function checkNodeVersion() {
|
|
|
3756
4564
|
}
|
|
3757
4565
|
function checkArchalDir() {
|
|
3758
4566
|
const dir = getArchalDir();
|
|
3759
|
-
if (
|
|
4567
|
+
if (existsSync9(dir)) {
|
|
3760
4568
|
return {
|
|
3761
4569
|
name: "Archal directory",
|
|
3762
4570
|
status: "pass",
|
|
@@ -3772,7 +4580,7 @@ function checkArchalDir() {
|
|
|
3772
4580
|
}
|
|
3773
4581
|
function checkConfigFile() {
|
|
3774
4582
|
const path = getConfigPath();
|
|
3775
|
-
if (
|
|
4583
|
+
if (existsSync9(path)) {
|
|
3776
4584
|
return {
|
|
3777
4585
|
name: "Config file",
|
|
3778
4586
|
status: "pass",
|
|
@@ -3788,25 +4596,38 @@ function checkConfigFile() {
|
|
|
3788
4596
|
}
|
|
3789
4597
|
function checkApiKey() {
|
|
3790
4598
|
const config = loadConfig();
|
|
3791
|
-
|
|
3792
|
-
|
|
4599
|
+
const provider = detectProvider(config.model);
|
|
4600
|
+
const resolvedKey = resolveProviderApiKey(config.apiKey, provider);
|
|
4601
|
+
const envVar = getProviderEnvVar(provider);
|
|
4602
|
+
const label = provider === "openai-compatible" ? `custom: ${config.model}` : provider;
|
|
4603
|
+
if (provider === "openai-compatible" && !config.baseUrl) {
|
|
3793
4604
|
return {
|
|
3794
|
-
name:
|
|
4605
|
+
name: `Evaluator API key (${label})`,
|
|
4606
|
+
status: "fail",
|
|
4607
|
+
message: "No base URL configured",
|
|
4608
|
+
detail: "Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
|
|
4609
|
+
};
|
|
4610
|
+
}
|
|
4611
|
+
if (resolvedKey && resolvedKey.length > 0) {
|
|
4612
|
+
const masked = "***" + resolvedKey.slice(-4);
|
|
4613
|
+
return {
|
|
4614
|
+
name: `Evaluator API key (${label})`,
|
|
3795
4615
|
status: "pass",
|
|
3796
4616
|
message: `Set (${masked})`
|
|
3797
4617
|
};
|
|
3798
4618
|
}
|
|
3799
4619
|
return {
|
|
3800
|
-
name:
|
|
4620
|
+
name: `Evaluator API key (${label})`,
|
|
3801
4621
|
status: "fail",
|
|
3802
4622
|
message: "Not set",
|
|
3803
|
-
detail:
|
|
4623
|
+
detail: `Required for probabilistic ([P]) criteria evaluation. Set via: export ${envVar}=<your-key>`
|
|
3804
4624
|
};
|
|
3805
4625
|
}
|
|
3806
4626
|
function checkTwinAvailability(twinName) {
|
|
3807
4627
|
const monorepoRoot = resolveMonorepoRoot2();
|
|
3808
|
-
const
|
|
3809
|
-
|
|
4628
|
+
const hasTwinsDir = existsSync9(resolve6(monorepoRoot, "twins"));
|
|
4629
|
+
const distPath = resolve6(monorepoRoot, "twins", twinName, "dist", "index.js");
|
|
4630
|
+
if (existsSync9(distPath)) {
|
|
3810
4631
|
return {
|
|
3811
4632
|
name: `Twin: ${twinName}`,
|
|
3812
4633
|
status: "pass",
|
|
@@ -3823,8 +4644,8 @@ function checkTwinAvailability(twinName) {
|
|
|
3823
4644
|
};
|
|
3824
4645
|
} catch {
|
|
3825
4646
|
}
|
|
3826
|
-
const srcPath =
|
|
3827
|
-
if (
|
|
4647
|
+
const srcPath = resolve6(monorepoRoot, "twins", twinName, "src", "index.ts");
|
|
4648
|
+
if (existsSync9(srcPath)) {
|
|
3828
4649
|
return {
|
|
3829
4650
|
name: `Twin: ${twinName}`,
|
|
3830
4651
|
status: "warn",
|
|
@@ -3832,11 +4653,18 @@ function checkTwinAvailability(twinName) {
|
|
|
3832
4653
|
detail: `Run: pnpm --filter @archal/twin-${twinName} build`
|
|
3833
4654
|
};
|
|
3834
4655
|
}
|
|
4656
|
+
if (!hasTwinsDir) {
|
|
4657
|
+
return {
|
|
4658
|
+
name: `Twin: ${twinName}`,
|
|
4659
|
+
status: "pass",
|
|
4660
|
+
message: "Cloud-hosted (via archal run)"
|
|
4661
|
+
};
|
|
4662
|
+
}
|
|
3835
4663
|
return {
|
|
3836
4664
|
name: `Twin: ${twinName}`,
|
|
3837
4665
|
status: "fail",
|
|
3838
4666
|
message: "Not found",
|
|
3839
|
-
detail: `
|
|
4667
|
+
detail: `Build with: pnpm --filter @archal/twin-${twinName} build`
|
|
3840
4668
|
};
|
|
3841
4669
|
}
|
|
3842
4670
|
function checkAgentConfig() {
|
|
@@ -3848,10 +4676,10 @@ function checkAgentConfig() {
|
|
|
3848
4676
|
message: `ARCHAL_AGENT_COMMAND="${envCommand}"`
|
|
3849
4677
|
};
|
|
3850
4678
|
}
|
|
3851
|
-
const projectConfig =
|
|
3852
|
-
if (
|
|
4679
|
+
const projectConfig = resolve6(".archal.json");
|
|
4680
|
+
if (existsSync9(projectConfig)) {
|
|
3853
4681
|
try {
|
|
3854
|
-
const raw = JSON.parse(
|
|
4682
|
+
const raw = JSON.parse(readFileSync11(projectConfig, "utf-8"));
|
|
3855
4683
|
if (raw.agent?.command) {
|
|
3856
4684
|
return {
|
|
3857
4685
|
name: "Agent command",
|
|
@@ -3876,8 +4704,8 @@ function checkAgentConfig() {
|
|
|
3876
4704
|
};
|
|
3877
4705
|
}
|
|
3878
4706
|
function checkScenario(scenarioPath) {
|
|
3879
|
-
const resolved =
|
|
3880
|
-
if (!
|
|
4707
|
+
const resolved = resolve6(scenarioPath);
|
|
4708
|
+
if (!existsSync9(resolved)) {
|
|
3881
4709
|
return {
|
|
3882
4710
|
name: `Scenario: ${scenarioPath}`,
|
|
3883
4711
|
status: "fail",
|
|
@@ -3897,13 +4725,26 @@ function checkScenario(scenarioPath) {
|
|
|
3897
4725
|
}
|
|
3898
4726
|
const hasProbabilistic = scenario.successCriteria.some((c) => c.type === "probabilistic");
|
|
3899
4727
|
const config = loadConfig();
|
|
3900
|
-
if (hasProbabilistic
|
|
3901
|
-
|
|
3902
|
-
|
|
3903
|
-
|
|
3904
|
-
|
|
3905
|
-
|
|
3906
|
-
|
|
4728
|
+
if (hasProbabilistic) {
|
|
4729
|
+
const provider = detectProvider(config.model);
|
|
4730
|
+
const resolvedKey = resolveProviderApiKey(config.apiKey, provider);
|
|
4731
|
+
const envVar = getProviderEnvVar(provider);
|
|
4732
|
+
if (provider === "openai-compatible" && !config.baseUrl) {
|
|
4733
|
+
return {
|
|
4734
|
+
name: `Scenario: ${scenarioPath}`,
|
|
4735
|
+
status: "fail",
|
|
4736
|
+
message: `Has [P] criteria but no base URL for ${config.model}`,
|
|
4737
|
+
detail: "Set via: archal config set evaluator.baseUrl <url> or export ARCHAL_EVALUATOR_BASE_URL=<url>"
|
|
4738
|
+
};
|
|
4739
|
+
}
|
|
4740
|
+
if (!resolvedKey) {
|
|
4741
|
+
return {
|
|
4742
|
+
name: `Scenario: ${scenarioPath}`,
|
|
4743
|
+
status: "fail",
|
|
4744
|
+
message: `Has [P] criteria but no ${envVar}`,
|
|
4745
|
+
detail: `${scenario.successCriteria.filter((c) => c.type === "probabilistic").length} probabilistic criteria require an API key`
|
|
4746
|
+
};
|
|
4747
|
+
}
|
|
3907
4748
|
}
|
|
3908
4749
|
const missingTwins = [];
|
|
3909
4750
|
for (const twin of scenario.config.twins) {
|
|
@@ -4005,27 +4846,50 @@ function createDoctorCommand() {
|
|
|
4005
4846
|
|
|
4006
4847
|
// src/auth.ts
|
|
4007
4848
|
import { spawnSync } from "child_process";
|
|
4008
|
-
import {
|
|
4849
|
+
import { existsSync as existsSync10, readFileSync as readFileSync12, unlinkSync as unlinkSync5, writeFileSync as writeFileSync7 } from "fs";
|
|
4009
4850
|
import { join as join7 } from "path";
|
|
4010
4851
|
var CREDENTIALS_FILE = "credentials.json";
|
|
4011
|
-
var
|
|
4012
|
-
|
|
4852
|
+
var AUTH_TOKEN_ENV_VAR = "ARCHAL_TOKEN";
|
|
4853
|
+
function normalizeAuthUrl(value) {
|
|
4854
|
+
const trimmed = value.trim().replace(/\/+$/, "");
|
|
4855
|
+
return trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
|
|
4856
|
+
}
|
|
4857
|
+
var AUTH_BASE_URL = normalizeAuthUrl(process.env["ARCHAL_AUTH_URL"] ?? "https://www.archal.ai");
|
|
4858
|
+
var REQUEST_TIMEOUT_MS3 = 8e3;
|
|
4859
|
+
var ENV_TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
|
|
4013
4860
|
function getCredentialsPath() {
|
|
4014
4861
|
return join7(ensureArchalDir(), CREDENTIALS_FILE);
|
|
4015
4862
|
}
|
|
4016
4863
|
function isPlan(value) {
|
|
4017
4864
|
return value === "free" || value === "pro" || value === "enterprise";
|
|
4018
4865
|
}
|
|
4866
|
+
function isTokenDerivedIdentity(email) {
|
|
4867
|
+
return email === "(from ARCHAL_TOKEN)" || email === "(from token)";
|
|
4868
|
+
}
|
|
4869
|
+
function logRefreshFailure(creds, reason) {
|
|
4870
|
+
if (isTokenDerivedIdentity(creds.email)) {
|
|
4871
|
+
warn(
|
|
4872
|
+
`Could not verify token with ${AUTH_BASE_URL}/auth/me (${reason}). Using token without refreshed account metadata.`
|
|
4873
|
+
);
|
|
4874
|
+
return;
|
|
4875
|
+
}
|
|
4876
|
+
warn(
|
|
4877
|
+
`Could not refresh account metadata from ${AUTH_BASE_URL}/auth/me (${reason}). Using cached credentials.`
|
|
4878
|
+
);
|
|
4879
|
+
}
|
|
4019
4880
|
function readCredentialsFile() {
|
|
4020
4881
|
const path = getCredentialsPath();
|
|
4021
|
-
if (!
|
|
4882
|
+
if (!existsSync10(path)) {
|
|
4022
4883
|
return null;
|
|
4023
4884
|
}
|
|
4024
4885
|
try {
|
|
4025
|
-
const raw =
|
|
4886
|
+
const raw = readFileSync12(path, "utf-8");
|
|
4026
4887
|
const parsed = JSON.parse(raw);
|
|
4027
4888
|
const token = typeof parsed.token === "string" ? parsed.token : typeof parsed.accessToken === "string" ? parsed.accessToken : null;
|
|
4028
4889
|
if (token === null || parsed.refreshToken !== void 0 && typeof parsed.refreshToken !== "string" || typeof parsed.email !== "string" || !isPlan(parsed.plan) || !Array.isArray(parsed.selectedTwins) || !parsed.selectedTwins.every((value) => typeof value === "string") || typeof parsed.expiresAt !== "number") {
|
|
4890
|
+
warn(
|
|
4891
|
+
`Credentials file at ${path} has missing or invalid fields. Run \`archal login\` to re-authenticate.`
|
|
4892
|
+
);
|
|
4029
4893
|
return null;
|
|
4030
4894
|
}
|
|
4031
4895
|
return {
|
|
@@ -4037,8 +4901,31 @@ function readCredentialsFile() {
|
|
|
4037
4901
|
expiresAt: parsed.expiresAt
|
|
4038
4902
|
};
|
|
4039
4903
|
} catch {
|
|
4904
|
+
warn(
|
|
4905
|
+
`Credentials file at ${path} exists but could not be parsed. Delete it and run \`archal login\` to re-authenticate.`
|
|
4906
|
+
);
|
|
4907
|
+
return null;
|
|
4908
|
+
}
|
|
4909
|
+
}
|
|
4910
|
+
function readCredentialsFromEnv() {
|
|
4911
|
+
const raw = process.env[AUTH_TOKEN_ENV_VAR];
|
|
4912
|
+
if (typeof raw !== "string") {
|
|
4913
|
+
return null;
|
|
4914
|
+
}
|
|
4915
|
+
const token = raw.trim();
|
|
4916
|
+
if (token.length === 0) {
|
|
4040
4917
|
return null;
|
|
4041
4918
|
}
|
|
4919
|
+
const nowSeconds = Math.floor(Date.now() / 1e3);
|
|
4920
|
+
return {
|
|
4921
|
+
token,
|
|
4922
|
+
refreshToken: "",
|
|
4923
|
+
email: "(from ARCHAL_TOKEN)",
|
|
4924
|
+
plan: "free",
|
|
4925
|
+
selectedTwins: [],
|
|
4926
|
+
// API keys are opaque and don't carry exp; keep env-provided token usable.
|
|
4927
|
+
expiresAt: getJwtExpiry(token) ?? nowSeconds + ENV_TOKEN_FALLBACK_TTL_SECONDS
|
|
4928
|
+
};
|
|
4042
4929
|
}
|
|
4043
4930
|
function getCredentials() {
|
|
4044
4931
|
const creds = getStoredCredentials();
|
|
@@ -4052,7 +4939,7 @@ function getCredentials() {
|
|
|
4052
4939
|
return creds;
|
|
4053
4940
|
}
|
|
4054
4941
|
function getStoredCredentials() {
|
|
4055
|
-
return readCredentialsFile();
|
|
4942
|
+
return readCredentialsFromEnv() ?? readCredentialsFile();
|
|
4056
4943
|
}
|
|
4057
4944
|
function saveCredentials(creds) {
|
|
4058
4945
|
const path = getCredentialsPath();
|
|
@@ -4060,15 +4947,11 @@ function saveCredentials(creds) {
|
|
|
4060
4947
|
accessToken: creds.token,
|
|
4061
4948
|
...creds
|
|
4062
4949
|
};
|
|
4063
|
-
writeFileSync7(path, JSON.stringify(payload, null, 2) + "\n", "utf-8");
|
|
4064
|
-
try {
|
|
4065
|
-
chmodSync2(path, 384);
|
|
4066
|
-
} catch {
|
|
4067
|
-
}
|
|
4950
|
+
writeFileSync7(path, JSON.stringify(payload, null, 2) + "\n", { encoding: "utf-8", mode: 384 });
|
|
4068
4951
|
}
|
|
4069
4952
|
function deleteCredentials() {
|
|
4070
4953
|
const path = getCredentialsPath();
|
|
4071
|
-
if (!
|
|
4954
|
+
if (!existsSync10(path)) {
|
|
4072
4955
|
return false;
|
|
4073
4956
|
}
|
|
4074
4957
|
unlinkSync5(path);
|
|
@@ -4114,21 +4997,86 @@ function requireAuth(options = {}) {
|
|
|
4114
4997
|
process.stderr.write("Tip: archal setup\n");
|
|
4115
4998
|
process.exit(1);
|
|
4116
4999
|
}
|
|
5000
|
+
function isCliTokenExchangeResponse(value) {
|
|
5001
|
+
if (!value || typeof value !== "object") return false;
|
|
5002
|
+
const data = value;
|
|
5003
|
+
return typeof data["accessToken"] === "string" && typeof data["refreshToken"] === "string" && typeof data["email"] === "string" && isPlan(data["plan"]) && Array.isArray(data["selectedTwins"]) && data["selectedTwins"].every((item) => typeof item === "string") && typeof data["expiresAt"] === "number";
|
|
5004
|
+
}
|
|
5005
|
+
function isCliRefreshResponse(value) {
|
|
5006
|
+
if (!value || typeof value !== "object") return false;
|
|
5007
|
+
const data = value;
|
|
5008
|
+
return typeof data["accessToken"] === "string" && typeof data["refreshToken"] === "string" && typeof data["expiresAt"] === "number";
|
|
5009
|
+
}
|
|
5010
|
+
async function exchangeCliAuthCode(input) {
|
|
5011
|
+
const response = await fetch(`${AUTH_BASE_URL}/auth/cli/token`, {
|
|
5012
|
+
method: "POST",
|
|
5013
|
+
headers: {
|
|
5014
|
+
"content-type": "application/json",
|
|
5015
|
+
"user-agent": CLI_USER_AGENT
|
|
5016
|
+
},
|
|
5017
|
+
body: JSON.stringify(input),
|
|
5018
|
+
signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
|
|
5019
|
+
});
|
|
5020
|
+
if (!response.ok) {
|
|
5021
|
+
throw new Error(`Login failed during code exchange (${response.status})`);
|
|
5022
|
+
}
|
|
5023
|
+
const payload = await response.json();
|
|
5024
|
+
if (!isCliTokenExchangeResponse(payload)) {
|
|
5025
|
+
throw new Error("Login failed: invalid token exchange response");
|
|
5026
|
+
}
|
|
5027
|
+
return {
|
|
5028
|
+
token: payload.accessToken,
|
|
5029
|
+
refreshToken: payload.refreshToken,
|
|
5030
|
+
email: payload.email,
|
|
5031
|
+
plan: payload.plan,
|
|
5032
|
+
selectedTwins: payload.selectedTwins,
|
|
5033
|
+
expiresAt: payload.expiresAt
|
|
5034
|
+
};
|
|
5035
|
+
}
|
|
5036
|
+
async function refreshCliSession(creds) {
|
|
5037
|
+
if (!creds.refreshToken) {
|
|
5038
|
+
return null;
|
|
5039
|
+
}
|
|
5040
|
+
const response = await fetch(`${AUTH_BASE_URL}/auth/cli/refresh`, {
|
|
5041
|
+
method: "POST",
|
|
5042
|
+
headers: {
|
|
5043
|
+
"content-type": "application/json",
|
|
5044
|
+
"user-agent": CLI_USER_AGENT
|
|
5045
|
+
},
|
|
5046
|
+
body: JSON.stringify({ refreshToken: creds.refreshToken }),
|
|
5047
|
+
signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
|
|
5048
|
+
});
|
|
5049
|
+
if (!response.ok) {
|
|
5050
|
+
return null;
|
|
5051
|
+
}
|
|
5052
|
+
const payload = await response.json();
|
|
5053
|
+
if (!isCliRefreshResponse(payload)) {
|
|
5054
|
+
return null;
|
|
5055
|
+
}
|
|
5056
|
+
return {
|
|
5057
|
+
...creds,
|
|
5058
|
+
token: payload.accessToken,
|
|
5059
|
+
refreshToken: payload.refreshToken,
|
|
5060
|
+
expiresAt: payload.expiresAt
|
|
5061
|
+
};
|
|
5062
|
+
}
|
|
4117
5063
|
async function refreshAuthFromServer(creds) {
|
|
4118
5064
|
try {
|
|
4119
5065
|
const response = await fetch(`${AUTH_BASE_URL}/auth/me`, {
|
|
4120
5066
|
method: "GET",
|
|
4121
5067
|
headers: {
|
|
4122
5068
|
authorization: `Bearer ${creds.token}`,
|
|
4123
|
-
"user-agent":
|
|
5069
|
+
"user-agent": CLI_USER_AGENT
|
|
4124
5070
|
},
|
|
4125
|
-
signal: AbortSignal.timeout(
|
|
5071
|
+
signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS3)
|
|
4126
5072
|
});
|
|
4127
5073
|
if (!response.ok) {
|
|
5074
|
+
logRefreshFailure(creds, `HTTP ${response.status}`);
|
|
4128
5075
|
return creds;
|
|
4129
5076
|
}
|
|
4130
5077
|
const data = await response.json();
|
|
4131
5078
|
if (typeof data.email !== "string" || !isPlan(data.plan) || !Array.isArray(data.selectedTwins) || !data.selectedTwins.every((value) => typeof value === "string")) {
|
|
5079
|
+
logRefreshFailure(creds, "invalid response payload");
|
|
4132
5080
|
return creds;
|
|
4133
5081
|
}
|
|
4134
5082
|
const updated = {
|
|
@@ -4141,7 +5089,9 @@ async function refreshAuthFromServer(creds) {
|
|
|
4141
5089
|
saveCredentials(updated);
|
|
4142
5090
|
}
|
|
4143
5091
|
return updated;
|
|
4144
|
-
} catch {
|
|
5092
|
+
} catch (error2) {
|
|
5093
|
+
const message = error2 instanceof Error ? error2.message : String(error2);
|
|
5094
|
+
logRefreshFailure(creds, message);
|
|
4145
5095
|
return creds;
|
|
4146
5096
|
}
|
|
4147
5097
|
}
|
|
@@ -4165,7 +5115,7 @@ function getJwtExpiry(token) {
|
|
|
4165
5115
|
}
|
|
4166
5116
|
|
|
4167
5117
|
// src/runner/routing.ts
|
|
4168
|
-
import { readFileSync as
|
|
5118
|
+
import { readFileSync as readFileSync13 } from "fs";
|
|
4169
5119
|
function isLoopbackUrl(rawUrl) {
|
|
4170
5120
|
try {
|
|
4171
5121
|
const parsed = new URL(rawUrl);
|
|
@@ -4180,7 +5130,7 @@ function isNonLocalEndpoint(rawUrl) {
|
|
|
4180
5130
|
}
|
|
4181
5131
|
function parseRemoteTwinUrlOverrides(path) {
|
|
4182
5132
|
if (!path) return void 0;
|
|
4183
|
-
const raw =
|
|
5133
|
+
const raw = readFileSync13(path, "utf-8");
|
|
4184
5134
|
const parsed = JSON.parse(raw);
|
|
4185
5135
|
const overrides = {};
|
|
4186
5136
|
for (const [key, value] of Object.entries(parsed)) {
|
|
@@ -4202,7 +5152,7 @@ function parseRemoteTwinUrlOverrides(path) {
|
|
|
4202
5152
|
}
|
|
4203
5153
|
function parseApiBaseUrlOverrides(path) {
|
|
4204
5154
|
if (!path) return void 0;
|
|
4205
|
-
const raw =
|
|
5155
|
+
const raw = readFileSync13(path, "utf-8");
|
|
4206
5156
|
const parsed = JSON.parse(raw);
|
|
4207
5157
|
const overrides = {};
|
|
4208
5158
|
for (const [key, value] of Object.entries(parsed)) {
|
|
@@ -4260,17 +5210,17 @@ function buildApiRoutingEnv(routing) {
|
|
|
4260
5210
|
}
|
|
4261
5211
|
return env;
|
|
4262
5212
|
}
|
|
4263
|
-
function
|
|
5213
|
+
function validateRemoteApiEngineTopology(endpointUrl, requiredTwins, remoteTwinUrlOverrides) {
|
|
4264
5214
|
if (!isNonLocalEndpoint(endpointUrl)) return;
|
|
4265
5215
|
if (!remoteTwinUrlOverrides) {
|
|
4266
5216
|
throw new Error(
|
|
4267
|
-
"Non-local
|
|
5217
|
+
"Non-local engine endpoint detected but no remote-reachable twin URL map provided. Use --engine-twin-urls <path-to-json> with twin MCP base URLs reachable by the engine endpoint."
|
|
4268
5218
|
);
|
|
4269
5219
|
}
|
|
4270
5220
|
const missing = requiredTwins.filter((twin) => !remoteTwinUrlOverrides[twin]);
|
|
4271
5221
|
if (missing.length > 0) {
|
|
4272
5222
|
throw new Error(
|
|
4273
|
-
`Missing twin URL overrides for: ${missing.join(", ")}. Provide a URL for each twin in --
|
|
5223
|
+
`Missing twin URL overrides for: ${missing.join(", ")}. Provide a URL for each twin in --engine-twin-urls when using a non-local engine endpoint.`
|
|
4274
5224
|
);
|
|
4275
5225
|
}
|
|
4276
5226
|
}
|
|
@@ -4304,7 +5254,16 @@ function computeStateDiff(before, after) {
|
|
|
4304
5254
|
}
|
|
4305
5255
|
return diff;
|
|
4306
5256
|
}
|
|
4307
|
-
|
|
5257
|
+
function parsePositiveIntFromEnv(name) {
|
|
5258
|
+
const raw = process.env[name]?.trim();
|
|
5259
|
+
if (!raw) return void 0;
|
|
5260
|
+
const parsed = parseInt(raw, 10);
|
|
5261
|
+
if (Number.isNaN(parsed) || parsed <= 0) {
|
|
5262
|
+
throw new Error(`${name} must be a positive integer when set`);
|
|
5263
|
+
}
|
|
5264
|
+
return parsed;
|
|
5265
|
+
}
|
|
5266
|
+
async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections, evaluatorConfig, timeoutSeconds, rateLimit, apiEngine, localEngine, remoteTwinUrlOverrides, apiRouting, cloudTwinUrls, apiBearerToken, adminAuth) {
|
|
4308
5267
|
async function probeHealth(url, timeoutMs) {
|
|
4309
5268
|
const controller = new AbortController();
|
|
4310
5269
|
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
@@ -4335,8 +5294,13 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4335
5294
|
try {
|
|
4336
5295
|
let beforeState;
|
|
4337
5296
|
if (useCloud) {
|
|
5297
|
+
const hasDynamicSeeds = seedSelections.some((s) => s.seedData);
|
|
5298
|
+
if (hasDynamicSeeds) {
|
|
5299
|
+
progress("Pushing dynamic seeds to cloud twins...");
|
|
5300
|
+
await pushStateToCloud(cloudTwinUrls, seedSelections, apiBearerToken, adminAuth);
|
|
5301
|
+
}
|
|
4338
5302
|
progress("Fetching seed state from cloud twins...");
|
|
4339
|
-
beforeState = await collectStateFromHttp(cloudTwinUrls);
|
|
5303
|
+
beforeState = await collectStateFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
|
|
4340
5304
|
} else {
|
|
4341
5305
|
progress("Capturing seed state...");
|
|
4342
5306
|
const seedResult = await captureSeedState(twinConfigs);
|
|
@@ -4363,7 +5327,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4363
5327
|
const twinNames = twinConfigs.map((c) => c.twinName);
|
|
4364
5328
|
const localTwinUrls = twinUrls;
|
|
4365
5329
|
let effectiveRemoteTwinUrls;
|
|
4366
|
-
if (
|
|
5330
|
+
if (apiEngine) {
|
|
4367
5331
|
effectiveRemoteTwinUrls = {};
|
|
4368
5332
|
for (const twinName of twinNames) {
|
|
4369
5333
|
const fromOverride = remoteTwinUrlOverrides?.[twinName];
|
|
@@ -4375,7 +5339,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4375
5339
|
effectiveRemoteTwinUrls[twinName] = resolved;
|
|
4376
5340
|
}
|
|
4377
5341
|
}
|
|
4378
|
-
if (
|
|
5342
|
+
if (apiEngine && !useCloud) {
|
|
4379
5343
|
for (const [name, url] of Object.entries(localTwinUrls)) {
|
|
4380
5344
|
const ok = await probeHealth(url, 1500);
|
|
4381
5345
|
if (!ok) {
|
|
@@ -4383,24 +5347,25 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4383
5347
|
}
|
|
4384
5348
|
}
|
|
4385
5349
|
}
|
|
4386
|
-
|
|
4387
|
-
|
|
4388
|
-
|
|
4389
|
-
|
|
4390
|
-
|
|
4391
|
-
|
|
4392
|
-
|
|
4393
|
-
}
|
|
4394
|
-
const taskMessage = generateTaskFromScenario(scenario, apiRouting);
|
|
5350
|
+
const baseTaskMessage = generateTaskFromScenario(scenario, apiRouting);
|
|
5351
|
+
const taskMessage = localEngine?.promptContext ? `${localEngine.promptContext}
|
|
5352
|
+
|
|
5353
|
+
---
|
|
5354
|
+
|
|
5355
|
+
${baseTaskMessage}` : baseTaskMessage;
|
|
5356
|
+
const engineModel = localEngine?.model ?? apiEngine?.model;
|
|
4395
5357
|
const effectiveAgentConfig = {
|
|
4396
5358
|
...agentConfig,
|
|
4397
5359
|
env: {
|
|
4398
5360
|
...agentConfig.env,
|
|
4399
|
-
...buildApiRoutingEnv(apiRouting)
|
|
5361
|
+
...buildApiRoutingEnv(apiRouting),
|
|
5362
|
+
ARCHAL_ENGINE_MODE: apiEngine ? "api" : "local",
|
|
5363
|
+
...engineModel ? { ARCHAL_ENGINE_MODEL: engineModel } : {},
|
|
5364
|
+
ARCHAL_ENGINE_TASK: taskMessage
|
|
4400
5365
|
}
|
|
4401
5366
|
};
|
|
4402
|
-
let agentResult =
|
|
4403
|
-
|
|
5367
|
+
let agentResult = apiEngine ? await executeOpenClawRemote(
|
|
5368
|
+
apiEngine,
|
|
4404
5369
|
scenario,
|
|
4405
5370
|
runId,
|
|
4406
5371
|
taskMessage,
|
|
@@ -4414,7 +5379,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4414
5379
|
timeoutSeconds * 1e3,
|
|
4415
5380
|
{ restConfigPath, twinUrls }
|
|
4416
5381
|
);
|
|
4417
|
-
if (!
|
|
5382
|
+
if (!apiEngine && !localEngine && shouldRetryWithModernOpenClaw(agentResult)) {
|
|
4418
5383
|
warn(
|
|
4419
5384
|
"OpenClaw legacy local invocation failed with CLI drift signal; retrying with modern local args"
|
|
4420
5385
|
);
|
|
@@ -4431,8 +5396,8 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4431
5396
|
let stateAfter;
|
|
4432
5397
|
let trace;
|
|
4433
5398
|
if (useCloud) {
|
|
4434
|
-
stateAfter = await collectStateFromHttp(cloudTwinUrls);
|
|
4435
|
-
trace = await collectTraceFromHttp(cloudTwinUrls);
|
|
5399
|
+
stateAfter = await collectStateFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
|
|
5400
|
+
trace = await collectTraceFromHttp(cloudTwinUrls, apiBearerToken, adminAuth);
|
|
4436
5401
|
} else {
|
|
4437
5402
|
if (!twinPaths) {
|
|
4438
5403
|
throw new Error("Twin paths not initialized");
|
|
@@ -4443,7 +5408,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4443
5408
|
const diff = computeStateDiff(beforeState, stateAfter);
|
|
4444
5409
|
cleanupTempFiles(mcpConfigPath, twinPaths ?? {}, seedPaths, runId, twinNames);
|
|
4445
5410
|
if (agentResult.timedOut) {
|
|
4446
|
-
const timeoutDisplay =
|
|
5411
|
+
const timeoutDisplay = apiEngine ? `${(apiEngine.timeoutMs / 1e3).toFixed(0)}s` : `${timeoutSeconds}s`;
|
|
4447
5412
|
const durationMs2 = Date.now() - startTime;
|
|
4448
5413
|
return {
|
|
4449
5414
|
runIndex,
|
|
@@ -4461,6 +5426,9 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4461
5426
|
}
|
|
4462
5427
|
if (agentResult.exitCode !== 0 && agentResult.exitCode !== null) {
|
|
4463
5428
|
warn(`Agent exited with non-zero code ${agentResult.exitCode} on run ${runIndex + 1}`);
|
|
5429
|
+
if (agentResult.stderr) {
|
|
5430
|
+
debug(`Agent stderr: ${agentResult.stderr.slice(0, 500)}`);
|
|
5431
|
+
}
|
|
4464
5432
|
}
|
|
4465
5433
|
progress(`Evaluating run ${runIndex + 1}...`);
|
|
4466
5434
|
const evaluationResult = await evaluateRun(
|
|
@@ -4511,7 +5479,7 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4511
5479
|
for (const paths of Object.values(seedPaths)) {
|
|
4512
5480
|
for (const file of [paths.stateFile, `${paths.stateFile}.tmp`]) {
|
|
4513
5481
|
try {
|
|
4514
|
-
if (
|
|
5482
|
+
if (existsSync11(file)) unlinkSync6(file);
|
|
4515
5483
|
} catch {
|
|
4516
5484
|
}
|
|
4517
5485
|
}
|
|
@@ -4520,14 +5488,14 @@ async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections,
|
|
|
4520
5488
|
if (restConfigPath) {
|
|
4521
5489
|
for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
|
|
4522
5490
|
try {
|
|
4523
|
-
if (
|
|
5491
|
+
if (existsSync11(file)) unlinkSync6(file);
|
|
4524
5492
|
} catch {
|
|
4525
5493
|
}
|
|
4526
5494
|
}
|
|
4527
5495
|
}
|
|
4528
5496
|
}
|
|
4529
5497
|
}
|
|
4530
|
-
function preflightCheck(scenario, apiKey) {
|
|
5498
|
+
function preflightCheck(scenario, apiKey, model, baseUrl) {
|
|
4531
5499
|
const errors = [];
|
|
4532
5500
|
for (const twin of scenario.config.twins) {
|
|
4533
5501
|
const result = checkTwinAvailability(twin);
|
|
@@ -4540,17 +5508,30 @@ function preflightCheck(scenario, apiKey) {
|
|
|
4540
5508
|
}
|
|
4541
5509
|
}
|
|
4542
5510
|
const hasProbabilistic = scenario.successCriteria.some((c) => c.type === "probabilistic");
|
|
4543
|
-
if (hasProbabilistic
|
|
4544
|
-
const
|
|
4545
|
-
|
|
4546
|
-
|
|
4547
|
-
|
|
4548
|
-
|
|
4549
|
-
|
|
5511
|
+
if (hasProbabilistic) {
|
|
5512
|
+
const provider = detectProvider(model);
|
|
5513
|
+
const resolvedKey = resolveProviderApiKey(apiKey, provider);
|
|
5514
|
+
if (provider === "openai-compatible" && !baseUrl) {
|
|
5515
|
+
errors.push({
|
|
5516
|
+
check: "evaluator.baseUrl",
|
|
5517
|
+
message: `Model "${model}" requires a base URL for the OpenAI-compatible endpoint`,
|
|
5518
|
+
detail: "Set via: export ARCHAL_EVALUATOR_BASE_URL=<url> or archal config set evaluator.baseUrl <url>"
|
|
5519
|
+
});
|
|
5520
|
+
}
|
|
5521
|
+
if (!resolvedKey) {
|
|
5522
|
+
const envVar = getProviderEnvVar(provider);
|
|
5523
|
+
const pCount = scenario.successCriteria.filter((c) => c.type === "probabilistic").length;
|
|
5524
|
+
errors.push({
|
|
5525
|
+
check: envVar,
|
|
5526
|
+
message: `Scenario has ${pCount} probabilistic criteria that will be skipped (no API key for ${provider})`,
|
|
5527
|
+
detail: `Set via: export ${envVar}=<your-key> or archal config set evaluator.apiKey <key>`,
|
|
5528
|
+
warning: true
|
|
5529
|
+
});
|
|
5530
|
+
}
|
|
4550
5531
|
}
|
|
4551
5532
|
return errors;
|
|
4552
5533
|
}
|
|
4553
|
-
async function
|
|
5534
|
+
async function runRemoteApiEnginePreflight(scenario, seedSelections, rateLimit, remoteConfig, remoteTwinUrlOverrides) {
|
|
4554
5535
|
const runId = `archal-preflight-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
4555
5536
|
const twinConfigs = seedSelections.map((sel) => ({
|
|
4556
5537
|
twinName: sel.twinName,
|
|
@@ -4592,14 +5573,14 @@ async function runRemoteOpenClawPreflight(scenario, seedSelections, rateLimit, r
|
|
|
4592
5573
|
for (const paths of Object.values(restResult.twinPaths)) {
|
|
4593
5574
|
for (const file of [paths.stateFile, `${paths.stateFile}.tmp`, paths.traceFile, `${paths.traceFile}.tmp`]) {
|
|
4594
5575
|
try {
|
|
4595
|
-
if (
|
|
5576
|
+
if (existsSync11(file)) unlinkSync6(file);
|
|
4596
5577
|
} catch {
|
|
4597
5578
|
}
|
|
4598
5579
|
}
|
|
4599
5580
|
}
|
|
4600
5581
|
for (const file of [restConfigPath, `${restConfigPath}.tmp`]) {
|
|
4601
5582
|
try {
|
|
4602
|
-
if (
|
|
5583
|
+
if (existsSync11(file)) unlinkSync6(file);
|
|
4603
5584
|
} catch {
|
|
4604
5585
|
}
|
|
4605
5586
|
}
|
|
@@ -4622,9 +5603,14 @@ async function runScenario(options) {
|
|
|
4622
5603
|
);
|
|
4623
5604
|
}
|
|
4624
5605
|
}
|
|
4625
|
-
const preflightErrors = preflightCheck(scenario, config.apiKey);
|
|
4626
|
-
|
|
4627
|
-
|
|
5606
|
+
const preflightErrors = preflightCheck(scenario, config.apiKey, model, config.baseUrl);
|
|
5607
|
+
const hardErrors = preflightErrors.filter((e) => !e.warning);
|
|
5608
|
+
const warnings = preflightErrors.filter((e) => e.warning);
|
|
5609
|
+
for (const w of warnings) {
|
|
5610
|
+
warn(`${w.check}: ${w.message}${w.detail ? ` (${w.detail})` : ""}`);
|
|
5611
|
+
}
|
|
5612
|
+
if (hardErrors.length > 0) {
|
|
5613
|
+
const lines = hardErrors.map((e) => {
|
|
4628
5614
|
let line = ` - ${e.check}: ${e.message}`;
|
|
4629
5615
|
if (e.detail) line += `
|
|
4630
5616
|
${e.detail}`;
|
|
@@ -4651,7 +5637,7 @@ Run 'archal doctor' for a full system check.`
|
|
|
4651
5637
|
}
|
|
4652
5638
|
seedSelections = overrideSeedSelection(seedSelections, overrides);
|
|
4653
5639
|
}
|
|
4654
|
-
if (config.
|
|
5640
|
+
if (config.geminiApiKey && !options.noDynamicSeed) {
|
|
4655
5641
|
progress("Generating dynamic seeds from setup description...");
|
|
4656
5642
|
const baseTwinConfigs = seedSelections.map((sel) => ({
|
|
4657
5643
|
twinName: sel.twinName,
|
|
@@ -4659,8 +5645,8 @@ Run 'archal doctor' for a full system check.`
|
|
|
4659
5645
|
}));
|
|
4660
5646
|
const { beforeState: baseSeedStates } = await captureSeedState(baseTwinConfigs);
|
|
4661
5647
|
const dynamicConfig = {
|
|
4662
|
-
|
|
4663
|
-
model,
|
|
5648
|
+
geminiApiKey: config.geminiApiKey,
|
|
5649
|
+
model: config.seedModel,
|
|
4664
5650
|
noCache: options.noSeedCache
|
|
4665
5651
|
};
|
|
4666
5652
|
for (const sel of seedSelections) {
|
|
@@ -4683,24 +5669,28 @@ Run 'archal doctor' for a full system check.`
|
|
|
4683
5669
|
sel.seedData = result.seed;
|
|
4684
5670
|
}
|
|
4685
5671
|
}
|
|
4686
|
-
const scenarioDir = dirname2(
|
|
5672
|
+
const scenarioDir = dirname2(resolve7(options.scenarioPath));
|
|
4687
5673
|
let projectConfigPath;
|
|
4688
5674
|
for (const dir of [scenarioDir, process.cwd()]) {
|
|
4689
|
-
const candidate =
|
|
4690
|
-
if (
|
|
5675
|
+
const candidate = resolve7(dir, ".archal.json");
|
|
5676
|
+
if (existsSync11(candidate)) {
|
|
4691
5677
|
projectConfigPath = candidate;
|
|
4692
5678
|
break;
|
|
4693
5679
|
}
|
|
4694
5680
|
}
|
|
4695
|
-
function
|
|
4696
|
-
if (!raw || !raw.trim()) return
|
|
5681
|
+
function resolveOpenClawModel2(raw) {
|
|
5682
|
+
if (!raw || !raw.trim()) return void 0;
|
|
4697
5683
|
const value = raw.trim();
|
|
4698
5684
|
return value.includes(":") ? value : `openclaw:${value}`;
|
|
4699
5685
|
}
|
|
4700
|
-
function
|
|
5686
|
+
function resolveEngineToken2(explicitToken) {
|
|
4701
5687
|
if (explicitToken && explicitToken.trim()) {
|
|
4702
5688
|
return explicitToken.trim();
|
|
4703
5689
|
}
|
|
5690
|
+
const engineToken = process.env["ARCHAL_ENGINE_TOKEN"]?.trim();
|
|
5691
|
+
if (engineToken) {
|
|
5692
|
+
return engineToken;
|
|
5693
|
+
}
|
|
4704
5694
|
const gatewayToken = process.env["OPENCLAW_GATEWAY_TOKEN"]?.trim();
|
|
4705
5695
|
if (gatewayToken) {
|
|
4706
5696
|
return gatewayToken;
|
|
@@ -4711,42 +5701,124 @@ Run 'archal doctor' for a full system check.`
|
|
|
4711
5701
|
}
|
|
4712
5702
|
return void 0;
|
|
4713
5703
|
}
|
|
4714
|
-
|
|
4715
|
-
|
|
4716
|
-
|
|
4717
|
-
|
|
4718
|
-
|
|
4719
|
-
|
|
4720
|
-
|
|
5704
|
+
const openclawEndpointAlias = options.openclawUrl ?? process.env["OPENCLAW_URL"];
|
|
5705
|
+
const engineMode = (() => {
|
|
5706
|
+
if (options.engine) {
|
|
5707
|
+
return options.engine;
|
|
5708
|
+
}
|
|
5709
|
+
if (options.engineEndpoint || openclawEndpointAlias || process.env["ARCHAL_ENGINE_ENDPOINT"]) {
|
|
5710
|
+
return "api";
|
|
5711
|
+
}
|
|
5712
|
+
if (options.harnessDir || process.env["ARCHAL_HARNESS_DIR"]) {
|
|
5713
|
+
return "local";
|
|
5714
|
+
}
|
|
5715
|
+
return "legacy";
|
|
5716
|
+
})();
|
|
5717
|
+
const apiEndpoint = options.engineEndpoint ?? openclawEndpointAlias ?? process.env["ARCHAL_ENGINE_ENDPOINT"];
|
|
5718
|
+
const rawOpenClawAgent = options.openclawAgent ?? process.env["OPENCLAW_AGENT_ID"];
|
|
5719
|
+
const rawEngineModel = options.engineModel ?? process.env["ARCHAL_ENGINE_MODEL"];
|
|
5720
|
+
const resolvedEngineToken = resolveEngineToken2(options.engineToken ?? options.openclawToken);
|
|
5721
|
+
const harnessDir = options.harnessDir ?? process.env["ARCHAL_HARNESS_DIR"];
|
|
5722
|
+
let apiEngine;
|
|
5723
|
+
if (engineMode === "api") {
|
|
5724
|
+
const apiTimeoutSeconds = options.engineTimeout ?? options.openclawTimeout ?? parsePositiveIntFromEnv("ARCHAL_ENGINE_TIMEOUT") ?? timeoutSeconds;
|
|
5725
|
+
if (!apiEndpoint || !apiEndpoint.trim()) {
|
|
5726
|
+
throw new Error(
|
|
5727
|
+
"API engine mode requires --engine-endpoint (or --openclaw-url for legacy compatibility)."
|
|
5728
|
+
);
|
|
5729
|
+
}
|
|
5730
|
+
if (!Number.isFinite(apiTimeoutSeconds) || apiTimeoutSeconds <= 0) {
|
|
5731
|
+
throw new Error("Engine timeout must be a positive integer number of seconds.");
|
|
5732
|
+
}
|
|
5733
|
+
const resolvedApiModel = rawEngineModel?.trim() || resolveOpenClawModel2(rawOpenClawAgent) || (openclawEndpointAlias ? "openclaw:main" : void 0);
|
|
5734
|
+
if (!resolvedApiModel) {
|
|
5735
|
+
throw new Error(
|
|
5736
|
+
"API engine mode requires --engine-model/ARCHAL_ENGINE_MODEL (or --openclaw-agent/OPENCLAW_AGENT_ID)."
|
|
5737
|
+
);
|
|
5738
|
+
}
|
|
5739
|
+
apiEngine = {
|
|
5740
|
+
url: apiEndpoint.trim(),
|
|
5741
|
+
token: resolvedEngineToken,
|
|
5742
|
+
model: resolvedApiModel,
|
|
5743
|
+
timeoutMs: apiTimeoutSeconds * 1e3,
|
|
5744
|
+
agentId: rawOpenClawAgent?.trim() || void 0
|
|
4721
5745
|
};
|
|
4722
|
-
if (!
|
|
5746
|
+
if (openclawEndpointAlias && !apiEngine.token) {
|
|
4723
5747
|
throw new Error(
|
|
4724
5748
|
"OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD."
|
|
4725
5749
|
);
|
|
4726
5750
|
}
|
|
4727
5751
|
}
|
|
4728
|
-
|
|
5752
|
+
let localEngine;
|
|
5753
|
+
if (engineMode === "local") {
|
|
5754
|
+
if (!harnessDir) {
|
|
5755
|
+
throw new Error(
|
|
5756
|
+
"Local engine mode requires --harness-dir (or ARCHAL_HARNESS_DIR)."
|
|
5757
|
+
);
|
|
5758
|
+
}
|
|
5759
|
+
const resolvedHarness = resolveLocalHarness(harnessDir, rawEngineModel);
|
|
5760
|
+
const resolvedFallbackLocalAgentConfig = options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath);
|
|
5761
|
+
const fallbackLocalAgentConfig = resolvedFallbackLocalAgentConfig ?? { command: "openclaw", args: [] };
|
|
5762
|
+
if (!resolvedHarness.manifest) {
|
|
5763
|
+
debug(
|
|
5764
|
+
"Harness manifest not found for local mode; using agent command defaults.",
|
|
5765
|
+
{ manifestPath: resolvedHarness.manifestPath }
|
|
5766
|
+
);
|
|
5767
|
+
} else if (!resolvedHarness.localCommand) {
|
|
5768
|
+
warn(
|
|
5769
|
+
`Harness manifest at ${resolvedHarness.manifestPath} does not define local.command; falling back to agent command defaults.`
|
|
5770
|
+
);
|
|
5771
|
+
}
|
|
5772
|
+
if (!resolvedHarness.localCommand && !resolvedFallbackLocalAgentConfig) {
|
|
5773
|
+
warn(
|
|
5774
|
+
'No local command configured via harness manifest/.archal.json/ARCHAL_AGENT_COMMAND; defaulting to "openclaw".'
|
|
5775
|
+
);
|
|
5776
|
+
}
|
|
5777
|
+
const commandConfig = resolvedHarness.localCommand ?? fallbackLocalAgentConfig;
|
|
5778
|
+
localEngine = {
|
|
5779
|
+
model: resolvedHarness.model,
|
|
5780
|
+
command: commandConfig.command,
|
|
5781
|
+
args: commandConfig.args,
|
|
5782
|
+
env: commandConfig.env,
|
|
5783
|
+
cwd: resolvedHarness.harnessDir,
|
|
5784
|
+
promptContext: resolvedHarness.promptContext
|
|
5785
|
+
};
|
|
5786
|
+
}
|
|
5787
|
+
const remoteTwinUrlOverrides = apiEngine ? parseRemoteTwinUrlOverrides(
|
|
5788
|
+
options.engineTwinUrls ?? options.openclawTwinUrls ?? process.env["ARCHAL_ENGINE_TWIN_URLS"]
|
|
5789
|
+
) : void 0;
|
|
4729
5790
|
const apiBaseUrlOverrides = parseApiBaseUrlOverrides(options.apiBaseUrls);
|
|
4730
5791
|
const apiProxyUrl = parseProxyUrl(options.apiProxyUrl ?? process.env["ARCHAL_API_PROXY_URL"]);
|
|
4731
5792
|
const apiRouting = apiBaseUrlOverrides && Object.keys(apiBaseUrlOverrides).length > 0 || apiProxyUrl ? {
|
|
4732
5793
|
baseUrls: apiBaseUrlOverrides,
|
|
4733
|
-
proxyUrl: apiProxyUrl
|
|
5794
|
+
proxyUrl: apiProxyUrl,
|
|
5795
|
+
bearerToken: options.apiBearerToken,
|
|
5796
|
+
adminToken: options.apiAdminToken,
|
|
5797
|
+
adminUserId: options.apiAdminUserId
|
|
4734
5798
|
} : void 0;
|
|
4735
|
-
const agentConfig =
|
|
5799
|
+
const agentConfig = localEngine ? {
|
|
5800
|
+
command: localEngine.command,
|
|
5801
|
+
args: localEngine.args,
|
|
5802
|
+
env: localEngine.env,
|
|
5803
|
+
cwd: localEngine.cwd
|
|
5804
|
+
} : options.agentConfig ?? resolveAgentConfig(options.agent, projectConfigPath) ?? (apiEngine ? { command: "openclaw", args: [] } : {
|
|
4736
5805
|
command: process.env["ARCHAL_AGENT_COMMAND"] ?? "echo",
|
|
4737
5806
|
args: process.env["ARCHAL_AGENT_COMMAND"] ? [] : ["No agent command configured"]
|
|
4738
5807
|
});
|
|
4739
|
-
if (!
|
|
5808
|
+
if (!apiEngine && !localEngine && agentConfig.command === "echo") {
|
|
4740
5809
|
process.stderr.write(
|
|
4741
|
-
"Warning: No agent command configured. Set ARCHAL_AGENT_COMMAND/.archal.json or
|
|
5810
|
+
"Warning: No agent command configured. Set ARCHAL_AGENT_COMMAND/.archal.json, use --engine-endpoint, or run --engine local with --harness-dir.\n"
|
|
4742
5811
|
);
|
|
4743
5812
|
}
|
|
4744
|
-
if (
|
|
4745
|
-
info("Remote
|
|
5813
|
+
if (apiEngine) {
|
|
5814
|
+
info("Remote API engine mode enabled", { url: apiEngine.url });
|
|
4746
5815
|
warn(
|
|
4747
|
-
"Remote
|
|
5816
|
+
"Remote engine mode requires network reachability from the endpoint to each run's twin MCP URLs. If runs fail to connect, co-locate the engine with Archal or expose twins via a reachable network path."
|
|
4748
5817
|
);
|
|
4749
|
-
|
|
5818
|
+
validateRemoteApiEngineTopology(apiEngine.url, scenario.config.twins, remoteTwinUrlOverrides);
|
|
5819
|
+
}
|
|
5820
|
+
if (localEngine) {
|
|
5821
|
+
info("Local harness engine mode enabled", { harnessDir: localEngine.cwd });
|
|
4750
5822
|
}
|
|
4751
5823
|
if (apiRouting) {
|
|
4752
5824
|
info("API routing context enabled", {
|
|
@@ -4755,18 +5827,18 @@ Run 'archal doctor' for a full system check.`
|
|
|
4755
5827
|
});
|
|
4756
5828
|
}
|
|
4757
5829
|
if (options.preflightOnly) {
|
|
4758
|
-
if (
|
|
4759
|
-
await
|
|
5830
|
+
if (apiEngine) {
|
|
5831
|
+
await runRemoteApiEnginePreflight(
|
|
4760
5832
|
scenario,
|
|
4761
5833
|
seedSelections,
|
|
4762
5834
|
options.rateLimit,
|
|
4763
|
-
|
|
5835
|
+
apiEngine,
|
|
4764
5836
|
remoteTwinUrlOverrides
|
|
4765
5837
|
);
|
|
4766
5838
|
}
|
|
4767
5839
|
info("Preflight checks passed", {
|
|
4768
5840
|
scenario: scenario.title,
|
|
4769
|
-
|
|
5841
|
+
engineMode: apiEngine ? "api" : localEngine ? "local" : "legacy-local"
|
|
4770
5842
|
});
|
|
4771
5843
|
return {
|
|
4772
5844
|
scenarioTitle: scenario.title,
|
|
@@ -4786,6 +5858,7 @@ Run 'archal doctor' for a full system check.`
|
|
|
4786
5858
|
};
|
|
4787
5859
|
const runs = [];
|
|
4788
5860
|
for (let i = 0; i < numRuns; i++) {
|
|
5861
|
+
const adminAuth = options.apiAdminToken ? { token: options.apiAdminToken, userId: options.apiAdminUserId } : void 0;
|
|
4789
5862
|
const result = await executeSingleRun(
|
|
4790
5863
|
i,
|
|
4791
5864
|
scenario,
|
|
@@ -4794,10 +5867,13 @@ Run 'archal doctor' for a full system check.`
|
|
|
4794
5867
|
evaluatorConfig,
|
|
4795
5868
|
timeoutSeconds,
|
|
4796
5869
|
options.rateLimit,
|
|
4797
|
-
|
|
5870
|
+
apiEngine,
|
|
5871
|
+
localEngine,
|
|
4798
5872
|
remoteTwinUrlOverrides,
|
|
4799
5873
|
apiRouting,
|
|
4800
|
-
options.cloudTwinUrls
|
|
5874
|
+
options.cloudTwinUrls,
|
|
5875
|
+
options.apiBearerToken,
|
|
5876
|
+
adminAuth
|
|
4801
5877
|
);
|
|
4802
5878
|
runs.push(result);
|
|
4803
5879
|
printRunProgress(i, numRuns, result.overallScore, result.error);
|
|
@@ -4836,10 +5912,10 @@ function normalizeBaseUrl(value, fallback) {
|
|
|
4836
5912
|
const normalized = trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
|
|
4837
5913
|
return normalized.length > 0 ? normalized : fallback;
|
|
4838
5914
|
}
|
|
4839
|
-
var DEFAULT_BASE_URL = "https://archal.ai";
|
|
5915
|
+
var DEFAULT_BASE_URL = "https://www.archal.ai";
|
|
4840
5916
|
var AUTH_BASE_URL2 = normalizeBaseUrl(process.env["ARCHAL_AUTH_URL"] ?? DEFAULT_BASE_URL, DEFAULT_BASE_URL);
|
|
4841
5917
|
var API_BASE_URL = normalizeBaseUrl(process.env["ARCHAL_API_URL"] ?? AUTH_BASE_URL2, AUTH_BASE_URL2);
|
|
4842
|
-
var
|
|
5918
|
+
var REQUEST_TIMEOUT_MS4 = 8e3;
|
|
4843
5919
|
var RETRYABLE_STATUS_CODES2 = /* @__PURE__ */ new Set([408, 425, 429, 500, 502, 503, 504]);
|
|
4844
5920
|
var RETRYABLE_NETWORK_CODES = /* @__PURE__ */ new Set([
|
|
4845
5921
|
"ECONNABORTED",
|
|
@@ -4864,7 +5940,7 @@ var MAX_RETRIES2 = parseBoundedInt(process.env["ARCHAL_API_MAX_RETRIES"], 3, 0,
|
|
|
4864
5940
|
var RETRY_BASE_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_BASE_MS"], 250, 25, 1e4);
|
|
4865
5941
|
var RETRY_MAX_DELAY_MS = parseBoundedInt(process.env["ARCHAL_API_RETRY_MAX_MS"], 3e3, RETRY_BASE_DELAY_MS, 2e4);
|
|
4866
5942
|
function sleep2(ms) {
|
|
4867
|
-
return new Promise((
|
|
5943
|
+
return new Promise((resolve13) => setTimeout(resolve13, ms));
|
|
4868
5944
|
}
|
|
4869
5945
|
function retryDelayMs(attempt, retryAfter) {
|
|
4870
5946
|
if (retryAfter) {
|
|
@@ -4924,13 +6000,30 @@ function isFinalizeEvidencePath(path) {
|
|
|
4924
6000
|
}
|
|
4925
6001
|
return /^\/api\/sessions\/[^/]+\/evidence\/finalize$/.test(pathname);
|
|
4926
6002
|
}
|
|
6003
|
+
async function tryRefreshToken() {
|
|
6004
|
+
try {
|
|
6005
|
+
const creds = getStoredCredentials();
|
|
6006
|
+
if (!creds || !creds.refreshToken) return null;
|
|
6007
|
+
const refreshed = await refreshCliSession(creds);
|
|
6008
|
+
if (!refreshed) return null;
|
|
6009
|
+
saveCredentials(refreshed);
|
|
6010
|
+
return refreshed.token;
|
|
6011
|
+
} catch {
|
|
6012
|
+
return null;
|
|
6013
|
+
}
|
|
6014
|
+
}
|
|
4927
6015
|
async function request(method, path, token, body) {
|
|
4928
6016
|
const url = `${resolveBaseUrl(path)}${path}`;
|
|
4929
6017
|
const headers = {
|
|
4930
6018
|
"content-type": "application/json",
|
|
4931
|
-
"user-agent":
|
|
6019
|
+
"user-agent": CLI_USER_AGENT
|
|
4932
6020
|
};
|
|
4933
|
-
|
|
6021
|
+
const runtimeAdminToken = process.env["ARCHAL_RUNTIME_ADMIN_TOKEN"]?.trim();
|
|
6022
|
+
if (runtimeAdminToken) {
|
|
6023
|
+
headers["x-archal-admin-token"] = runtimeAdminToken;
|
|
6024
|
+
headers["x-archal-user-id"] = process.env["ARCHAL_RUNTIME_USER_ID"]?.trim() || "cli-user";
|
|
6025
|
+
headers["x-archal-plan"] = process.env["ARCHAL_RUNTIME_PLAN"]?.trim() || "free";
|
|
6026
|
+
} else if (token) {
|
|
4934
6027
|
headers["authorization"] = `Bearer ${token}`;
|
|
4935
6028
|
}
|
|
4936
6029
|
const isIdempotentFinalize = method === "POST" && isFinalizeEvidencePath(path);
|
|
@@ -4938,16 +6031,28 @@ async function request(method, path, token, body) {
|
|
|
4938
6031
|
const attempts = retriesAllowed ? MAX_RETRIES2 + 1 : 1;
|
|
4939
6032
|
let lastError = "request failed";
|
|
4940
6033
|
let lastOffline = false;
|
|
6034
|
+
let refreshAttempted = false;
|
|
4941
6035
|
for (let attempt = 1; attempt <= attempts; attempt += 1) {
|
|
4942
6036
|
try {
|
|
4943
6037
|
const response = await fetch(url, {
|
|
4944
6038
|
method,
|
|
4945
6039
|
headers,
|
|
4946
6040
|
body: body ? JSON.stringify(body) : void 0,
|
|
4947
|
-
signal: AbortSignal.timeout(
|
|
6041
|
+
signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS4)
|
|
4948
6042
|
});
|
|
4949
6043
|
if (!response.ok) {
|
|
4950
|
-
|
|
6044
|
+
if (response.status === 401 && token && !refreshAttempted) {
|
|
6045
|
+
refreshAttempted = true;
|
|
6046
|
+
const refreshed = await tryRefreshToken();
|
|
6047
|
+
if (refreshed) {
|
|
6048
|
+
token = refreshed;
|
|
6049
|
+
headers["authorization"] = `Bearer ${token}`;
|
|
6050
|
+
attempt -= 1;
|
|
6051
|
+
continue;
|
|
6052
|
+
}
|
|
6053
|
+
}
|
|
6054
|
+
const rawText = await response.text().catch(() => "");
|
|
6055
|
+
const text = rawText.length > 200 ? rawText.slice(0, 200) + "..." : rawText;
|
|
4951
6056
|
const retryable = retriesAllowed && attempt < attempts && RETRYABLE_STATUS_CODES2.has(response.status);
|
|
4952
6057
|
if (retryable) {
|
|
4953
6058
|
await sleep2(retryDelayMs(attempt, response.headers.get("retry-after")));
|
|
@@ -5018,7 +6123,7 @@ function fetchScenarioCatalog(token) {
|
|
|
5018
6123
|
return request("GET", "/api/scenarios", token);
|
|
5019
6124
|
}
|
|
5020
6125
|
|
|
5021
|
-
// src/commands/
|
|
6126
|
+
// src/commands/twins.ts
|
|
5022
6127
|
import { Command as Command2 } from "commander";
|
|
5023
6128
|
|
|
5024
6129
|
// src/constants.ts
|
|
@@ -5045,10 +6150,10 @@ var PLAN_LIMITS = {
|
|
|
5045
6150
|
import { createInterface as createInterface2 } from "readline";
|
|
5046
6151
|
function askLine(question) {
|
|
5047
6152
|
const rl = createInterface2({ input: process.stdin, output: process.stderr });
|
|
5048
|
-
return new Promise((
|
|
6153
|
+
return new Promise((resolve13) => {
|
|
5049
6154
|
rl.question(question, (answer) => {
|
|
5050
6155
|
rl.close();
|
|
5051
|
-
|
|
6156
|
+
resolve13(answer.trim());
|
|
5052
6157
|
});
|
|
5053
6158
|
});
|
|
5054
6159
|
}
|
|
@@ -5057,8 +6162,7 @@ async function askConfirm(question) {
|
|
|
5057
6162
|
return answer.toLowerCase().startsWith("y");
|
|
5058
6163
|
}
|
|
5059
6164
|
|
|
5060
|
-
// src/commands/
|
|
5061
|
-
var runningTwins = /* @__PURE__ */ new Map();
|
|
6165
|
+
// src/commands/twins.ts
|
|
5062
6166
|
var KNOWN_TWINS2 = [
|
|
5063
6167
|
{ name: "github", package: "@archal/twin-github", description: "GitHub digital twin" },
|
|
5064
6168
|
{ name: "slack", package: "@archal/twin-slack", description: "Slack digital twin" },
|
|
@@ -5083,7 +6187,7 @@ async function runInteractiveTwinSelect(token) {
|
|
|
5083
6187
|
const marker = currentlySelected.has(twin.id) ? "\x1B[32m\u2713\x1B[0m" : " ";
|
|
5084
6188
|
const num = String(i + 1).padStart(2);
|
|
5085
6189
|
process.stderr.write(
|
|
5086
|
-
` ${marker} [${num}] ${twin.name.padEnd(18)} (${twin.toolCount} tools) \u2014 ${twin.description}
|
|
6190
|
+
` ${marker} [${num}] ${twin.name.padEnd(18)}${twin.toolCount != null ? ` (${twin.toolCount} tools)` : ""} \u2014 ${twin.description}
|
|
5087
6191
|
`
|
|
5088
6192
|
);
|
|
5089
6193
|
}
|
|
@@ -5169,7 +6273,7 @@ async function listTwinCatalog() {
|
|
|
5169
6273
|
} else {
|
|
5170
6274
|
status = "\x1B[90m\u2717 not selected\x1B[0m";
|
|
5171
6275
|
}
|
|
5172
|
-
return [twin.name, String(twin.toolCount), twin.description, status];
|
|
6276
|
+
return [twin.name, twin.toolCount != null ? String(twin.toolCount) : "\u2014", twin.description, status];
|
|
5173
6277
|
});
|
|
5174
6278
|
table(headers, rows);
|
|
5175
6279
|
if (isUnlimited) {
|
|
@@ -5194,85 +6298,12 @@ async function selectTwinsForPlan() {
|
|
|
5194
6298
|
const refreshed = await refreshAuthFromServer(creds);
|
|
5195
6299
|
saveCredentials(refreshed);
|
|
5196
6300
|
}
|
|
5197
|
-
function
|
|
5198
|
-
const cmd = new Command2("
|
|
5199
|
-
cmd.command("
|
|
5200
|
-
requireAuth({
|
|
5201
|
-
action: `start the "${name}" twin`,
|
|
5202
|
-
nextCommand: `archal twin start ${name}`
|
|
5203
|
-
});
|
|
5204
|
-
const knownTwin = KNOWN_TWINS2.find((t) => t.name === name);
|
|
5205
|
-
if (!knownTwin) {
|
|
5206
|
-
const available = KNOWN_TWINS2.map((t) => t.name).join(", ");
|
|
5207
|
-
error(`Unknown twin: "${name}". Available twins: ${available}`);
|
|
5208
|
-
process.exit(1);
|
|
5209
|
-
}
|
|
5210
|
-
if (runningTwins.has(name)) {
|
|
5211
|
-
warn(`Twin "${name}" is already running (PID: ${runningTwins.get(name)?.pid ?? "unknown"})`);
|
|
5212
|
-
return;
|
|
5213
|
-
}
|
|
5214
|
-
info("`archal run` uses hosted cloud twins. `archal twin start` is for local debugging only.");
|
|
5215
|
-
const args = [knownTwin.package, "--seed", opts.seed, "--transport", "rest"];
|
|
5216
|
-
if (opts.port) {
|
|
5217
|
-
args.push("--port", opts.port);
|
|
5218
|
-
}
|
|
5219
|
-
info(`Starting twin: ${name}`, { seed: opts.seed, transport: "rest" });
|
|
5220
|
-
const child = spawnMcpStdioProcess({
|
|
5221
|
-
command: "npx",
|
|
5222
|
-
args
|
|
5223
|
-
});
|
|
5224
|
-
const pid = child.pid ?? 0;
|
|
5225
|
-
runningTwins.set(name, {
|
|
5226
|
-
name,
|
|
5227
|
-
pid,
|
|
5228
|
-
startedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5229
|
-
process: child
|
|
5230
|
-
});
|
|
5231
|
-
child.on("exit", (code) => {
|
|
5232
|
-
info(`Twin "${name}" exited`, { code: String(code ?? "unknown") });
|
|
5233
|
-
runningTwins.delete(name);
|
|
5234
|
-
});
|
|
5235
|
-
success(`Twin "${name}" started (PID: ${pid})`);
|
|
5236
|
-
});
|
|
5237
|
-
cmd.command("stop").description("Stop a running digital twin").argument("<name>", "Twin name to stop").action(async (name) => {
|
|
5238
|
-
const twin = runningTwins.get(name);
|
|
5239
|
-
if (!twin) {
|
|
5240
|
-
error(`Twin "${name}" is not running`);
|
|
5241
|
-
const running = Array.from(runningTwins.keys());
|
|
5242
|
-
if (running.length > 0) {
|
|
5243
|
-
info(`Running twins: ${running.join(", ")}`);
|
|
5244
|
-
}
|
|
5245
|
-
process.exit(1);
|
|
5246
|
-
}
|
|
5247
|
-
info(`Stopping twin: ${name}`, { pid: String(twin.pid) });
|
|
5248
|
-
await killProcess(twin.process);
|
|
5249
|
-
runningTwins.delete(name);
|
|
5250
|
-
success(`Twin "${name}" stopped`);
|
|
5251
|
-
});
|
|
5252
|
-
cmd.command("status").description("Show status of running digital twins").action(() => {
|
|
5253
|
-
if (runningTwins.size === 0) {
|
|
5254
|
-
info("No twins currently running");
|
|
5255
|
-
return;
|
|
5256
|
-
}
|
|
5257
|
-
const headers = ["Name", "PID", "Started", "Status"];
|
|
5258
|
-
const rows = [];
|
|
5259
|
-
for (const twin of runningTwins.values()) {
|
|
5260
|
-
const isAlive = twin.process.exitCode === null;
|
|
5261
|
-
rows.push([
|
|
5262
|
-
twin.name,
|
|
5263
|
-
String(twin.pid),
|
|
5264
|
-
twin.startedAt,
|
|
5265
|
-
isAlive ? "running" : `exited (${twin.process.exitCode})`
|
|
5266
|
-
]);
|
|
5267
|
-
}
|
|
5268
|
-
table(headers, rows);
|
|
5269
|
-
});
|
|
5270
|
-
cmd.command("list").description("List available digital twins and entitlement status").action(async () => {
|
|
5271
|
-
warn("`archal twin list` is deprecated. Use `archal twins list`.");
|
|
6301
|
+
function createTwinsCommand() {
|
|
6302
|
+
const cmd = new Command2("twins").description("Manage twin catalog entitlements");
|
|
6303
|
+
cmd.command("list").description("List available twins and entitlement status").action(async () => {
|
|
5272
6304
|
await listTwinCatalog();
|
|
5273
6305
|
});
|
|
5274
6306
|
cmd.command("select").description("Choose which twins to use on your free plan").action(async () => {
|
|
5275
|
-
warn("`archal twin select` is deprecated. Use `archal twins select`.");
|
|
5276
6307
|
await selectTwinsForPlan();
|
|
5277
6308
|
});
|
|
5278
6309
|
return cmd;
|
|
@@ -5280,7 +6311,13 @@ function createTwinCommand() {
|
|
|
5280
6311
|
|
|
5281
6312
|
// src/commands/run.ts
|
|
5282
6313
|
function createRunCommand() {
|
|
5283
|
-
const cmd = new Command3("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path to scenario markdown file").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "100").option("--
|
|
6314
|
+
const cmd = new Command3("run").description("Execute a scenario against digital twins").argument("<scenario>", "Path to scenario markdown file").option("-n, --runs <count>", "Number of runs", "5").option("-t, --timeout <seconds>", "Timeout per run in seconds", "120").option("-m, --model <model>", "Evaluator model for probabilistic criteria").option("-o, --output <format>", "Output format: terminal, json, junit", "terminal").option("--seed <name>", "Override twin seed name").option("--rate-limit <count>", "Rate limit: max total requests before 429").option("--pass-threshold <score>", "Minimum passing satisfaction score (0-100)", "100").option("--engine-endpoint <url>", "API engine endpoint URL (base URL or /v1/responses)").option("--engine-token <token>", "Bearer token for API engine auth").option(
|
|
6315
|
+
"--engine-model <model>",
|
|
6316
|
+
"Model id for API mode; in local mode this is exported as ARCHAL_ENGINE_MODEL"
|
|
6317
|
+
).option("--engine-twin-urls <path>", "Path to JSON mapping twin names to remote-reachable MCP base URLs").option("--engine-timeout <seconds>", "Timeout for API engine HTTP call per run (defaults to run timeout)").option(
|
|
6318
|
+
"--harness-dir <path>",
|
|
6319
|
+
"Local agent execution directory (archal-harness.json is optional)"
|
|
6320
|
+
).option("--openclaw-url <url>", "Deprecated alias for --engine-endpoint").option("--openclaw-token <token>", "Deprecated alias for --engine-token").option("--openclaw-agent <id>", "Deprecated alias for --engine-model").option("--openclaw-twin-urls <path>", "Deprecated alias for --engine-twin-urls").option("--openclaw-timeout <seconds>", "Deprecated alias for --engine-timeout").option("--api-base-urls <path>", "Path to JSON mapping service names to clone API base URLs for raw API code routing").option("--api-proxy-url <url>", "Proxy URL for raw API code routing metadata").option("--preflight-only", "Run environment/config preflight checks only and exit").option("--no-dynamic-seed", "Disable dynamic seed generation (use keyword-matched seed only)").option("--no-seed-cache", "Skip seed cache for dynamic generation").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (scenarioArg, opts) => {
|
|
5284
6321
|
const required = requireAuth({
|
|
5285
6322
|
action: "run a scenario",
|
|
5286
6323
|
nextCommand: `archal run ${scenarioArg}`
|
|
@@ -5296,8 +6333,8 @@ function createRunCommand() {
|
|
|
5296
6333
|
if (opts.verbose) {
|
|
5297
6334
|
configureLogger({ verbose: true, level: "debug" });
|
|
5298
6335
|
}
|
|
5299
|
-
const scenarioPath =
|
|
5300
|
-
if (!
|
|
6336
|
+
const scenarioPath = resolve8(scenarioArg);
|
|
6337
|
+
if (!existsSync12(scenarioPath)) {
|
|
5301
6338
|
process.stderr.write(`Error: Scenario file not found: ${scenarioPath}
|
|
5302
6339
|
`);
|
|
5303
6340
|
process.exit(1);
|
|
@@ -5387,26 +6424,20 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5387
6424
|
process.stderr.write("Error: --pass-threshold must be a number between 0 and 100\n");
|
|
5388
6425
|
process.exit(1);
|
|
5389
6426
|
}
|
|
5390
|
-
|
|
5391
|
-
|
|
5392
|
-
|
|
5393
|
-
|
|
6427
|
+
let engine;
|
|
6428
|
+
try {
|
|
6429
|
+
engine = resolveEngineConfig(opts, timeout);
|
|
6430
|
+
} catch (err) {
|
|
6431
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
6432
|
+
process.stderr.write(`Error: ${message}
|
|
6433
|
+
`);
|
|
5394
6434
|
process.exit(1);
|
|
5395
6435
|
}
|
|
5396
|
-
|
|
5397
|
-
if (opts.openclawTimeout) {
|
|
5398
|
-
openclawTimeout = parseInt(opts.openclawTimeout, 10);
|
|
5399
|
-
if (Number.isNaN(openclawTimeout) || openclawTimeout <= 0) {
|
|
5400
|
-
process.stderr.write("Error: --openclaw-timeout must be a positive integer\n");
|
|
5401
|
-
process.exit(1);
|
|
5402
|
-
}
|
|
5403
|
-
}
|
|
5404
|
-
const resolvedOpenClawToken = resolveOpenClawGatewayToken(opts.openclawToken);
|
|
5405
|
-
if (opts.openclawUrl && !resolvedOpenClawToken) {
|
|
6436
|
+
if (engine.deprecatedAliasesUsed.length > 0) {
|
|
5406
6437
|
process.stderr.write(
|
|
5407
|
-
|
|
6438
|
+
`Warning: OpenClaw flags are deprecated (${engine.deprecatedAliasesUsed.join(", ")}). Use --engine-* equivalents.
|
|
6439
|
+
`
|
|
5408
6440
|
);
|
|
5409
|
-
process.exit(1);
|
|
5410
6441
|
}
|
|
5411
6442
|
{
|
|
5412
6443
|
const sessionResult = await startSession(credentials.token, {
|
|
@@ -5433,9 +6464,9 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5433
6464
|
if (!runFailureMessage && Object.keys(endpointRoots).length > 0) {
|
|
5434
6465
|
cloudTwinUrls = endpointRoots;
|
|
5435
6466
|
}
|
|
5436
|
-
if (!runFailureMessage &&
|
|
5437
|
-
generatedTwinUrlMapPath =
|
|
5438
|
-
`.archal-session-${backendSessionId}-
|
|
6467
|
+
if (!runFailureMessage && engine.mode === "api" && !engine.twinUrlsPath) {
|
|
6468
|
+
generatedTwinUrlMapPath = resolve8(
|
|
6469
|
+
`.archal-session-${backendSessionId}-engine-twin-urls.json`
|
|
5439
6470
|
);
|
|
5440
6471
|
writeFileSync9(
|
|
5441
6472
|
generatedTwinUrlMapPath,
|
|
@@ -5444,7 +6475,7 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5444
6475
|
);
|
|
5445
6476
|
}
|
|
5446
6477
|
if (!runFailureMessage && !opts.apiBaseUrls && apiBaseUrls && Object.keys(apiBaseUrls).length > 0) {
|
|
5447
|
-
generatedApiBaseUrlMapPath =
|
|
6478
|
+
generatedApiBaseUrlMapPath = resolve8(
|
|
5448
6479
|
`.archal-session-${backendSessionId}-api-base-urls.json`
|
|
5449
6480
|
);
|
|
5450
6481
|
writeFileSync9(
|
|
@@ -5454,15 +6485,34 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5454
6485
|
);
|
|
5455
6486
|
}
|
|
5456
6487
|
if (!runFailureMessage) {
|
|
5457
|
-
const
|
|
5458
|
-
|
|
5459
|
-
|
|
5460
|
-
|
|
5461
|
-
|
|
5462
|
-
|
|
6488
|
+
const SESSION_READY_TIMEOUT_MS = 12e4;
|
|
6489
|
+
const SESSION_POLL_INTERVAL_MS = 3e3;
|
|
6490
|
+
const readyDeadline = Date.now() + SESSION_READY_TIMEOUT_MS;
|
|
6491
|
+
let sessionReady = false;
|
|
6492
|
+
while (Date.now() < readyDeadline) {
|
|
6493
|
+
const [statusResult, healthResult] = await Promise.all([
|
|
6494
|
+
getSessionStatus(credentials.token, backendSessionId),
|
|
6495
|
+
getSessionHealth(credentials.token, backendSessionId)
|
|
6496
|
+
]);
|
|
6497
|
+
if (!statusResult.ok) {
|
|
6498
|
+
runFailureMessage = `session status check failed (${statusResult.error})`;
|
|
6499
|
+
break;
|
|
6500
|
+
}
|
|
6501
|
+
const status = statusResult.data.status;
|
|
6502
|
+
if (status === "failed" || status === "expired" || status === "ended") {
|
|
6503
|
+
runFailureMessage = `session ${status}`;
|
|
6504
|
+
break;
|
|
6505
|
+
}
|
|
6506
|
+
const healthAlive = healthResult.ok && healthResult.data.alive;
|
|
6507
|
+
const statusAlive = statusResult.data.alive || statusResult.data.status === "ready";
|
|
6508
|
+
if (statusAlive && healthAlive) {
|
|
6509
|
+
sessionReady = true;
|
|
6510
|
+
break;
|
|
6511
|
+
}
|
|
6512
|
+
await new Promise((resolve13) => setTimeout(resolve13, SESSION_POLL_INTERVAL_MS));
|
|
5463
6513
|
}
|
|
5464
|
-
if (!
|
|
5465
|
-
runFailureMessage =
|
|
6514
|
+
if (!sessionReady && !runFailureMessage) {
|
|
6515
|
+
runFailureMessage = "session timed out waiting for twins to become ready";
|
|
5466
6516
|
}
|
|
5467
6517
|
}
|
|
5468
6518
|
} else if (!sessionResult.offline) {
|
|
@@ -5482,17 +6532,26 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5482
6532
|
output: outputFormat,
|
|
5483
6533
|
seed: opts.seed,
|
|
5484
6534
|
rateLimit,
|
|
6535
|
+
engineEndpoint: engine.endpoint,
|
|
6536
|
+
engineToken: engine.token,
|
|
6537
|
+
engineModel: engine.model,
|
|
6538
|
+
engineTwinUrls: generatedTwinUrlMapPath ?? engine.twinUrlsPath,
|
|
6539
|
+
engineTimeout: engine.timeoutSeconds,
|
|
6540
|
+
harnessDir: engine.harnessDir,
|
|
5485
6541
|
openclawUrl: opts.openclawUrl,
|
|
5486
|
-
openclawToken:
|
|
6542
|
+
openclawToken: engine.token,
|
|
5487
6543
|
openclawAgent: opts.openclawAgent,
|
|
5488
6544
|
openclawTwinUrls: generatedTwinUrlMapPath ?? opts.openclawTwinUrls,
|
|
5489
|
-
openclawTimeout,
|
|
6545
|
+
openclawTimeout: engine.timeoutSeconds,
|
|
5490
6546
|
apiBaseUrls: generatedApiBaseUrlMapPath ?? opts.apiBaseUrls,
|
|
5491
6547
|
apiProxyUrl: opts.apiProxyUrl,
|
|
5492
6548
|
preflightOnly: opts.preflightOnly,
|
|
5493
6549
|
cloudTwinUrls,
|
|
5494
6550
|
noDynamicSeed: !opts.dynamicSeed,
|
|
5495
|
-
noSeedCache: !opts.seedCache
|
|
6551
|
+
noSeedCache: !opts.seedCache,
|
|
6552
|
+
apiBearerToken: credentials.token,
|
|
6553
|
+
apiAdminToken: process.env["ARCHAL_RUNTIME_ADMIN_TOKEN"],
|
|
6554
|
+
apiAdminUserId: process.env["ARCHAL_RUNTIME_USER_ID"]
|
|
5496
6555
|
});
|
|
5497
6556
|
if (!opts.preflightOnly && report.satisfactionScore < passThreshold) {
|
|
5498
6557
|
runFailureMessage = `Satisfaction score ${report.satisfactionScore.toFixed(1)} is below pass threshold ${passThreshold}`;
|
|
@@ -5502,10 +6561,10 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5502
6561
|
const message = err instanceof Error ? err.message : String(err);
|
|
5503
6562
|
runFailureMessage = message;
|
|
5504
6563
|
} finally {
|
|
5505
|
-
if (generatedTwinUrlMapPath &&
|
|
6564
|
+
if (generatedTwinUrlMapPath && existsSync12(generatedTwinUrlMapPath)) {
|
|
5506
6565
|
unlinkSync7(generatedTwinUrlMapPath);
|
|
5507
6566
|
}
|
|
5508
|
-
if (generatedApiBaseUrlMapPath &&
|
|
6567
|
+
if (generatedApiBaseUrlMapPath && existsSync12(generatedApiBaseUrlMapPath)) {
|
|
5509
6568
|
unlinkSync7(generatedApiBaseUrlMapPath);
|
|
5510
6569
|
}
|
|
5511
6570
|
if (backendSessionId) {
|
|
@@ -5566,10 +6625,90 @@ Run \`archal twins select\` to choose your twins, or visit https://archal.ai/das
|
|
|
5566
6625
|
});
|
|
5567
6626
|
return cmd;
|
|
5568
6627
|
}
|
|
5569
|
-
function
|
|
6628
|
+
/**
 * Build the engine configuration for a run from CLI options and environment
 * variables.
 *
 * Each setting is taken from the first non-empty source in its precedence
 * list; the deprecated `--openclaw-*` options and `OPENCLAW_*` variables are
 * still honoured as fallbacks.
 *
 * @param {object} opts - Parsed CLI options.
 * @param {number} runTimeoutSeconds - Run-level timeout, used when no engine
 *   timeout applies.
 * @returns {{mode: string, endpoint: (string|undefined), token: (string|undefined),
 *   model: (string|undefined), twinUrlsPath: (string|undefined),
 *   timeoutSeconds: number, harnessDir: (string|undefined),
 *   deprecatedAliasesUsed: string[]}}
 * @throws {Error} When no execution mode is configured, when the engine
 *   timeout is invalid, or when API mode lacks a model or the OpenClaw alias
 *   path lacks a token.
 */
function resolveEngineConfig(opts, runTimeoutSeconds) {
  const env = process.env;
  const deprecatedAliasesUsed = collectDeprecatedAliases(opts);
  const mode = resolveEngineMode(opts);
  const isApiMode = mode === "api";

  const legacyEndpoint = firstNonEmpty(opts.openclawUrl, env["OPENCLAW_URL"]);
  const endpoint = firstNonEmpty(opts.engineEndpoint, legacyEndpoint, env["ARCHAL_ENGINE_ENDPOINT"]);

  const explicitToken = firstNonEmpty(opts.engineToken, opts.openclawToken);
  const token = resolveEngineToken(explicitToken);

  const legacyAgent = firstNonEmpty(opts.openclawAgent, env["OPENCLAW_AGENT_ID"]);
  const legacyModel = resolveOpenClawModel(legacyAgent);
  // Legacy OpenClaw alias path keeps the historical default model for compatibility.
  const legacyDefaultModel = legacyEndpoint ? "openclaw:main" : void 0;
  const model = firstNonEmpty(opts.engineModel, env["ARCHAL_ENGINE_MODEL"], legacyModel, legacyDefaultModel);

  const rawTimeout = firstNonEmpty(opts.engineTimeout, opts.openclawTimeout, env["ARCHAL_ENGINE_TIMEOUT"]);
  // The engine-specific timeout is only parsed (and validated) in API mode.
  let timeoutSeconds = runTimeoutSeconds;
  if (isApiMode) {
    timeoutSeconds = parsePositiveInteger(rawTimeout, "--engine-timeout") ?? runTimeoutSeconds;
  }

  const twinUrlsPath = firstNonEmpty(opts.engineTwinUrls, opts.openclawTwinUrls, env["ARCHAL_ENGINE_TWIN_URLS"]);
  const harnessDir = firstNonEmpty(opts.harnessDir, env["ARCHAL_HARNESS_DIR"]);

  if (isApiMode && !model) {
    throw new Error(
      "--engine-model is required for API mode (or use --openclaw-agent/OPENCLAW_AGENT_ID)."
    );
  }
  if (isApiMode && legacyEndpoint && !token) {
    throw new Error(
      "OpenClaw gateway auth is required when --openclaw-url is set. Provide --openclaw-token or set OPENCLAW_GATEWAY_TOKEN/OPENCLAW_GATEWAY_PASSWORD."
    );
  }

  return {
    mode,
    endpoint,
    token,
    model,
    twinUrlsPath,
    timeoutSeconds,
    harnessDir,
    deprecatedAliasesUsed
  };
}
|
|
6681
|
+
/**
 * Decide how the agent is executed.
 *
 * @param {object} opts - Parsed CLI options.
 * @returns {"api"|"local"} "api" when any endpoint option or variable is set
 *   (checked first), otherwise "local" when a harness directory is set.
 * @throws {Error} When neither an endpoint nor a harness directory is configured.
 */
function resolveEngineMode(opts) {
  const env = process.env;

  const remoteEndpoint = firstNonEmpty(
    opts.engineEndpoint,
    opts.openclawUrl,
    env["ARCHAL_ENGINE_ENDPOINT"],
    env["OPENCLAW_URL"]
  );
  if (remoteEndpoint) return "api";

  const localHarness = firstNonEmpty(opts.harnessDir, env["ARCHAL_HARNESS_DIR"]);
  if (localHarness) return "local";

  throw new Error(
    "No agent execution mode configured. Provide --engine-endpoint for remote agent execution, or --harness-dir for local agent execution."
  );
}
|
|
6697
|
+
/**
 * Turn an OpenClaw agent identifier into a model string.
 *
 * @param {string|undefined} raw - Agent id, possibly padded with whitespace.
 * @returns {string|undefined} `undefined` for a missing or blank id; the
 *   trimmed id unchanged when it already contains a ":"; otherwise the
 *   trimmed id prefixed with "openclaw:".
 */
function resolveOpenClawModel(raw) {
  const agent = raw ? raw.trim() : "";
  if (agent.length === 0) {
    return void 0;
  }
  if (agent.includes(":")) {
    return agent;
  }
  return `openclaw:${agent}`;
}
|
|
6704
|
+
function resolveEngineToken(rawToken) {
|
|
5570
6705
|
if (rawToken && rawToken.trim()) {
|
|
5571
6706
|
return rawToken.trim();
|
|
5572
6707
|
}
|
|
6708
|
+
const engineToken = process.env["ARCHAL_ENGINE_TOKEN"]?.trim();
|
|
6709
|
+
if (engineToken) {
|
|
6710
|
+
return engineToken;
|
|
6711
|
+
}
|
|
5573
6712
|
const token = process.env["OPENCLAW_GATEWAY_TOKEN"]?.trim();
|
|
5574
6713
|
if (token) {
|
|
5575
6714
|
return token;
|
|
@@ -5580,11 +6719,36 @@ function resolveOpenClawGatewayToken(rawToken) {
|
|
|
5580
6719
|
}
|
|
5581
6720
|
return void 0;
|
|
5582
6721
|
}
|
|
6722
|
+
/**
 * Return the first candidate that is a string with non-whitespace content,
 * trimmed.
 *
 * Candidates that are not strings (undefined, null, booleans, numbers,
 * objects) are skipped rather than crashing on a missing `.trim()`.
 *
 * @param {...*} values - Candidates in precedence order.
 * @returns {string|undefined} The trimmed winner, or `undefined` when no
 *   candidate qualifies.
 */
function firstNonEmpty(...values) {
  for (const value of values) {
    if (typeof value !== "string") continue;
    // Trim once and reuse the result for both the check and the return value.
    const trimmed = value.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return void 0;
}
|
|
6730
|
+
/**
 * Parse an option value as a strictly positive integer.
 *
 * The whole value must be decimal digits (an optional leading "+" and
 * surrounding whitespace are tolerated). Inputs such as "10abc", "1.5" or
 * "1e3" are rejected instead of being silently truncated.
 *
 * @param {string|number|undefined} raw - Raw option value.
 * @param {string} flagName - Flag name used in the error message.
 * @returns {number|undefined} The parsed integer, or `undefined` when `raw`
 *   is empty or missing.
 * @throws {Error} When the value is not a positive integer.
 */
function parsePositiveInteger(raw, flagName) {
  if (!raw) return void 0;
  const text = String(raw).trim();
  const parsed = /^\+?\d+$/.test(text) ? Number(text) : Number.NaN;
  // isSafeInteger also rejects NaN and digit strings too large to represent exactly.
  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
    throw new Error(`${flagName} must be a positive integer`);
  }
  return parsed;
}
|
|
6738
|
+
/**
 * List the deprecated `--openclaw-*` flags present in the parsed options.
 *
 * @param {object} opts - Parsed CLI options.
 * @returns {string[]} Flag names whose option value is truthy, in a fixed order.
 */
function collectDeprecatedAliases(opts) {
  const legacyFlags = [
    ["openclawUrl", "--openclaw-url"],
    ["openclawToken", "--openclaw-token"],
    ["openclawAgent", "--openclaw-agent"],
    ["openclawTwinUrls", "--openclaw-twin-urls"],
    ["openclawTimeout", "--openclaw-timeout"]
  ];
  return legacyFlags.filter(([optionKey]) => opts[optionKey]).map(([, flag]) => flag);
}
|
|
5583
6747
|
|
|
5584
6748
|
// src/commands/init.ts
|
|
5585
6749
|
import { Command as Command4 } from "commander";
|
|
5586
|
-
import { existsSync as
|
|
5587
|
-
import { join as join9, resolve as
|
|
6750
|
+
import { existsSync as existsSync13, mkdirSync as mkdirSync6, writeFileSync as writeFileSync10 } from "fs";
|
|
6751
|
+
import { join as join9, resolve as resolve9 } from "path";
|
|
5588
6752
|
var SAMPLE_SCENARIO = `# Close Stale Issues
|
|
5589
6753
|
|
|
5590
6754
|
## Setup
|
|
@@ -5759,7 +6923,7 @@ var SAMPLE_PACKAGE_JSON = `{
|
|
|
5759
6923
|
}
|
|
5760
6924
|
`;
|
|
5761
6925
|
function writeIfMissing(filePath, content) {
|
|
5762
|
-
if (!
|
|
6926
|
+
if (!existsSync13(filePath)) {
|
|
5763
6927
|
writeFileSync10(filePath, content);
|
|
5764
6928
|
info(`Created ${filePath}`);
|
|
5765
6929
|
} else {
|
|
@@ -5768,8 +6932,8 @@ function writeIfMissing(filePath, content) {
|
|
|
5768
6932
|
}
|
|
5769
6933
|
function createInitCommand() {
|
|
5770
6934
|
const cmd = new Command4("init").description("Initialize an Archal test directory with sample scenario and agent").argument("[directory]", "Directory to initialize", "archal").action((directory) => {
|
|
5771
|
-
const targetDir =
|
|
5772
|
-
if (
|
|
6935
|
+
const targetDir = resolve9(directory);
|
|
6936
|
+
if (existsSync13(targetDir)) {
|
|
5773
6937
|
warn(`Directory already exists: ${targetDir}`);
|
|
5774
6938
|
warn("Skipping files that already exist.");
|
|
5775
6939
|
} else {
|
|
@@ -5792,23 +6956,10 @@ function createInitCommand() {
|
|
|
5792
6956
|
return cmd;
|
|
5793
6957
|
}
|
|
5794
6958
|
|
|
5795
|
-
// src/commands/twins.ts
|
|
5796
|
-
import { Command as Command5 } from "commander";
|
|
5797
|
-
function createTwinsCommand() {
|
|
5798
|
-
const cmd = new Command5("twins").description("Manage twin catalog entitlements");
|
|
5799
|
-
cmd.command("list").description("List available twins and entitlement status").action(async () => {
|
|
5800
|
-
await listTwinCatalog();
|
|
5801
|
-
});
|
|
5802
|
-
cmd.command("select").description("Choose which twins to use on your free plan").action(async () => {
|
|
5803
|
-
await selectTwinsForPlan();
|
|
5804
|
-
});
|
|
5805
|
-
return cmd;
|
|
5806
|
-
}
|
|
5807
|
-
|
|
5808
6959
|
// src/commands/scenario.ts
|
|
5809
|
-
import { Command as
|
|
5810
|
-
import { existsSync as
|
|
5811
|
-
import { resolve as
|
|
6960
|
+
import { Command as Command5 } from "commander";
|
|
6961
|
+
import { existsSync as existsSync14, readdirSync as readdirSync3, writeFileSync as writeFileSync11, mkdirSync as mkdirSync7 } from "fs";
|
|
6962
|
+
import { resolve as resolve10, join as join10, extname, relative } from "path";
|
|
5812
6963
|
var SCENARIO_TEMPLATE = `# {{NAME}}
|
|
5813
6964
|
|
|
5814
6965
|
## Setup
|
|
@@ -5834,15 +6985,15 @@ timeout: 120
|
|
|
5834
6985
|
runs: 5
|
|
5835
6986
|
`;
|
|
5836
6987
|
var SCENARIO_DIR_CANDIDATES = [
|
|
5837
|
-
|
|
5838
|
-
|
|
5839
|
-
|
|
5840
|
-
|
|
5841
|
-
|
|
6988
|
+
resolve10("scenarios"),
|
|
6989
|
+
resolve10("scenario"),
|
|
6990
|
+
resolve10("test", "scenarios"),
|
|
6991
|
+
resolve10("tests", "scenarios"),
|
|
6992
|
+
resolve10(".archal", "scenarios")
|
|
5842
6993
|
];
|
|
5843
6994
|
function findScenarioFiles(dir) {
|
|
5844
6995
|
const files = [];
|
|
5845
|
-
if (!
|
|
6996
|
+
if (!existsSync14(dir)) return files;
|
|
5846
6997
|
const entries = readdirSync3(dir, { withFileTypes: true });
|
|
5847
6998
|
for (const entry of entries) {
|
|
5848
6999
|
const fullPath = join10(dir, entry.name);
|
|
@@ -5856,22 +7007,19 @@ function findScenarioFiles(dir) {
|
|
|
5856
7007
|
}
|
|
5857
7008
|
/**
 * Pick the local scenarios directory.
 *
 * @returns {{dir: string, candidates: string[]}} `dir` is the first entry of
 *   SCENARIO_DIR_CANDIDATES that exists on disk, or the resolved "scenarios"
 *   path when none exists; `candidates` is the full candidate list.
 */
function findLocalScenariosDir() {
  const existing = SCENARIO_DIR_CANDIDATES.find((candidate) => existsSync14(candidate));
  return {
    dir: existing !== void 0 ? existing : resolve10("scenarios"),
    candidates: SCENARIO_DIR_CANDIDATES
  };
}
|
|
5868
7019
|
/**
 * Format a path for display relative to the current working directory.
 *
 * @param {string} path - Path to display.
 * @returns {string} "." for the working directory itself, the relative path
 *   for anything inside it, and `path` unchanged for anything outside it.
 */
function toDisplayPath(path) {
  const rel = relative(resolve10("."), path);
  if (!rel) return ".";
  // Only a leading ".." path *segment* means the target is outside the working
  // directory; an entry such as "..cache/x.md" merely starts with two dots.
  const separator = process.platform === "win32" ? "\\" : "/";
  const escapesCwd = rel === ".." || rel.startsWith(`..${separator}`);
  return escapesCwd ? path : rel;
}
|
|
5876
7024
|
function getCachedScenariosDir() {
|
|
5877
7025
|
return join10(ensureArchalDir(), "scenarios");
|
|
@@ -5897,14 +7045,14 @@ async function syncRemoteScenarios(token) {
|
|
|
5897
7045
|
return scenarios;
|
|
5898
7046
|
}
|
|
5899
7047
|
function createScenarioCommand() {
|
|
5900
|
-
const cmd = new
|
|
7048
|
+
const cmd = new Command5("scenario").description("Manage test scenarios");
|
|
5901
7049
|
cmd.command("list").description("List available scenarios").option("-d, --dir <directory>", "Scenario directory to search").option("--local", "Only show local scenarios (skip remote fetch)").action(async (opts) => {
|
|
5902
7050
|
const creds = getCredentials();
|
|
5903
7051
|
const headers = ["Scenario", "Source", "Criteria", "Twins"];
|
|
5904
7052
|
const rows = [];
|
|
5905
|
-
const localResolution = opts.dir ? { dir:
|
|
7053
|
+
const localResolution = opts.dir ? { dir: resolve10(opts.dir), candidates: [resolve10(opts.dir)] } : findLocalScenariosDir();
|
|
5906
7054
|
const localDir = localResolution.dir;
|
|
5907
|
-
if (
|
|
7055
|
+
if (existsSync14(localDir)) {
|
|
5908
7056
|
const localFiles = findScenarioFiles(localDir);
|
|
5909
7057
|
let hiddenCount = 0;
|
|
5910
7058
|
for (const file of localFiles) {
|
|
@@ -5917,7 +7065,7 @@ function createScenarioCommand() {
|
|
|
5917
7065
|
continue;
|
|
5918
7066
|
}
|
|
5919
7067
|
}
|
|
5920
|
-
const relativePath =
|
|
7068
|
+
const relativePath = relative(resolve10("."), file);
|
|
5921
7069
|
rows.push([
|
|
5922
7070
|
scenario.title,
|
|
5923
7071
|
relativePath,
|
|
@@ -5926,7 +7074,7 @@ function createScenarioCommand() {
|
|
|
5926
7074
|
]);
|
|
5927
7075
|
} catch (err) {
|
|
5928
7076
|
const message = err instanceof Error ? err.message : String(err);
|
|
5929
|
-
const relativePath =
|
|
7077
|
+
const relativePath = relative(resolve10("."), file);
|
|
5930
7078
|
rows.push([`(parse error)`, relativePath, "-", message]);
|
|
5931
7079
|
}
|
|
5932
7080
|
}
|
|
@@ -5971,8 +7119,8 @@ function createScenarioCommand() {
|
|
|
5971
7119
|
Found ${rows.length} scenario(s)`);
|
|
5972
7120
|
});
|
|
5973
7121
|
cmd.command("validate").description("Parse and validate a scenario file").argument("<file>", "Path to scenario markdown file").action((file) => {
|
|
5974
|
-
const filePath =
|
|
5975
|
-
if (!
|
|
7122
|
+
const filePath = resolve10(file);
|
|
7123
|
+
if (!existsSync14(filePath)) {
|
|
5976
7124
|
error(`File not found: ${filePath}`);
|
|
5977
7125
|
process.exit(1);
|
|
5978
7126
|
}
|
|
@@ -6014,14 +7162,14 @@ Found ${rows.length} scenario(s)`);
|
|
|
6014
7162
|
info("Run `archal twins select` to change your selection or `archal upgrade` to unlock all twins.");
|
|
6015
7163
|
process.exit(1);
|
|
6016
7164
|
}
|
|
6017
|
-
const scenariosDir = opts.dir ?
|
|
6018
|
-
if (!
|
|
7165
|
+
const scenariosDir = opts.dir ? resolve10(opts.dir) : findLocalScenariosDir().dir;
|
|
7166
|
+
if (!existsSync14(scenariosDir)) {
|
|
6019
7167
|
mkdirSync7(scenariosDir, { recursive: true });
|
|
6020
7168
|
info(`Created scenarios directory: ${scenariosDir}`);
|
|
6021
7169
|
}
|
|
6022
7170
|
const fileName = name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "") + ".md";
|
|
6023
7171
|
const filePath = join10(scenariosDir, fileName);
|
|
6024
|
-
if (
|
|
7172
|
+
if (existsSync14(filePath)) {
|
|
6025
7173
|
error(`Scenario file already exists: ${filePath}`);
|
|
6026
7174
|
process.exit(1);
|
|
6027
7175
|
}
|
|
@@ -6038,9 +7186,9 @@ Found ${rows.length} scenario(s)`);
|
|
|
6038
7186
|
|
|
6039
7187
|
// src/commands/trace.ts
|
|
6040
7188
|
import { writeFileSync as writeFileSync12 } from "fs";
|
|
6041
|
-
import { resolve as
|
|
7189
|
+
import { resolve as resolve11 } from "path";
|
|
6042
7190
|
import { createInterface as createInterface3 } from "readline";
|
|
6043
|
-
import { Command as
|
|
7191
|
+
import { Command as Command6 } from "commander";
|
|
6044
7192
|
function formatTimestamp2(iso) {
|
|
6045
7193
|
try {
|
|
6046
7194
|
return new Date(iso).toLocaleString();
|
|
@@ -6063,10 +7211,10 @@ var TRACE_HEADERS = ["ID", "Scenario", "Score", "Runs", "Entries", "Timestamp"];
|
|
|
6063
7211
|
/**
 * Ask a yes/no question on the terminal.
 *
 * @param {string} message - Question text; " [y/N] " is appended.
 * @returns {Promise<boolean>} Resolves to `true` only when the trimmed,
 *   lower-cased answer is exactly "y". Resolves to `false` immediately when
 *   stdin is not a TTY.
 */
function confirmPrompt(message) {
  if (!process.stdin.isTTY) {
    return Promise.resolve(false);
  }
  // The prompt is written to stderr.
  const prompt = createInterface3({ input: process.stdin, output: process.stderr });
  return new Promise((resolve13) => {
    const onAnswer = (reply) => {
      prompt.close();
      const normalized = reply.trim().toLowerCase();
      resolve13(normalized === "y");
    };
    prompt.question(`${message} [y/N] `, onAnswer);
  });
}
|
|
@@ -6079,7 +7227,7 @@ function parsePositiveInt(val, flag) {
|
|
|
6079
7227
|
return n;
|
|
6080
7228
|
}
|
|
6081
7229
|
function createTraceCommand() {
|
|
6082
|
-
const cmd = new
|
|
7230
|
+
const cmd = new Command6("trace").description("Inspect, search, and manage run traces");
|
|
6083
7231
|
cmd.command("list").description("List recent traces").option("-n, --limit <count>", "Number of traces to show", "20").action((opts) => {
|
|
6084
7232
|
const traces = listTraces(parsePositiveInt(opts.limit, "--limit"));
|
|
6085
7233
|
if (traces.length === 0) {
|
|
@@ -6183,7 +7331,7 @@ ${traces.length} trace(s) found`);
|
|
|
6183
7331
|
process.exit(1);
|
|
6184
7332
|
}
|
|
6185
7333
|
if (opts.output) {
|
|
6186
|
-
const outPath =
|
|
7334
|
+
const outPath = resolve11(opts.output);
|
|
6187
7335
|
writeFileSync12(outPath, json, "utf-8");
|
|
6188
7336
|
info(`Trace exported to: ${outPath}`);
|
|
6189
7337
|
} else {
|
|
@@ -6260,10 +7408,10 @@ ${traces.length} trace(s) found`);
|
|
|
6260
7408
|
}
|
|
6261
7409
|
|
|
6262
7410
|
// src/commands/config.ts
|
|
6263
|
-
import { existsSync as
|
|
6264
|
-
import { Command as
|
|
7411
|
+
import { existsSync as existsSync15, unlinkSync as unlinkSync8 } from "fs";
|
|
7412
|
+
import { Command as Command7 } from "commander";
|
|
6265
7413
|
function createConfigCommand() {
|
|
6266
|
-
const cmd = new
|
|
7414
|
+
const cmd = new Command7("config").description("Manage Archal configuration");
|
|
6267
7415
|
cmd.command("show").description("Print current configuration").option("--json", "Output as JSON").action((opts) => {
|
|
6268
7416
|
const display = getConfigDisplay();
|
|
6269
7417
|
if (opts.json) {
|
|
@@ -6279,6 +7427,11 @@ function createConfigCommand() {
|
|
|
6279
7427
|
model: evaluator["model"] ?? "(not set)",
|
|
6280
7428
|
apiKey: evaluator["apiKey"] ?? "(not set)"
|
|
6281
7429
|
});
|
|
7430
|
+
const seedGen = display["seedGeneration"];
|
|
7431
|
+
printConfigSection("Seed Generation", {
|
|
7432
|
+
model: seedGen["model"] ?? "(not set)",
|
|
7433
|
+
geminiApiKey: seedGen["geminiApiKey"] ?? "(not set)"
|
|
7434
|
+
});
|
|
6282
7435
|
const defaults = display["defaults"];
|
|
6283
7436
|
printConfigSection("Defaults", {
|
|
6284
7437
|
runs: String(defaults["runs"]),
|
|
@@ -6291,12 +7444,16 @@ function createConfigCommand() {
|
|
|
6291
7444
|
});
|
|
6292
7445
|
process.stdout.write("\n");
|
|
6293
7446
|
info("Set values with: archal config set <key> <value>");
|
|
6294
|
-
info("Valid keys: telemetry, evaluator.model, evaluator.apiKey, defaults.runs, defaults.timeout");
|
|
7447
|
+
info("Valid keys: telemetry, evaluator.model, evaluator.apiKey, seedGeneration.model, seedGeneration.geminiApiKey, defaults.runs, defaults.timeout");
|
|
6295
7448
|
});
|
|
6296
7449
|
cmd.command("set").description("Set a configuration value").argument("<key>", "Configuration key (e.g., evaluator.model, defaults.runs)").argument("<value>", "Value to set").action((key, value) => {
|
|
6297
7450
|
try {
|
|
6298
7451
|
setConfigValue(key, value);
|
|
6299
7452
|
success(`Set ${key} = ${key.includes("apiKey") ? "***" : value}`);
|
|
7453
|
+
if (key.includes("apiKey") && !value.startsWith("env:")) {
|
|
7454
|
+
warn("API key stored in plaintext in config file. Consider using env: prefix instead:");
|
|
7455
|
+
info(` archal config set ${key} env:YOUR_ENV_VAR_NAME`);
|
|
7456
|
+
}
|
|
6300
7457
|
} catch (err) {
|
|
6301
7458
|
const message = err instanceof Error ? err.message : String(err);
|
|
6302
7459
|
error(message);
|
|
@@ -6306,7 +7463,7 @@ function createConfigCommand() {
|
|
|
6306
7463
|
cmd.command("init").description("Create default configuration file").option("--force", "Overwrite existing config").action((opts) => {
|
|
6307
7464
|
const configPath = getConfigPath();
|
|
6308
7465
|
if (opts.force) {
|
|
6309
|
-
if (
|
|
7466
|
+
if (existsSync15(configPath)) {
|
|
6310
7467
|
unlinkSync8(configPath);
|
|
6311
7468
|
}
|
|
6312
7469
|
}
|
|
@@ -6316,7 +7473,7 @@ function createConfigCommand() {
|
|
|
6316
7473
|
info("\nNext steps:");
|
|
6317
7474
|
info(" 1. Set your API key:");
|
|
6318
7475
|
info(" archal config set evaluator.apiKey your-key-here");
|
|
6319
|
-
info(" or set
|
|
7476
|
+
info(" or set GEMINI_API_KEY environment variable (default provider)");
|
|
6320
7477
|
info("");
|
|
6321
7478
|
info(" 2. Create a scenario:");
|
|
6322
7479
|
info(" archal scenario create my-first-test");
|
|
@@ -6345,31 +7502,33 @@ function printConfigSection(name, values) {
|
|
|
6345
7502
|
}
|
|
6346
7503
|
|
|
6347
7504
|
// src/commands/demo.ts
|
|
6348
|
-
import { Command as
|
|
6349
|
-
import { existsSync as
|
|
6350
|
-
import { resolve as
|
|
6351
|
-
import { fileURLToPath as
|
|
7505
|
+
import { Command as Command8 } from "commander";
|
|
7506
|
+
import { existsSync as existsSync16 } from "fs";
|
|
7507
|
+
import { resolve as resolve12, dirname as dirname4 } from "path";
|
|
7508
|
+
import { fileURLToPath as fileURLToPath5 } from "url";
|
|
6352
7509
|
import { createRequire as createRequire4 } from "module";
|
|
6353
|
-
var
|
|
7510
|
+
var __dirname5 = fileURLToPath5(new URL(".", import.meta.url));
|
|
6354
7511
|
/**
 * Locate the directory holding the demo files.
 *
 * Checks `../demo` relative to this module first, then the `demo` directory
 * two levels above the resolved entry point of "@archal/cli". A directory
 * qualifies when it contains `scenario.md`.
 *
 * @returns {string} Path of the demo directory.
 * @throws {Error} When neither location contains `scenario.md`.
 */
function resolveDemoDir() {
  const containsScenario = (dir) => existsSync16(resolve12(dir, "scenario.md"));

  const bundledDir = resolve12(__dirname5, "..", "demo");
  if (containsScenario(bundledDir)) {
    return bundledDir;
  }

  try {
    const localRequire = createRequire4(import.meta.url);
    const entryFile = localRequire.resolve("@archal/cli");
    const installedDir = resolve12(dirname4(dirname4(entryFile)), "demo");
    if (containsScenario(installedDir)) {
      return installedDir;
    }
  } catch {
    // Resolution failure is not fatal here; fall through to the error below.
  }

  throw new Error(
    "Demo files not found. Ensure @archal/cli is installed correctly.\nIf installed globally, try reinstalling: npm install -g @archal/cli"
  );
}
|
|
6371
7530
|
function createDemoCommand() {
|
|
6372
|
-
const cmd = new
|
|
7531
|
+
const cmd = new Command8("demo").description("Run a built-in demo: good agent vs bad agent on the same scenario").option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").action(async (opts) => {
|
|
6373
7532
|
if (opts.quiet) {
|
|
6374
7533
|
configureLogger({ quiet: true });
|
|
6375
7534
|
}
|
|
@@ -6377,9 +7536,9 @@ function createDemoCommand() {
|
|
|
6377
7536
|
configureLogger({ verbose: true, level: "debug" });
|
|
6378
7537
|
}
|
|
6379
7538
|
const demoDir = resolveDemoDir();
|
|
6380
|
-
const scenarioPath =
|
|
6381
|
-
const goodAgentPath =
|
|
6382
|
-
const badAgentPath =
|
|
7539
|
+
const scenarioPath = resolve12(demoDir, "scenario.md");
|
|
7540
|
+
const goodAgentPath = resolve12(demoDir, "good-agent.mjs");
|
|
7541
|
+
const badAgentPath = resolve12(demoDir, "bad-agent.mjs");
|
|
6383
7542
|
process.stderr.write("\n\x1B[36m\x1B[1marchal demo\x1B[0m \x1B[2m\u2014 same scenario, two agents\x1B[0m\n\n");
|
|
6384
7543
|
process.stderr.write("\x1B[1m\x1B[32m\u25B8 Good agent\x1B[0m \x1B[2m(checks labels, skips keep-open)\x1B[0m\n");
|
|
6385
7544
|
const goodReport = await runScenario({
|
|
@@ -6412,100 +7571,194 @@ function createDemoCommand() {
|
|
|
6412
7571
|
}
|
|
6413
7572
|
|
|
6414
7573
|
// src/commands/login.ts
|
|
6415
|
-
import { Command as
|
|
7574
|
+
import { Command as Command9 } from "commander";
|
|
6416
7575
|
import { exec } from "child_process";
|
|
6417
|
-
import { randomBytes } from "crypto";
|
|
7576
|
+
import { createHash as createHash3, randomBytes } from "crypto";
|
|
6418
7577
|
import { createServer } from "http";
|
|
6419
|
-
|
|
7578
|
+
/**
 * Normalise an auth base URL.
 *
 * @param {string} value - URL as configured.
 * @returns {string} The value trimmed, with trailing slashes removed and a
 *   final "/api" suffix dropped when present.
 */
function normalizeAuthUrl2(value) {
  const apiSuffix = "/api";
  const withoutTrailingSlashes = value.trim().replace(/\/+$/, "");
  if (withoutTrailingSlashes.endsWith(apiSuffix)) {
    return withoutTrailingSlashes.slice(0, -apiSuffix.length);
  }
  return withoutTrailingSlashes;
}
|
|
7582
|
+
var AUTH_BASE_URL3 = normalizeAuthUrl2(process.env["ARCHAL_AUTH_URL"] ?? "https://www.archal.ai");
|
|
6420
7583
|
var START_PORT = 51423;
|
|
6421
7584
|
var LOGIN_TIMEOUT_MS = 5 * 60 * 1e3;
|
|
7585
|
+
var TOKEN_FALLBACK_TTL_SECONDS = 10 * 365 * 24 * 60 * 60;
|
|
7586
|
+
/**
 * Escape the five HTML-significant characters so text can be embedded in an
 * HTML response without being interpreted as markup.
 *
 * @param {string} value - Untrusted text (here: an error message).
 * @returns {string} The text with `&`, `<`, `>`, `"` and `'` replaced by
 *   their entity references.
 */
function escapeHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };
  // Single pass, so the "&" introduced by one entity is never re-escaped.
  return value.replace(/[&<>"']/g, (char) => entities[char]);
}
|
|
6422
7589
|
/**
 * Try to open `url` with the platform's opener command (`open` on darwin,
 * `start` on win32, `xdg-open` elsewhere).
 *
 * Failure is reported with two informational messages and is not thrown.
 *
 * @param {string} url - URL to open; interpolated into the command inside
 *   double quotes.
 * @returns {void}
 */
function openBrowser(url) {
  let command;
  switch (process.platform) {
    case "darwin":
      command = `open "${url}"`;
      break;
    case "win32":
      command = `start "" "${url}"`;
      break;
    default:
      command = `xdg-open "${url}"`;
  }
  exec(command, (launchError) => {
    if (!launchError) return;
    info("Could not open browser automatically.");
    info(`Please visit the URL above manually to complete login.`);
  });
}
|
|
7599
|
+
/**
 * Generate a PKCE verifier/challenge pair.
 *
 * @returns {{codeVerifier: string, codeChallenge: string}} The verifier is
 *   32 random bytes in base64url; the challenge is the base64url SHA-256
 *   digest of the verifier string.
 */
function createPkcePair() {
  const codeVerifier = randomBytes(32).toString("base64url");
  const hasher = createHash3("sha256");
  hasher.update(codeVerifier);
  return { codeVerifier, codeChallenge: hasher.digest("base64url") };
}
|
|
7604
|
+
/**
 * Check whether a value is one of the known plan names.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} `true` for exactly "free", "pro" or "enterprise".
 */
function isPlan2(value) {
  const knownPlans = ["free", "pro", "enterprise"];
  return knownPlans.includes(value);
}
|
|
7607
|
+
/**
 * Build a placeholder credentials record from a bare API token.
 *
 * @param {string} token - API key/token supplied directly by the user.
 * @returns {object} Credentials with an empty refresh token, the email
 *   "(from token)", the "free" plan and no selected twins. `expiresAt` is
 *   the value returned by getJwtExpiry(token) or, when that is nullish, the
 *   current time plus TOKEN_FALLBACK_TTL_SECONDS.
 */
function credentialsFromApiToken(token) {
  const nowSeconds = Math.floor(Date.now() / 1e3);
  const jwtExpiry = getJwtExpiry(token);
  const expiresAt = jwtExpiry ?? nowSeconds + TOKEN_FALLBACK_TTL_SECONDS;
  return {
    token,
    refreshToken: "",
    email: "(from token)",
    plan: "free",
    selectedTwins: [],
    expiresAt
  };
}
|
|
7618
|
+
/**
 * Read credentials passed directly in the query string of the callback URL
 * (the flow used when the callback carries no auth code).
 *
 * @param {URL} requestUrl - Parsed callback URL.
 * @returns {object|null} Credentials built from the `token`/`access_token`,
 *   `refresh_token`/`refreshToken`, `email`, `plan` and `twins` parameters,
 *   or `null` when the token or email is missing or the plan is not a known
 *   plan name.
 */
function credentialsFromLegacyCallback(requestUrl) {
  const query = requestUrl.searchParams;
  const token = query.get("token") ?? query.get("access_token");
  const refreshToken = query.get("refresh_token") ?? query.get("refreshToken") ?? "";
  const email = query.get("email");
  const plan = query.get("plan");
  const twinsParam = query.get("twins");

  if (!token || !email || !isPlan2(plan)) {
    return null;
  }

  // `twins` is a comma-separated list; empty items are dropped.
  const selectedTwins = twinsParam ? twinsParam.split(",").filter((id) => Boolean(id)) : [];
  const nowSeconds = Math.floor(Date.now() / 1e3);
  const expiresAt = getJwtExpiry(token) ?? nowSeconds + TOKEN_FALLBACK_TTL_SECONDS;

  return {
    token,
    refreshToken,
    email,
    plan,
    selectedTwins,
    expiresAt
  };
}
|
|
6428
7637
|
/**
 * Find a port on 127.0.0.1 that can currently be bound.
 *
 * Each candidate is probed by starting a throwaway HTTP server on it and
 * closing it again. When listening fails, the next port is tried, as long as
 * the failed port is below START_PORT + 100.
 *
 * @param {number} startPort - First port to try.
 * @returns {Promise<number>} The port the probe server was bound to.
 */
function findFreePort(startPort) {
  // Resolve with the bound port, or reject when the listener reports an error.
  const probe = (candidate) => new Promise((onBound, onBusy) => {
    const listener = createServer();
    listener.listen(candidate, "127.0.0.1", () => {
      const bound = listener.address();
      const actualPort = typeof bound === "object" && bound ? bound.port : candidate;
      listener.close(() => onBound(actualPort));
    });
    listener.on("error", onBusy);
  });

  const attempt = (candidate) => probe(candidate).catch(() => {
    if (candidate < START_PORT + 100) {
      return attempt(candidate + 1);
    }
    throw new Error(
      "Could not find a free localhost callback port (tried ports 51423-51523).\nTry closing other services, or use token login: archal login --token <your-token>"
    );
  });

  return attempt(startPort);
}
|
|
6445
7656
|
function createLoginCommand() {
|
|
6446
|
-
return new
|
|
7657
|
+
return new Command9("login").description("Log in via archal.ai browser auth").option("--no-browser", "Do not automatically open the login URL in a browser").option("--token <token>", "Use an API key/token directly (CI/service fallback)").action(async (opts) => {
|
|
7658
|
+
const directToken = opts.token?.trim();
|
|
7659
|
+
if (directToken) {
|
|
7660
|
+
let credentials = credentialsFromApiToken(directToken);
|
|
7661
|
+
credentials = await refreshAuthFromServer(credentials);
|
|
7662
|
+
saveCredentials(credentials);
|
|
7663
|
+
success(`Logged in as ${credentials.email} (${credentials.plan} plan)`);
|
|
7664
|
+
return;
|
|
7665
|
+
}
|
|
6447
7666
|
const port = await findFreePort(START_PORT);
|
|
6448
7667
|
const state = randomBytes(16).toString("hex");
|
|
6449
7668
|
const redirectUrl = `http://localhost:${port}/callback`;
|
|
6450
|
-
const
|
|
7669
|
+
const { codeVerifier, codeChallenge } = createPkcePair();
|
|
7670
|
+
const authUrl = `${AUTH_BASE_URL3}/cli-auth?redirect=${encodeURIComponent(redirectUrl)}&state=${encodeURIComponent(state)}&code_challenge=${encodeURIComponent(codeChallenge)}&code_challenge_method=S256`;
|
|
6451
7671
|
info("Opening browser for authentication...");
|
|
6452
7672
|
info(`If your browser does not open, visit:
|
|
6453
7673
|
${authUrl}`);
|
|
6454
|
-
|
|
6455
|
-
|
|
6456
|
-
|
|
6457
|
-
|
|
6458
|
-
|
|
6459
|
-
|
|
6460
|
-
|
|
6461
|
-
|
|
6462
|
-
|
|
6463
|
-
|
|
6464
|
-
|
|
6465
|
-
|
|
6466
|
-
|
|
6467
|
-
|
|
6468
|
-
|
|
7674
|
+
if (opts.browser !== false) {
|
|
7675
|
+
openBrowser(authUrl);
|
|
7676
|
+
}
|
|
7677
|
+
await new Promise((resolve13, reject) => {
|
|
7678
|
+
let settled = false;
|
|
7679
|
+
const settleResolve = () => {
|
|
7680
|
+
if (settled) return;
|
|
7681
|
+
settled = true;
|
|
7682
|
+
resolve13();
|
|
7683
|
+
};
|
|
7684
|
+
const settleReject = (error2) => {
|
|
7685
|
+
if (settled) return;
|
|
7686
|
+
settled = true;
|
|
7687
|
+
reject(error2);
|
|
7688
|
+
};
|
|
7689
|
+
function closeAndResolve() {
|
|
7690
|
+
if (!server.listening) {
|
|
7691
|
+
settleResolve();
|
|
6469
7692
|
return;
|
|
6470
7693
|
}
|
|
6471
|
-
|
|
6472
|
-
|
|
6473
|
-
|
|
6474
|
-
|
|
6475
|
-
|
|
6476
|
-
if (!token || !email || !plan) {
|
|
6477
|
-
res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
|
|
6478
|
-
res.end("<h1>Login failed</h1><p>Missing callback parameters.</p>");
|
|
6479
|
-
server.close();
|
|
6480
|
-
reject(new Error("Missing token/email/plan in callback"));
|
|
7694
|
+
server.close(() => settleResolve());
|
|
7695
|
+
}
|
|
7696
|
+
function closeAndReject(error2) {
|
|
7697
|
+
if (!server.listening) {
|
|
7698
|
+
settleReject(error2);
|
|
6481
7699
|
return;
|
|
6482
7700
|
}
|
|
6483
|
-
|
|
6484
|
-
|
|
6485
|
-
|
|
6486
|
-
|
|
6487
|
-
|
|
6488
|
-
|
|
6489
|
-
|
|
6490
|
-
|
|
6491
|
-
|
|
6492
|
-
|
|
6493
|
-
|
|
6494
|
-
|
|
6495
|
-
|
|
6496
|
-
|
|
6497
|
-
|
|
6498
|
-
|
|
6499
|
-
|
|
6500
|
-
|
|
6501
|
-
|
|
7701
|
+
server.close(() => settleReject(error2));
|
|
7702
|
+
}
|
|
7703
|
+
const server = createServer((req, res) => {
|
|
7704
|
+
void (async () => {
|
|
7705
|
+
try {
|
|
7706
|
+
const requestUrl = new URL(req.url ?? "/", `http://localhost:${port}`);
|
|
7707
|
+
if (requestUrl.pathname !== "/callback") {
|
|
7708
|
+
res.writeHead(404);
|
|
7709
|
+
res.end("Not found");
|
|
7710
|
+
return;
|
|
7711
|
+
}
|
|
7712
|
+
const returnedState = requestUrl.searchParams.get("state");
|
|
7713
|
+
if (returnedState !== state) {
|
|
7714
|
+
res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
|
|
7715
|
+
res.end("<h1>Login failed</h1><p>State mismatch.</p>");
|
|
7716
|
+
closeAndReject(new Error("State mismatch in callback"));
|
|
7717
|
+
return;
|
|
7718
|
+
}
|
|
7719
|
+
const code = requestUrl.searchParams.get("code");
|
|
7720
|
+
const credentials = code ? await exchangeCliAuthCode({
|
|
7721
|
+
code,
|
|
7722
|
+
codeVerifier,
|
|
7723
|
+
redirectUri: redirectUrl
|
|
7724
|
+
}) : credentialsFromLegacyCallback(requestUrl);
|
|
7725
|
+
if (!credentials) {
|
|
7726
|
+
res.writeHead(400, { "content-type": "text/html; charset=utf-8" });
|
|
7727
|
+
res.end("<h1>Login failed</h1><p>Missing auth code.</p>");
|
|
7728
|
+
closeAndReject(new Error("Missing code in callback"));
|
|
7729
|
+
return;
|
|
7730
|
+
}
|
|
7731
|
+
saveCredentials(credentials);
|
|
7732
|
+
res.writeHead(200, { "content-type": "text/html; charset=utf-8" });
|
|
7733
|
+
res.end("<h1>Login successful</h1><p>You can close this tab.</p>");
|
|
7734
|
+
success(`Logged in as ${credentials.email} (${credentials.plan} plan)`);
|
|
7735
|
+
if (credentials.plan === "free" && credentials.selectedTwins.length === 0) {
|
|
7736
|
+
info(
|
|
7737
|
+
"You haven't selected any twins yet.\n Run `archal twins select` to choose up to 5 twins for your free plan."
|
|
7738
|
+
);
|
|
7739
|
+
}
|
|
7740
|
+
closeAndResolve();
|
|
7741
|
+
} catch (error2) {
|
|
7742
|
+
const message = error2 instanceof Error ? error2.message : String(error2);
|
|
7743
|
+
if (!res.headersSent) {
|
|
7744
|
+
res.writeHead(500, { "content-type": "text/html; charset=utf-8" });
|
|
7745
|
+
res.end(`<h1>Login failed</h1><p>${escapeHtml(message)}</p>`);
|
|
7746
|
+
}
|
|
7747
|
+
closeAndReject(error2);
|
|
7748
|
+
}
|
|
7749
|
+
})().catch((error2) => {
|
|
7750
|
+
closeAndReject(error2);
|
|
7751
|
+
});
|
|
6502
7752
|
});
|
|
6503
|
-
server.listen(port, "127.0.0.1");
|
|
6504
7753
|
const timeout = setTimeout(() => {
|
|
6505
|
-
|
|
6506
|
-
reject(new Error("Login timed out. Run archal login again."));
|
|
7754
|
+
closeAndReject(new Error("Login timed out. Run archal login again."));
|
|
6507
7755
|
}, LOGIN_TIMEOUT_MS);
|
|
6508
7756
|
server.on("close", () => clearTimeout(timeout));
|
|
7757
|
+
server.once("error", (error2) => {
|
|
7758
|
+
clearTimeout(timeout);
|
|
7759
|
+
closeAndReject(error2);
|
|
7760
|
+
});
|
|
7761
|
+
server.listen(port, "127.0.0.1");
|
|
6509
7762
|
}).catch((error2) => {
|
|
6510
7763
|
const message = error2 instanceof Error ? error2.message : String(error2);
|
|
6511
7764
|
error(message);
|
|
@@ -6515,9 +7768,9 @@ function createLoginCommand() {
|
|
|
6515
7768
|
}
|
|
6516
7769
|
|
|
6517
7770
|
// src/commands/logout.ts
|
|
6518
|
-
import { Command as
|
|
7771
|
+
import { Command as Command10 } from "commander";
|
|
6519
7772
|
function createLogoutCommand() {
|
|
6520
|
-
return new
|
|
7773
|
+
return new Command10("logout").description("Log out and remove stored credentials").action(() => {
|
|
6521
7774
|
const creds = getCredentials();
|
|
6522
7775
|
if (!creds) {
|
|
6523
7776
|
info("Not currently logged in.");
|
|
@@ -6535,7 +7788,7 @@ function createLogoutCommand() {
|
|
|
6535
7788
|
}
|
|
6536
7789
|
|
|
6537
7790
|
// src/commands/whoami.ts
|
|
6538
|
-
import { Command as
|
|
7791
|
+
import { Command as Command11 } from "commander";
|
|
6539
7792
|
var RESET2 = "\x1B[0m";
|
|
6540
7793
|
var BOLD2 = "\x1B[1m";
|
|
6541
7794
|
var DIM2 = "\x1B[2m";
|
|
@@ -6543,11 +7796,12 @@ var CYAN2 = "\x1B[36m";
|
|
|
6543
7796
|
var GREEN2 = "\x1B[32m";
|
|
6544
7797
|
var YELLOW2 = "\x1B[33m";
|
|
6545
7798
|
function createWhoamiCommand() {
|
|
6546
|
-
return new
|
|
6547
|
-
let current =
|
|
6548
|
-
|
|
6549
|
-
|
|
6550
|
-
|
|
7799
|
+
return new Command11("whoami").description("Show current login status, plan, and entitlements").option("--refresh", "Force refresh from server").action(async (opts) => {
|
|
7800
|
+
let current = getCredentials();
|
|
7801
|
+
if (!current) {
|
|
7802
|
+
info("Not logged in. Run: archal login");
|
|
7803
|
+
return;
|
|
7804
|
+
}
|
|
6551
7805
|
if (opts.refresh) {
|
|
6552
7806
|
current = await refreshAuthFromServer(current);
|
|
6553
7807
|
saveCredentials(current);
|
|
@@ -6611,7 +7865,7 @@ function planBadge(plan) {
|
|
|
6611
7865
|
}
|
|
6612
7866
|
|
|
6613
7867
|
// src/commands/upgrade.ts
|
|
6614
|
-
import { Command as
|
|
7868
|
+
import { Command as Command12 } from "commander";
|
|
6615
7869
|
import { exec as exec2 } from "child_process";
|
|
6616
7870
|
var BILLING_URL = "https://archal.ai/dashboard/billing";
|
|
6617
7871
|
function openBrowser2(url) {
|
|
@@ -6621,7 +7875,7 @@ function openBrowser2(url) {
|
|
|
6621
7875
|
});
|
|
6622
7876
|
}
|
|
6623
7877
|
function createUpgradeCommand() {
|
|
6624
|
-
return new
|
|
7878
|
+
return new Command12("upgrade").description("Open the Archal billing page to upgrade your plan").action(() => {
|
|
6625
7879
|
const creds = getCredentials();
|
|
6626
7880
|
if (creds?.plan === "enterprise") {
|
|
6627
7881
|
info("You are already on the enterprise plan.");
|
|
@@ -6640,7 +7894,7 @@ function createUpgradeCommand() {
|
|
|
6640
7894
|
}
|
|
6641
7895
|
|
|
6642
7896
|
// src/commands/help.ts
|
|
6643
|
-
import { Command as
|
|
7897
|
+
import { Command as Command13 } from "commander";
|
|
6644
7898
|
var RESET3 = "\x1B[0m";
|
|
6645
7899
|
var BOLD3 = "\x1B[1m";
|
|
6646
7900
|
var DIM3 = "\x1B[2m";
|
|
@@ -6668,15 +7922,7 @@ var COMMAND_GROUPS = [
|
|
|
6668
7922
|
]
|
|
6669
7923
|
},
|
|
6670
7924
|
{
|
|
6671
|
-
heading: "
|
|
6672
|
-
commands: [
|
|
6673
|
-
{ name: "twin start <name>", description: "Start a local twin process (debug/local only)" },
|
|
6674
|
-
{ name: "twin stop <name>", description: "Stop a running local twin process" },
|
|
6675
|
-
{ name: "twin status", description: "Show running local twin processes" }
|
|
6676
|
-
]
|
|
6677
|
-
},
|
|
6678
|
-
{
|
|
6679
|
-
heading: "Twin Catalog",
|
|
7925
|
+
heading: "Twins",
|
|
6680
7926
|
commands: [
|
|
6681
7927
|
{ name: "twins list", description: "List available twins and entitlement status" },
|
|
6682
7928
|
{ name: "twins select", description: "Choose which twins to use on your free plan" }
|
|
@@ -6700,7 +7946,7 @@ var COMMAND_GROUPS = [
|
|
|
6700
7946
|
];
|
|
6701
7947
|
function showHelp() {
|
|
6702
7948
|
process.stderr.write(`
|
|
6703
|
-
${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}
|
|
7949
|
+
${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v${CLI_VERSION}${RESET3}
|
|
6704
7950
|
`);
|
|
6705
7951
|
process.stderr.write(`${DIM3}The QA layer for the software factory era${RESET3}
|
|
6706
7952
|
|
|
@@ -6722,21 +7968,21 @@ ${CYAN3}${BOLD3}Archal CLI${RESET3} ${DIM3}v0.1.0${RESET3}
|
|
|
6722
7968
|
`);
|
|
6723
7969
|
}
|
|
6724
7970
|
function createHelpCommand() {
|
|
6725
|
-
return new
|
|
7971
|
+
return new Command13("help").description("Show all available commands").action(() => {
|
|
6726
7972
|
showHelp();
|
|
6727
7973
|
});
|
|
6728
7974
|
}
|
|
6729
7975
|
|
|
6730
7976
|
// src/commands/setup.ts
|
|
6731
|
-
import { Command as
|
|
6732
|
-
import { existsSync as
|
|
7977
|
+
import { Command as Command14 } from "commander";
|
|
7978
|
+
import { existsSync as existsSync17 } from "fs";
|
|
6733
7979
|
var RESET4 = "\x1B[0m";
|
|
6734
7980
|
var BOLD4 = "\x1B[1m";
|
|
6735
7981
|
var DIM4 = "\x1B[2m";
|
|
6736
7982
|
var CYAN4 = "\x1B[36m";
|
|
6737
7983
|
var GREEN3 = "\x1B[32m";
|
|
6738
7984
|
function createSetupCommand() {
|
|
6739
|
-
return new
|
|
7985
|
+
return new Command14("setup").description("Guided onboarding wizard for first-time setup").action(async () => {
|
|
6740
7986
|
process.stderr.write(`
|
|
6741
7987
|
${CYAN4}${BOLD4}Archal Setup${RESET4}
|
|
6742
7988
|
`);
|
|
@@ -6758,7 +8004,7 @@ ${CYAN4}${BOLD4}Archal Setup${RESET4}
|
|
|
6758
8004
|
${BOLD4}Step 2: Configuration${RESET4}
|
|
6759
8005
|
`);
|
|
6760
8006
|
const configPath = getConfigPath();
|
|
6761
|
-
if (
|
|
8007
|
+
if (existsSync17(configPath)) {
|
|
6762
8008
|
success(`Config file exists: ${configPath}`);
|
|
6763
8009
|
} else {
|
|
6764
8010
|
const create = await askConfirm("Create a default config file?");
|
|
@@ -6823,7 +8069,7 @@ ${DIM4}${"\u2500".repeat(45)}${RESET4}
|
|
|
6823
8069
|
`);
|
|
6824
8070
|
process.stderr.write(` ${CYAN4}archal scenario create my-first-test${RESET4} ${DIM4}Create a scenario${RESET4}
|
|
6825
8071
|
`);
|
|
6826
|
-
process.stderr.write(` ${CYAN4}archal run scenario.md --
|
|
8072
|
+
process.stderr.write(` ${CYAN4}archal run scenario.md --engine-endpoint "..." --engine-model "..."${RESET4} ${DIM4}Run a scenario${RESET4}
|
|
6827
8073
|
`);
|
|
6828
8074
|
process.stderr.write(` ${CYAN4}archal help${RESET4} ${DIM4}See all commands${RESET4}
|
|
6829
8075
|
|
|
@@ -6832,8 +8078,8 @@ ${DIM4}${"\u2500".repeat(45)}${RESET4}
|
|
|
6832
8078
|
}
|
|
6833
8079
|
|
|
6834
8080
|
// src/index.ts
|
|
6835
|
-
var program = new
|
|
6836
|
-
program.name("archal").description("The QA layer for the software factory era \u2014 test AI agents against digital twins").version(
|
|
8081
|
+
var program = new Command15();
|
|
8082
|
+
program.name("archal").description("The QA layer for the software factory era \u2014 test AI agents against digital twins").version(CLI_VERSION).option("-q, --quiet", "Suppress non-error output").option("-v, --verbose", "Enable debug logging").hook("preAction", (_thisCommand) => {
|
|
6837
8083
|
const opts = program.opts();
|
|
6838
8084
|
if (opts.quiet) {
|
|
6839
8085
|
configureLogger({ quiet: true });
|
|
@@ -6848,7 +8094,6 @@ program.addCommand(createWhoamiCommand());
|
|
|
6848
8094
|
program.addCommand(createSetupCommand());
|
|
6849
8095
|
program.addCommand(createRunCommand());
|
|
6850
8096
|
program.addCommand(createInitCommand());
|
|
6851
|
-
program.addCommand(createTwinCommand());
|
|
6852
8097
|
program.addCommand(createTwinsCommand());
|
|
6853
8098
|
program.addCommand(createScenarioCommand());
|
|
6854
8099
|
program.addCommand(createTraceCommand());
|
|
@@ -6864,6 +8109,14 @@ program.action(() => {
|
|
|
6864
8109
|
process.stderr.write("\x1B[33mNot logged in.\x1B[0m Get started with: \x1B[36marchal login\x1B[0m\n\n");
|
|
6865
8110
|
}
|
|
6866
8111
|
});
|
|
8112
|
+
function handleShutdown(signal) {
|
|
8113
|
+
process.stderr.write(`
|
|
8114
|
+
Received ${signal}, shutting down...
|
|
8115
|
+
`);
|
|
8116
|
+
process.exit(128 + (signal === "SIGINT" ? 2 : 15));
|
|
8117
|
+
}
|
|
8118
|
+
process.on("SIGINT", () => handleShutdown("SIGINT"));
|
|
8119
|
+
process.on("SIGTERM", () => handleShutdown("SIGTERM"));
|
|
6867
8120
|
program.parseAsync(process.argv).catch((err) => {
|
|
6868
8121
|
const message = err instanceof Error ? err.message : String(err);
|
|
6869
8122
|
process.stderr.write(`Error: ${message}
|