archal 0.9.9 → 0.9.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +310 -144
- package/dist/seed/dynamic-generator.cjs +0 -1
- package/dist/vitest/{chunk-YV6BH6DO.js → chunk-KTMNDJFB.js} +0 -1
- package/dist/vitest/{chunk-RKYS44AS.js → chunk-L6HSMJ3F.js} +1 -1
- package/dist/vitest/index.cjs +0 -1
- package/dist/vitest/index.js +2 -2
- package/dist/vitest/runtime/hosted-session-reaper.cjs +0 -1
- package/dist/vitest/runtime/hosted-session-reaper.js +1 -1
- package/dist/vitest/runtime/setup-files.js +2 -2
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -31869,7 +31869,6 @@ var init_scenario_schemas = __esm({
|
|
|
31869
31869
|
timeout: external_exports3.number().default(120),
|
|
31870
31870
|
runs: external_exports3.number().default(5),
|
|
31871
31871
|
evaluatorModel: external_exports3.string().optional(),
|
|
31872
|
-
difficulty: external_exports3.enum(["easy", "medium", "hard"]).optional(),
|
|
31873
31872
|
tags: external_exports3.array(external_exports3.string()).default([])
|
|
31874
31873
|
});
|
|
31875
31874
|
}
|
|
@@ -58333,8 +58332,7 @@ function loadArchalFile(explicitPath, ...searchFrom) {
|
|
|
58333
58332
|
);
|
|
58334
58333
|
}
|
|
58335
58334
|
if (typeof c["agentModel"] === "string") r.agentModel = c["agentModel"].trim() || void 0;
|
|
58336
|
-
if (typeof c["
|
|
58337
|
-
if (typeof c["model"] === "string") r.model = c["model"].trim() || void 0;
|
|
58335
|
+
if (typeof c["evaluatorModel"] === "string") r.evaluatorModel = c["evaluatorModel"].trim() || void 0;
|
|
58338
58336
|
if (typeof c["runs"] === "number" && Number.isInteger(c["runs"]) && c["runs"] > 0) r.runs = c["runs"];
|
|
58339
58337
|
if (typeof c["timeout"] === "number" && c["timeout"] > 0) r.timeout = c["timeout"];
|
|
58340
58338
|
return { config: r, configDir: (0, import_node_path28.dirname)(filePath) };
|
|
@@ -60424,21 +60422,12 @@ function parseConfigSection(configText) {
|
|
|
60424
60422
|
result.evaluatorModel = value;
|
|
60425
60423
|
break;
|
|
60426
60424
|
}
|
|
60427
|
-
case "difficulty": {
|
|
60428
|
-
const normalized = value.toLowerCase();
|
|
60429
|
-
if (normalized === "easy" || normalized === "medium" || normalized === "hard") {
|
|
60430
|
-
result.difficulty = normalized;
|
|
60431
|
-
} else {
|
|
60432
|
-
warn(`Invalid difficulty "${value}" in scenario config \u2014 ignoring. Must be "easy", "medium", or "hard".`);
|
|
60433
|
-
}
|
|
60434
|
-
break;
|
|
60435
|
-
}
|
|
60436
60425
|
case "tags": {
|
|
60437
60426
|
result.tags = value.split(",").map((tag) => tag.trim()).filter(Boolean);
|
|
60438
60427
|
break;
|
|
60439
60428
|
}
|
|
60440
60429
|
default: {
|
|
60441
|
-
const knownKeys = ["twins", "timeout", "seed", "runs", "evaluator", "evaluator-model", "evaluatormodel", "model", "
|
|
60430
|
+
const knownKeys = ["twins", "timeout", "seed", "runs", "evaluator", "evaluator-model", "evaluatormodel", "model", "tags"];
|
|
60442
60431
|
const close = knownKeys.find((k) => k.startsWith(key));
|
|
60443
60432
|
const hint = close ? ` Did you mean "${close}"?` : ` Known keys: ${knownKeys.join(", ")}.`;
|
|
60444
60433
|
warn(`Unknown config key "${key}" in scenario \u2014 ignored.${hint}`);
|
|
@@ -60502,8 +60491,7 @@ function parseScenarioMarkdown(markdown, sourcePath) {
|
|
|
60502
60491
|
timeout: parsedConfig.timeout ?? 180,
|
|
60503
60492
|
runs: parsedConfig.runs ?? 1,
|
|
60504
60493
|
evaluatorModel: parsedConfig.evaluatorModel,
|
|
60505
|
-
tags: parsedConfig.tags ?? []
|
|
60506
|
-
difficulty: parsedConfig.difficulty
|
|
60494
|
+
tags: parsedConfig.tags ?? []
|
|
60507
60495
|
};
|
|
60508
60496
|
debug("Parsed scenario", {
|
|
60509
60497
|
title: sections.title,
|
|
@@ -60813,7 +60801,7 @@ function traceEntriesForTool(trace, toolName) {
|
|
|
60813
60801
|
// src/runner/reporter-files.ts
|
|
60814
60802
|
function writeLastRunLog(report) {
|
|
60815
60803
|
try {
|
|
60816
|
-
const dir = (0, import_node_path8.join)(process.cwd(), ".archal");
|
|
60804
|
+
const dir = (0, import_node_path8.join)(process.cwd(), ".archal", "cache");
|
|
60817
60805
|
if (!(0, import_node_fs10.existsSync)(dir)) {
|
|
60818
60806
|
(0, import_node_fs10.mkdirSync)(dir, { recursive: true });
|
|
60819
60807
|
}
|
|
@@ -60839,7 +60827,7 @@ function writeLastRunLog(report) {
|
|
|
60839
60827
|
}
|
|
60840
60828
|
function saveRunTrace(report) {
|
|
60841
60829
|
try {
|
|
60842
|
-
const dir = (0, import_node_path8.join)(process.cwd(), ".archal", "runs");
|
|
60830
|
+
const dir = (0, import_node_path8.join)(process.cwd(), ".archal", "cache", "runs");
|
|
60843
60831
|
(0, import_node_fs10.mkdirSync)(dir, { recursive: true });
|
|
60844
60832
|
const ts = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").slice(0, 19);
|
|
60845
60833
|
const slug2 = (report.scenarioTitle ?? "untitled").toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 60);
|
|
@@ -63404,6 +63392,55 @@ function buildCriterionMaps(scenario) {
|
|
|
63404
63392
|
}
|
|
63405
63393
|
return { criterionDescriptions, criterionTypes };
|
|
63406
63394
|
}
|
|
63395
|
+
function summarizeExistingState(snapshot) {
|
|
63396
|
+
if (!snapshot) return "(existing state)";
|
|
63397
|
+
const nonEmpty = Object.entries(snapshot).filter(([, entities]) => Array.isArray(entities) && entities.length > 0).map(([collection, entities]) => ({ collection, count: entities.length })).sort((a, b) => b.count - a.count).slice(0, 2);
|
|
63398
|
+
if (nonEmpty.length === 0) return "(existing state)";
|
|
63399
|
+
return nonEmpty.map(({ count, collection }) => `${count} ${collection}`).join(", ");
|
|
63400
|
+
}
|
|
63401
|
+
function emitSkipReapplyBanner(declaredSeed, stateSummary) {
|
|
63402
|
+
const body = `Seed not re-applied: scenario declares seed "${declaredSeed}" but reusing existing
|
|
63403
|
+
session state from an earlier \`archal twin start\` (has: ${stateSummary}).
|
|
63404
|
+
Pass --fresh-seed to reset and re-apply, or run \`archal twin stop\` and retry.`;
|
|
63405
|
+
warn(body);
|
|
63406
|
+
}
|
|
63407
|
+
var RESET_STATE_TIMEOUT_MS = 3e4;
|
|
63408
|
+
var RESET_STATE_RETRIES = 6;
|
|
63409
|
+
var RESET_STATE_BACKOFF_MS = [1500, 2500, 3500, 5e3, 6e3, 7e3];
|
|
63410
|
+
async function resetLiveTwinState(twinApiUrl, bearerToken, adminAuth) {
|
|
63411
|
+
const baseUrl = twinApiUrl.replace(/\/(mcp|api)\/?$/, "");
|
|
63412
|
+
const url2 = `${baseUrl}/state`;
|
|
63413
|
+
const headers = buildTwinHttpHeaders(bearerToken, adminAuth);
|
|
63414
|
+
let response;
|
|
63415
|
+
try {
|
|
63416
|
+
response = await fetchWithRetry2(
|
|
63417
|
+
url2,
|
|
63418
|
+
{ method: "DELETE", headers },
|
|
63419
|
+
{
|
|
63420
|
+
maxRetries: RESET_STATE_RETRIES,
|
|
63421
|
+
timeoutMs: RESET_STATE_TIMEOUT_MS,
|
|
63422
|
+
backoffMs: RESET_STATE_BACKOFF_MS,
|
|
63423
|
+
label: "twin-state-reset"
|
|
63424
|
+
}
|
|
63425
|
+
);
|
|
63426
|
+
} catch (error49) {
|
|
63427
|
+
const detail2 = error49 instanceof Error ? error49.message : String(error49);
|
|
63428
|
+
return { ok: false, status: 0, detail: detail2 };
|
|
63429
|
+
}
|
|
63430
|
+
if (response.ok) {
|
|
63431
|
+
return { ok: true };
|
|
63432
|
+
}
|
|
63433
|
+
let detail;
|
|
63434
|
+
try {
|
|
63435
|
+
const raw = await response.text();
|
|
63436
|
+
const parsed = raw.trim() ? JSON.parse(raw) : {};
|
|
63437
|
+
const msg = parsed["error"] ?? parsed["message"];
|
|
63438
|
+
detail = typeof msg === "string" ? msg : raw.slice(0, 200) || void 0;
|
|
63439
|
+
} catch {
|
|
63440
|
+
detail = void 0;
|
|
63441
|
+
}
|
|
63442
|
+
return { ok: false, status: response.status, detail };
|
|
63443
|
+
}
|
|
63407
63444
|
async function hydrateStaticSeedSelections(seedSelections, seedDescription, options) {
|
|
63408
63445
|
const autoSelections = generateSeedSelections(
|
|
63409
63446
|
seedSelections.map((selection) => selection.twinName),
|
|
@@ -63463,6 +63500,43 @@ async function hydrateStaticSeedSelections(seedSelections, seedDescription, opti
|
|
|
63463
63500
|
continue;
|
|
63464
63501
|
}
|
|
63465
63502
|
if (options.isReusedEnvSession && twinHasExistingState(selection.twinName)) {
|
|
63503
|
+
if (options.freshSeed) {
|
|
63504
|
+
const twinUrl2 = options.cloudTwinUrls[selection.twinName];
|
|
63505
|
+
if (!twinUrl2) {
|
|
63506
|
+
throw new Error(
|
|
63507
|
+
`--fresh-seed requested but hosted session is missing the API base URL for twin "${selection.twinName}". Re-provision the hosted session before retrying.`
|
|
63508
|
+
);
|
|
63509
|
+
}
|
|
63510
|
+
info(
|
|
63511
|
+
`--fresh-seed: resetting twin "${selection.twinName}" and re-applying seed "${selection.seedName}".`
|
|
63512
|
+
);
|
|
63513
|
+
const resetResult = await resetLiveTwinState(
|
|
63514
|
+
twinUrl2,
|
|
63515
|
+
options.apiBearerToken,
|
|
63516
|
+
adminAuth
|
|
63517
|
+
);
|
|
63518
|
+
if (!resetResult.ok) {
|
|
63519
|
+
throw new Error(
|
|
63520
|
+
`--fresh-seed: failed to reset state on twin "${selection.twinName}" (HTTP ${resetResult.status}${resetResult.detail ? `: ${resetResult.detail}` : ""}). Stop the twin session with \`archal twin stop\` and retry.`
|
|
63521
|
+
);
|
|
63522
|
+
}
|
|
63523
|
+
const loadResult2 = await loadNamedSeedIntoLiveTwin(
|
|
63524
|
+
twinUrl2,
|
|
63525
|
+
selection.seedName,
|
|
63526
|
+
options.apiBearerToken,
|
|
63527
|
+
adminAuth
|
|
63528
|
+
);
|
|
63529
|
+
if (!loadResult2.ok) {
|
|
63530
|
+
throw new Error(
|
|
63531
|
+
`--fresh-seed: could not load seed "${selection.seedName}" into twin "${selection.twinName}" (HTTP ${loadResult2.status}${loadResult2.detail ? `: ${loadResult2.detail}` : ""}). Re-provision the hosted session with the requested seed, or point --seed at a seed this twin exposes.`
|
|
63532
|
+
);
|
|
63533
|
+
}
|
|
63534
|
+
continue;
|
|
63535
|
+
}
|
|
63536
|
+
const stateSummary = summarizeExistingState(
|
|
63537
|
+
probedSnapshot ? normalizeSeedState(probedSnapshot[selection.twinName]) : null
|
|
63538
|
+
);
|
|
63539
|
+
emitSkipReapplyBanner(selection.seedName, stateSummary);
|
|
63466
63540
|
info(
|
|
63467
63541
|
`Using existing state on reused session; scenario seed '${selection.seedName}' not re-applied.`
|
|
63468
63542
|
);
|
|
@@ -64311,40 +64385,27 @@ function resolveMarkdownPromptOrder(markdownFiles) {
|
|
|
64311
64385
|
// src/runner/harness-paths.ts
|
|
64312
64386
|
var import_node_fs24 = require("fs");
|
|
64313
64387
|
var import_node_path22 = require("path");
|
|
64314
|
-
var MANIFEST_FILE = "
|
|
64315
|
-
var
|
|
64316
|
-
"harness.ts",
|
|
64317
|
-
"harness.js",
|
|
64318
|
-
"harness.mjs",
|
|
64319
|
-
"harness.cjs",
|
|
64320
|
-
"archal-harness.ts",
|
|
64321
|
-
"archal-harness.js",
|
|
64322
|
-
"archal-harness.mjs",
|
|
64323
|
-
"archal-harness.cjs"
|
|
64324
|
-
];
|
|
64325
|
-
var PROJECT_ARCHAL_HARNESS_FILENAMES = /* @__PURE__ */ new Set([
|
|
64388
|
+
var MANIFEST_FILE = "harness.json";
|
|
64389
|
+
var HARNESS_ENTRY_CANDIDATES = [
|
|
64326
64390
|
"harness.ts",
|
|
64327
64391
|
"harness.js",
|
|
64328
64392
|
"harness.mjs",
|
|
64329
64393
|
"harness.cjs"
|
|
64330
|
-
]);
|
|
64331
|
-
var PROJECT_ARCHAL_HARNESS_ENTRY_CANDIDATES = [
|
|
64332
|
-
"harness.ts",
|
|
64333
|
-
"harness.js",
|
|
64334
|
-
"harness.mjs",
|
|
64335
|
-
"harness.cjs",
|
|
64336
|
-
"archal-harness.ts",
|
|
64337
|
-
"archal-harness.js",
|
|
64338
|
-
"archal-harness.mjs",
|
|
64339
|
-
"archal-harness.cjs"
|
|
64340
64394
|
];
|
|
64395
|
+
var PROJECT_ARCHAL_HARNESS_FILENAMES = new Set(HARNESS_ENTRY_CANDIDATES);
|
|
64341
64396
|
var HARNESS_DISCOVERY_BOUNDARY_FILES = [
|
|
64342
64397
|
".git",
|
|
64343
|
-
".archal
|
|
64398
|
+
".archal"
|
|
64344
64399
|
];
|
|
64345
|
-
|
|
64346
|
-
|
|
64347
|
-
|
|
64400
|
+
var PROJECT_ROOT_MARKERS = [
|
|
64401
|
+
"package.json",
|
|
64402
|
+
".archal.json",
|
|
64403
|
+
".git"
|
|
64404
|
+
];
|
|
64405
|
+
function isProperAncestor(candidate, target) {
|
|
64406
|
+
if (candidate === target) return false;
|
|
64407
|
+
const withSep = candidate.endsWith(import_node_path22.sep) ? candidate : candidate + import_node_path22.sep;
|
|
64408
|
+
return target.startsWith(withSep);
|
|
64348
64409
|
}
|
|
64349
64410
|
function isProjectArchalHarnessFile(entryPath) {
|
|
64350
64411
|
return (0, import_node_path22.basename)((0, import_node_path22.dirname)(entryPath)) === ".archal" && PROJECT_ARCHAL_HARNESS_FILENAMES.has((0, import_node_path22.basename)(entryPath).toLowerCase());
|
|
@@ -64356,6 +64417,28 @@ function resolveHarnessExecutionRootFromPath(entryPath) {
|
|
|
64356
64417
|
}
|
|
64357
64418
|
return entryDir;
|
|
64358
64419
|
}
|
|
64420
|
+
function resolveHarnessProjectRoot(startPath, fallback, stopAt) {
|
|
64421
|
+
let current;
|
|
64422
|
+
try {
|
|
64423
|
+
const stats = (0, import_node_fs24.statSync)(startPath);
|
|
64424
|
+
current = stats.isDirectory() ? startPath : (0, import_node_path22.dirname)(startPath);
|
|
64425
|
+
} catch {
|
|
64426
|
+
current = (0, import_node_path22.dirname)(startPath);
|
|
64427
|
+
}
|
|
64428
|
+
const effectiveFallback = fallback ?? current;
|
|
64429
|
+
while (true) {
|
|
64430
|
+
for (const marker of PROJECT_ROOT_MARKERS) {
|
|
64431
|
+
if ((0, import_node_fs24.existsSync)((0, import_node_path22.resolve)(current, marker))) {
|
|
64432
|
+
return current;
|
|
64433
|
+
}
|
|
64434
|
+
}
|
|
64435
|
+
const parent = (0, import_node_path22.dirname)(current);
|
|
64436
|
+
if (parent === current || stopAt !== void 0 && isProperAncestor(current, stopAt)) {
|
|
64437
|
+
return effectiveFallback;
|
|
64438
|
+
}
|
|
64439
|
+
current = parent;
|
|
64440
|
+
}
|
|
64441
|
+
}
|
|
64359
64442
|
function normalizeHarnessDiscoveryStart(startPath) {
|
|
64360
64443
|
const trimmed = startPath.trim();
|
|
64361
64444
|
if (!trimmed) {
|
|
@@ -64400,20 +64483,11 @@ function discoverRepoLocalHarness(...startPaths) {
|
|
|
64400
64483
|
let current = startDir;
|
|
64401
64484
|
while (!visited.has(current)) {
|
|
64402
64485
|
visited.add(current);
|
|
64403
|
-
for (const candidate of
|
|
64486
|
+
for (const candidate of HARNESS_ENTRY_CANDIDATES) {
|
|
64404
64487
|
if ((0, import_node_fs24.existsSync)((0, import_node_path22.resolve)(current, candidate))) {
|
|
64405
|
-
return {
|
|
64406
|
-
harnessDir: current,
|
|
64407
|
-
reason: candidate.startsWith("archal-") ? "archal-harness" : "harness"
|
|
64408
|
-
};
|
|
64488
|
+
return { harnessDir: current };
|
|
64409
64489
|
}
|
|
64410
64490
|
}
|
|
64411
|
-
if (trimToUndefined(readPackageJson3(current)?.archal?.harness)) {
|
|
64412
|
-
return {
|
|
64413
|
-
harnessDir: current,
|
|
64414
|
-
reason: "package.json#archal.harness"
|
|
64415
|
-
};
|
|
64416
|
-
}
|
|
64417
64491
|
const reachedBoundary = HARNESS_DISCOVERY_BOUNDARY_FILES.some((file2) => (0, import_node_fs24.existsSync)((0, import_node_path22.resolve)(current, file2)));
|
|
64418
64492
|
const parent = (0, import_node_path22.dirname)(current);
|
|
64419
64493
|
if (reachedBoundary || parent === current) {
|
|
@@ -64439,29 +64513,11 @@ function resolveHarnessLocation(harnessInput, visited = /* @__PURE__ */ new Set(
|
|
|
64439
64513
|
return {
|
|
64440
64514
|
executionRoot,
|
|
64441
64515
|
manifestPath: (0, import_node_path22.resolve)(executionRoot, MANIFEST_FILE),
|
|
64442
|
-
entryPath: resolvedInput
|
|
64443
|
-
|
|
64444
|
-
}
|
|
64445
|
-
const packageJson = readPackageJson3(resolvedInput);
|
|
64446
|
-
const configuredHarnessPath = trimToUndefined(packageJson?.archal?.harness);
|
|
64447
|
-
if (configuredHarnessPath) {
|
|
64448
|
-
const configuredAbsolutePath = (0, import_node_path22.resolve)(resolvedInput, configuredHarnessPath);
|
|
64449
|
-
if (!(0, import_node_fs24.existsSync)(configuredAbsolutePath)) {
|
|
64450
|
-
throw new Error(
|
|
64451
|
-
`package.json archal.harness points to a missing path: ${configuredAbsolutePath}`
|
|
64452
|
-
);
|
|
64453
|
-
}
|
|
64454
|
-
const configuredStats = (0, import_node_fs24.statSync)(configuredAbsolutePath);
|
|
64455
|
-
if (configuredStats.isDirectory()) {
|
|
64456
|
-
return resolveHarnessLocation(configuredAbsolutePath, visited);
|
|
64457
|
-
}
|
|
64458
|
-
return {
|
|
64459
|
-
executionRoot: resolvedInput,
|
|
64460
|
-
manifestPath: (0, import_node_path22.resolve)(resolvedInput, MANIFEST_FILE),
|
|
64461
|
-
entryPath: configuredAbsolutePath
|
|
64516
|
+
entryPath: resolvedInput,
|
|
64517
|
+
projectRoot: resolveHarnessProjectRoot(resolvedInput, executionRoot)
|
|
64462
64518
|
};
|
|
64463
64519
|
}
|
|
64464
|
-
for (const candidate of
|
|
64520
|
+
for (const candidate of HARNESS_ENTRY_CANDIDATES) {
|
|
64465
64521
|
const candidatePath = (0, import_node_path22.resolve)(resolvedInput, candidate);
|
|
64466
64522
|
if (!(0, import_node_fs24.existsSync)(candidatePath)) {
|
|
64467
64523
|
continue;
|
|
@@ -64469,12 +64525,14 @@ function resolveHarnessLocation(harnessInput, visited = /* @__PURE__ */ new Set(
|
|
|
64469
64525
|
return {
|
|
64470
64526
|
executionRoot: resolvedInput,
|
|
64471
64527
|
manifestPath: (0, import_node_path22.resolve)(resolvedInput, MANIFEST_FILE),
|
|
64472
|
-
entryPath: candidatePath
|
|
64528
|
+
entryPath: candidatePath,
|
|
64529
|
+
projectRoot: resolvedInput
|
|
64473
64530
|
};
|
|
64474
64531
|
}
|
|
64475
64532
|
return {
|
|
64476
64533
|
executionRoot: resolvedInput,
|
|
64477
|
-
manifestPath: (0, import_node_path22.resolve)(resolvedInput, MANIFEST_FILE)
|
|
64534
|
+
manifestPath: (0, import_node_path22.resolve)(resolvedInput, MANIFEST_FILE),
|
|
64535
|
+
projectRoot: resolvedInput
|
|
64478
64536
|
};
|
|
64479
64537
|
}
|
|
64480
64538
|
|
|
@@ -64496,14 +64554,14 @@ var DIRECT_FILE_RUNNERS = {
|
|
|
64496
64554
|
".mjs": { command: "node", args: [] },
|
|
64497
64555
|
".cjs": { command: "node", args: [] }
|
|
64498
64556
|
};
|
|
64499
|
-
function inferLocalCommandFromEntryPath(
|
|
64557
|
+
function inferLocalCommandFromEntryPath(spawnCwd, entryPath) {
|
|
64500
64558
|
const runner = DIRECT_FILE_RUNNERS[(0, import_node_path23.extname)(entryPath).toLowerCase()];
|
|
64501
64559
|
if (!runner) {
|
|
64502
64560
|
throw new Error(
|
|
64503
64561
|
`Unsupported harness entrypoint extension for ${entryPath}. Use .ts, .js, .mjs, or .cjs.`
|
|
64504
64562
|
);
|
|
64505
64563
|
}
|
|
64506
|
-
const relativeEntryPath = (0, import_node_path23.relative)(
|
|
64564
|
+
const relativeEntryPath = (0, import_node_path23.relative)(spawnCwd, entryPath) || entryPath;
|
|
64507
64565
|
return {
|
|
64508
64566
|
command: runner.command,
|
|
64509
64567
|
args: [...runner.args, relativeEntryPath]
|
|
@@ -64520,7 +64578,7 @@ function parseHarnessManifest(manifestPath) {
|
|
|
64520
64578
|
);
|
|
64521
64579
|
}
|
|
64522
64580
|
}
|
|
64523
|
-
function
|
|
64581
|
+
function trimToUndefined(value) {
|
|
64524
64582
|
const trimmed = value?.trim();
|
|
64525
64583
|
return trimmed ? trimmed : void 0;
|
|
64526
64584
|
}
|
|
@@ -64530,10 +64588,11 @@ function buildResolvedLocalHarness(location, explicitModel, manifest) {
|
|
|
64530
64588
|
command: manifest.local.command,
|
|
64531
64589
|
args: manifest.local.args,
|
|
64532
64590
|
env: manifest.local.env
|
|
64533
|
-
} : location.entryPath ? inferLocalCommandFromEntryPath(location.
|
|
64534
|
-
const model = explicitModel ??
|
|
64591
|
+
} : location.entryPath ? inferLocalCommandFromEntryPath(location.projectRoot, location.entryPath) : void 0;
|
|
64592
|
+
const model = explicitModel ?? trimToUndefined(manifest?.defaultModel);
|
|
64535
64593
|
return {
|
|
64536
64594
|
harnessDir: location.executionRoot,
|
|
64595
|
+
projectRoot: location.projectRoot,
|
|
64537
64596
|
manifestPath: location.manifestPath,
|
|
64538
64597
|
manifest,
|
|
64539
64598
|
model,
|
|
@@ -64543,7 +64602,7 @@ function buildResolvedLocalHarness(location, explicitModel, manifest) {
|
|
|
64543
64602
|
}
|
|
64544
64603
|
function resolveLocalHarness(harnessDirInput, explicitModel) {
|
|
64545
64604
|
const location = resolveHarnessLocation(harnessDirInput);
|
|
64546
|
-
const explicit =
|
|
64605
|
+
const explicit = trimToUndefined(explicitModel);
|
|
64547
64606
|
if (!(0, import_node_fs25.existsSync)(location.manifestPath)) {
|
|
64548
64607
|
if (location.entryPath) {
|
|
64549
64608
|
return buildResolvedLocalHarness(location, explicit);
|
|
@@ -64677,7 +64736,7 @@ function resolveEngineConfiguration(scenario, options, timeoutSeconds, projectCo
|
|
|
64677
64736
|
model: rawEngineModel?.trim() || void 0,
|
|
64678
64737
|
command: "docker",
|
|
64679
64738
|
args: [],
|
|
64680
|
-
cwd: harnessDir,
|
|
64739
|
+
cwd: resolveHarnessProjectRoot(harnessDir),
|
|
64681
64740
|
promptContext: void 0
|
|
64682
64741
|
};
|
|
64683
64742
|
} else {
|
|
@@ -64705,8 +64764,12 @@ function resolveEngineConfiguration(scenario, options, timeoutSeconds, projectCo
|
|
|
64705
64764
|
model: resolvedHarness.model,
|
|
64706
64765
|
command: commandConfig.command,
|
|
64707
64766
|
args: commandConfig.args,
|
|
64708
|
-
env:
|
|
64709
|
-
|
|
64767
|
+
env: {
|
|
64768
|
+
...commandConfig.env,
|
|
64769
|
+
ARCHAL_PROJECT_ROOT: resolvedHarness.projectRoot,
|
|
64770
|
+
ARCHAL_HARNESS_ENTRY_DIR: resolvedHarness.harnessDir
|
|
64771
|
+
},
|
|
64772
|
+
cwd: resolvedHarness.projectRoot,
|
|
64710
64773
|
promptContext: resolvedHarness.promptContext
|
|
64711
64774
|
};
|
|
64712
64775
|
}
|
|
@@ -65107,7 +65170,7 @@ function parseBatchJudgeResponse(text, criteriaIds) {
|
|
|
65107
65170
|
if (results.size > 0) {
|
|
65108
65171
|
const missing = criteriaIds.filter((id) => !results.has(id));
|
|
65109
65172
|
if (missing.length > 0) {
|
|
65110
|
-
warn(`Batch parse: ${missing.length} criteria missing from response (will
|
|
65173
|
+
warn(`Batch parse: ${missing.length} criteria missing from response (sequential fallback will be attempted, #2551): ${missing.join(", ")}`);
|
|
65111
65174
|
}
|
|
65112
65175
|
return results;
|
|
65113
65176
|
}
|
|
@@ -66219,6 +66282,28 @@ function resolveJudgeProvider(config2) {
|
|
|
66219
66282
|
}
|
|
66220
66283
|
return { provider, apiKey, canUseManagedRouting };
|
|
66221
66284
|
}
|
|
66285
|
+
function accumulateTokenUsageSummaries(primary, secondary) {
|
|
66286
|
+
if (!primary && !secondary) return null;
|
|
66287
|
+
const a = primary ?? { inputTokens: 0, outputTokens: 0 };
|
|
66288
|
+
const b = secondary ?? { inputTokens: 0, outputTokens: 0 };
|
|
66289
|
+
const callCount = typeof a.llmCallCount === "number" || typeof b.llmCallCount === "number" ? (a.llmCallCount ?? 0) + (b.llmCallCount ?? 0) : void 0;
|
|
66290
|
+
const combined = {
|
|
66291
|
+
inputTokens: a.inputTokens + b.inputTokens,
|
|
66292
|
+
outputTokens: a.outputTokens + b.outputTokens,
|
|
66293
|
+
...callCount !== void 0 ? { llmCallCount: callCount } : {}
|
|
66294
|
+
};
|
|
66295
|
+
return combined;
|
|
66296
|
+
}
|
|
66297
|
+
async function evaluateMissingCriteriaSequentially(missingCriteria, context, config2) {
|
|
66298
|
+
const results = /* @__PURE__ */ new Map();
|
|
66299
|
+
let tokenUsage = null;
|
|
66300
|
+
for (const criterion of missingCriteria) {
|
|
66301
|
+
const singleResult = await evaluateWithLlm(criterion, context, config2);
|
|
66302
|
+
results.set(criterion.id, singleResult.evaluation);
|
|
66303
|
+
tokenUsage = accumulateTokenUsageSummaries(tokenUsage, singleResult.tokenUsage);
|
|
66304
|
+
}
|
|
66305
|
+
return { results, tokenUsage };
|
|
66306
|
+
}
|
|
66222
66307
|
async function evaluateBatchWithLlm(criteria, context, config2) {
|
|
66223
66308
|
const resolved = resolveJudgeProvider(config2);
|
|
66224
66309
|
if (!resolved) {
|
|
@@ -66242,7 +66327,8 @@ async function evaluateBatchWithLlm(criteria, context, config2) {
|
|
|
66242
66327
|
provider,
|
|
66243
66328
|
traceLength: String(context.trace.length)
|
|
66244
66329
|
});
|
|
66245
|
-
const
|
|
66330
|
+
const BATCH_MAX_TOKENS_CEILING = 32768;
|
|
66331
|
+
const maxTokens = Math.min(256 + criteria.length * 512, BATCH_MAX_TOKENS_CEILING);
|
|
66246
66332
|
try {
|
|
66247
66333
|
const response = await callLlmWithUsage({
|
|
66248
66334
|
provider,
|
|
@@ -66269,6 +66355,17 @@ async function evaluateBatchWithLlm(criteria, context, config2) {
|
|
|
66269
66355
|
const text = response.text;
|
|
66270
66356
|
const criteriaIds = criteria.map((c) => c.id);
|
|
66271
66357
|
const parsed = parseBatchJudgeResponse(text, criteriaIds);
|
|
66358
|
+
const missingCriteria = criteria.filter((c) => !parsed.has(c.id));
|
|
66359
|
+
let fallbackResults = /* @__PURE__ */ new Map();
|
|
66360
|
+
let fallbackTokenUsage = null;
|
|
66361
|
+
if (missingCriteria.length > 0) {
|
|
66362
|
+
warn(
|
|
66363
|
+
`Batch evaluation missing ${String(missingCriteria.length)}/${String(criteria.length)} criteria \u2014 falling back to sequential single-criterion evaluation (#2551)`
|
|
66364
|
+
);
|
|
66365
|
+
const fallback = await evaluateMissingCriteriaSequentially(missingCriteria, context, config2);
|
|
66366
|
+
fallbackResults = fallback.results;
|
|
66367
|
+
fallbackTokenUsage = fallback.tokenUsage;
|
|
66368
|
+
}
|
|
66272
66369
|
return {
|
|
66273
66370
|
evaluations: criteria.map((c) => {
|
|
66274
66371
|
const result = parsed.get(c.id);
|
|
@@ -66285,15 +66382,24 @@ async function evaluateBatchWithLlm(criteria, context, config2) {
|
|
|
66285
66382
|
explanation: result.explanation
|
|
66286
66383
|
};
|
|
66287
66384
|
}
|
|
66288
|
-
|
|
66385
|
+
const fallback = fallbackResults.get(c.id);
|
|
66386
|
+
if (fallback) {
|
|
66387
|
+
debug("LLM judge batch fallback (sequential)", {
|
|
66388
|
+
criterion: c.id,
|
|
66389
|
+
status: fallback.status,
|
|
66390
|
+
confidence: fallback.confidence.toFixed(2)
|
|
66391
|
+
});
|
|
66392
|
+
return fallback;
|
|
66393
|
+
}
|
|
66394
|
+
warn(`Batch + sequential fallback both missing result for criterion "${c.id}" \u2014 defaulting to fail`);
|
|
66289
66395
|
return {
|
|
66290
66396
|
criterionId: c.id,
|
|
66291
66397
|
status: "fail",
|
|
66292
66398
|
confidence: 0.3,
|
|
66293
|
-
explanation: "Criterion not found in batch evaluation response"
|
|
66399
|
+
explanation: "Criterion not found in batch or sequential evaluation response"
|
|
66294
66400
|
};
|
|
66295
66401
|
}),
|
|
66296
|
-
tokenUsage: response.usage
|
|
66402
|
+
tokenUsage: accumulateTokenUsageSummaries(response.usage, fallbackTokenUsage)
|
|
66297
66403
|
};
|
|
66298
66404
|
} catch (err) {
|
|
66299
66405
|
const message = errorMessage(err);
|
|
@@ -68749,7 +68855,7 @@ async function assertLocalHarnessBootable(harnessInput, explicitModel, extraEnv)
|
|
|
68749
68855
|
command: harness.localCommand.command,
|
|
68750
68856
|
args: harness.localCommand.args,
|
|
68751
68857
|
timeoutMs,
|
|
68752
|
-
cwd: harness.
|
|
68858
|
+
cwd: harness.projectRoot,
|
|
68753
68859
|
env: {
|
|
68754
68860
|
...harness.localCommand.env,
|
|
68755
68861
|
...extraEnv,
|
|
@@ -68762,19 +68868,21 @@ async function assertLocalHarnessBootable(harnessInput, explicitModel, extraEnv)
|
|
|
68762
68868
|
ARCHAL_REST_CONFIG: restConfigPath,
|
|
68763
68869
|
ARCHAL_METRICS_FILE: metricsFilePath,
|
|
68764
68870
|
ARCHAL_AGENT_TRACE_FILE: traceFilePath,
|
|
68765
|
-
ARCHAL_PREFLIGHT: "1"
|
|
68871
|
+
ARCHAL_PREFLIGHT: "1",
|
|
68872
|
+
ARCHAL_PROJECT_ROOT: harness.projectRoot,
|
|
68873
|
+
ARCHAL_HARNESS_ENTRY_DIR: harness.harnessDir
|
|
68766
68874
|
}
|
|
68767
68875
|
});
|
|
68768
68876
|
if (result.timedOut) {
|
|
68769
68877
|
throw new Error(formatLocalHarnessFailure(
|
|
68770
68878
|
`Harness boot timed out after ${Math.round(timeoutMs / 1e3)}s.`,
|
|
68771
|
-
{ cwd: harness.
|
|
68879
|
+
{ cwd: harness.projectRoot, phase: "preflight" }
|
|
68772
68880
|
));
|
|
68773
68881
|
}
|
|
68774
68882
|
if (result.exitCode !== 0) {
|
|
68775
68883
|
const detail = trimSnippet(result.stderr) || trimSnippet(result.stdout) || `Harness exited with code ${result.exitCode}.`;
|
|
68776
68884
|
throw new Error(formatLocalHarnessFailure(detail, {
|
|
68777
|
-
cwd: harness.
|
|
68885
|
+
cwd: harness.projectRoot,
|
|
68778
68886
|
phase: "preflight"
|
|
68779
68887
|
}));
|
|
68780
68888
|
}
|
|
@@ -68783,7 +68891,7 @@ async function assertLocalHarnessBootable(harnessInput, explicitModel, extraEnv)
|
|
|
68783
68891
|
throw err;
|
|
68784
68892
|
}
|
|
68785
68893
|
throw new Error(formatLocalHarnessFailure(errorMessage(err), {
|
|
68786
|
-
cwd: harness.
|
|
68894
|
+
cwd: harness.projectRoot,
|
|
68787
68895
|
phase: "preflight"
|
|
68788
68896
|
}));
|
|
68789
68897
|
} finally {
|
|
@@ -71576,11 +71684,11 @@ function resolveEngineMode(opts) {
|
|
|
71576
71684
|
}
|
|
71577
71685
|
if (opts.task && opts.task.trim()) {
|
|
71578
71686
|
throw new CliUsageError(
|
|
71579
|
-
'No agent configured.\n --task still needs a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n
|
|
71687
|
+
'No agent configured.\n --task still needs a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Example: archal run --task "List recent issues" --harness ./.archal/harness.ts --twin github\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
|
|
71580
71688
|
);
|
|
71581
71689
|
}
|
|
71582
71690
|
throw new CliUsageError(
|
|
71583
|
-
'No agent configured.\n Archal could not find a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n
|
|
71691
|
+
'No agent configured.\n Archal could not find a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Example: archal run scenario.md --harness ./.archal/harness.ts\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
|
|
71584
71692
|
);
|
|
71585
71693
|
}
|
|
71586
71694
|
function resolveRequestedEngineModel(opts) {
|
|
@@ -71839,19 +71947,17 @@ function resolveHarnessAndEngine(opts, timeout, resolved) {
|
|
|
71839
71947
|
if (opts.task && opts.task.trim()) {
|
|
71840
71948
|
validationError(
|
|
71841
71949
|
"No agent configured.",
|
|
71842
|
-
'--task still needs a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n
|
|
71950
|
+
'--task still needs a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Example: archal run --task "List recent issues" --harness ./.archal/harness.ts --twin github\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
|
|
71843
71951
|
);
|
|
71844
71952
|
}
|
|
71845
71953
|
validationError(
|
|
71846
71954
|
"No agent configured.",
|
|
71847
|
-
'Archal could not find a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n
|
|
71955
|
+
'Archal could not find a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Example: archal run scenario.md --harness ./.archal/harness.ts\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
|
|
71848
71956
|
);
|
|
71849
71957
|
}
|
|
71850
71958
|
inferredRepoHarness = true;
|
|
71851
71959
|
opts.harnessDir = inferredHarness.harnessDir;
|
|
71852
|
-
info(
|
|
71853
|
-
`Using repo-local harness from ${inferredHarness.harnessDir} (${inferredHarness.reason}).`
|
|
71854
|
-
);
|
|
71960
|
+
info(`Using repo-local harness from ${inferredHarness.harnessDir}.`);
|
|
71855
71961
|
}
|
|
71856
71962
|
}
|
|
71857
71963
|
assertDockerAndSandboxAreCompatible(dockerExplicitlyRequested, opts.sandbox);
|
|
@@ -71924,6 +72030,21 @@ function resolveHarnessAndEngine(opts, timeout, resolved) {
|
|
|
71924
72030
|
if (opts.sandbox) {
|
|
71925
72031
|
return engine;
|
|
71926
72032
|
}
|
|
72033
|
+
if (engine.mode === "local" && opts.proxy === void 0) {
|
|
72034
|
+
opts.proxy = true;
|
|
72035
|
+
info("TLS proxy enabled by default for local harness runs. Use --no-proxy to disable.");
|
|
72036
|
+
const existingProxy = process.env["HTTPS_PROXY"]?.trim() || process.env["https_proxy"]?.trim();
|
|
72037
|
+
const existingCa = process.env["NODE_EXTRA_CA_CERTS"]?.trim();
|
|
72038
|
+
if (existingProxy || existingCa) {
|
|
72039
|
+
const collisions = [
|
|
72040
|
+
existingProxy ? "HTTPS_PROXY" : null,
|
|
72041
|
+
existingCa ? "NODE_EXTRA_CA_CERTS" : null
|
|
72042
|
+
].filter(Boolean).join(" / ");
|
|
72043
|
+
warn(
|
|
72044
|
+
`Auto-enabled --proxy will override your shell's ${collisions} for the agent process. If you rely on a corporate CA or custom proxy, pass --no-proxy and wire twin base URLs via ARCHAL_<TWIN>_BASE_URL env vars instead.`
|
|
72045
|
+
);
|
|
72046
|
+
}
|
|
72047
|
+
}
|
|
71927
72048
|
if (engine.mode === "local" && !process.env["ARCHAL_ENGINE_API_KEY"]) {
|
|
71928
72049
|
const requestedModel = firstNonEmpty(
|
|
71929
72050
|
engine.model,
|
|
@@ -72260,9 +72381,9 @@ function resolveRunConfigWithSources(scenarioPath, opts, sources, preloaded) {
|
|
|
72260
72381
|
detail: "ARCHAL_MODEL"
|
|
72261
72382
|
},
|
|
72262
72383
|
{
|
|
72263
|
-
value: toTrimmedString(archalConfig?.config.
|
|
72384
|
+
value: toTrimmedString(archalConfig?.config.evaluatorModel),
|
|
72264
72385
|
source: "project",
|
|
72265
|
-
detail: ".archal.json:
|
|
72386
|
+
detail: ".archal.json:evaluatorModel"
|
|
72266
72387
|
},
|
|
72267
72388
|
{
|
|
72268
72389
|
value: toTrimmedString(dashboardConfig["runEvaluatorModel"]),
|
|
@@ -72293,11 +72414,6 @@ function resolveRunConfigWithSources(scenarioPath, opts, sources, preloaded) {
|
|
|
72293
72414
|
source: "env",
|
|
72294
72415
|
detail: "ARCHAL_DEFAULT_HARNESS"
|
|
72295
72416
|
},
|
|
72296
|
-
{
|
|
72297
|
-
value: archalConfig?.config.harness ? (0, import_node_path38.resolve)(archalConfig.configDir, archalConfig.config.harness) : void 0,
|
|
72298
|
-
source: "project",
|
|
72299
|
-
detail: ".archal.json:harness"
|
|
72300
|
-
},
|
|
72301
72417
|
{
|
|
72302
72418
|
value: toTrimmedString(dashboardConfig["runHarness"]),
|
|
72303
72419
|
source: "project",
|
|
@@ -72583,9 +72699,6 @@ function applyResolvedRunConfigToOptions(opts, resolved) {
|
|
|
72583
72699
|
if (resolved.model.source === "project") {
|
|
72584
72700
|
opts.model = resolved.model.value;
|
|
72585
72701
|
}
|
|
72586
|
-
if (resolved.harness.value && resolved.harness.source === "project" && resolved.harness.detail === ".archal.json:harness" && !opts.harness) {
|
|
72587
|
-
opts.harness = resolved.harness.value;
|
|
72588
|
-
}
|
|
72589
72702
|
const hasExplicitLocalHarness = (() => {
|
|
72590
72703
|
if (firstNonEmpty(opts.harnessDir, process.env["ARCHAL_HARNESS_DIR"])) {
|
|
72591
72704
|
return true;
|
|
@@ -78088,6 +78201,7 @@ async function executeRunForScenario(scenarioArg, opts, command, configDefaults,
|
|
|
78088
78201
|
hostedSessionId: ctx.backendSessionId,
|
|
78089
78202
|
isReusedEnvSession: ctx.isReusedEnvSession,
|
|
78090
78203
|
keepState: opts.keepState,
|
|
78204
|
+
freshSeed: opts.freshSeed,
|
|
78091
78205
|
allowEmptyState: opts.allowEmptyState,
|
|
78092
78206
|
noSeedCache: !opts.seedCache,
|
|
78093
78207
|
staticSeed: hasStaticSeed,
|
|
@@ -78191,8 +78305,33 @@ function deriveTitle(task) {
|
|
|
78191
78305
|
const lastSpace = truncated.lastIndexOf(" ");
|
|
78192
78306
|
return lastSpace > 20 ? truncated.slice(0, lastSpace) : truncated;
|
|
78193
78307
|
}
|
|
78308
|
+
/**
 * Conversational lead-ins that may precede the actual instruction in a task
 * string ("using github, ...", "please ...", "can you ...", "i want you to ...",
 * "help me ...", "for me, ...").
 *
 * Every pattern is anchored at the start of the string and has no `i` flag,
 * so the input must already be lower-cased for a match to occur.
 */
var LEADING_CONTEXT_PATTERNS = [
  /^using\s+[^,]+?,\s+/,
  /^(?:please|kindly)\s+/,
  /^(?:can|could|would|will)\s+you\s+(?:please\s+)?/,
  /^(?:i\s+want\s+(?:you\s+)?to|i\s+(?:would\s+)?(?:'d\s+)?like\s+(?:you\s+)?to|i\s+need\s+(?:you\s+)?to|i'?d\s+like\s+(?:you\s+)?to)\s+/,
  /^help\s+me\s+(?:please\s+)?/,
  /^for\s+me,\s+/
];
/**
 * Removes stacked conversational prefixes from the front of a task string.
 *
 * Each pass tries every pattern in LEADING_CONTEXT_PATTERNS once, in order,
 * against the current start of the string. Passes repeat until one leaves the
 * string unchanged or `maxPasses` passes have run, whichever comes first.
 *
 * @param {string} input - Task text; matching is case-sensitive against
 *   lower-case patterns, so callers should lower-case it first.
 * @param {number} [maxPasses=4] - Upper bound on the number of passes. The
 *   default preserves the previously hard-coded limit.
 * @returns {string} The input with any matched leading prefixes removed.
 */
function stripLeadingContext(input, maxPasses = 4) {
  let cur = input;
  for (let pass = 0; pass < maxPasses; pass++) {
    let changed = false;
    for (const pattern of LEADING_CONTEXT_PATTERNS) {
      const next = cur.replace(pattern, "");
      if (next !== cur) {
        cur = next;
        changed = true;
      }
    }
    // A pass that strips nothing means no pattern matches the current start.
    if (!changed) break;
  }
  return cur;
}
|
|
78194
78331
|
function isReadOnlyTask(task) {
|
|
78195
|
-
const normalized =
|
|
78332
|
+
const normalized = stripLeadingContext(
|
|
78333
|
+
task.trim().toLowerCase().replace(/\s+/g, " ")
|
|
78334
|
+
);
|
|
78196
78335
|
const writePrefixes = [
|
|
78197
78336
|
"mark ",
|
|
78198
78337
|
"star ",
|
|
@@ -78298,10 +78437,10 @@ function isReadOnlyTask(task) {
|
|
|
78298
78437
|
return true;
|
|
78299
78438
|
}
|
|
78300
78439
|
function generateTaskScenario(options) {
|
|
78301
|
-
const { task, twins } = options;
|
|
78440
|
+
const { task, twins, readOnlyOverride } = options;
|
|
78302
78441
|
const title = deriveTitle(task);
|
|
78303
78442
|
const twinsConfig = twins.join(", ");
|
|
78304
|
-
const readOnly = isReadOnlyTask(task);
|
|
78443
|
+
const readOnly = typeof readOnlyOverride === "boolean" ? readOnlyOverride : isReadOnlyTask(task);
|
|
78305
78444
|
const criteria = [];
|
|
78306
78445
|
if (readOnly) {
|
|
78307
78446
|
criteria.push(
|
|
@@ -78474,7 +78613,11 @@ async function resolveRunCommandScenarios(scenarioArg, opts, command) {
|
|
|
78474
78613
|
if (resolvedTwins.length === 0) {
|
|
78475
78614
|
throw new CliUsageError("--task requires --twin (could not auto-detect).");
|
|
78476
78615
|
}
|
|
78477
|
-
taskScenarioPath = generateTaskScenario({
|
|
78616
|
+
taskScenarioPath = generateTaskScenario({
|
|
78617
|
+
task: opts.task,
|
|
78618
|
+
twins: resolvedTwins,
|
|
78619
|
+
readOnlyOverride: opts.readOnly === true ? true : void 0
|
|
78620
|
+
});
|
|
78478
78621
|
scenariosToRun.push(taskScenarioPath);
|
|
78479
78622
|
if (!opts.seed && !opts.seedOverrides && !opts.fileSeedPaths) {
|
|
78480
78623
|
opts.seed = "empty";
|
|
@@ -78516,6 +78659,7 @@ function applyRunHelpVisibility(cmd, showAdvancedHelp) {
|
|
|
78516
78659
|
"--config",
|
|
78517
78660
|
"--task",
|
|
78518
78661
|
"--twin",
|
|
78662
|
+
"--read-only",
|
|
78519
78663
|
"--runs",
|
|
78520
78664
|
"--timeout",
|
|
78521
78665
|
"--output",
|
|
@@ -78526,6 +78670,7 @@ function applyRunHelpVisibility(cmd, showAdvancedHelp) {
|
|
|
78526
78670
|
"--verbose",
|
|
78527
78671
|
"--reuse-session",
|
|
78528
78672
|
"--keep-state",
|
|
78673
|
+
"--fresh-seed",
|
|
78529
78674
|
"--help"
|
|
78530
78675
|
]);
|
|
78531
78676
|
for (const option of cmd.options) {
|
|
@@ -78552,12 +78697,17 @@ function createRunCommand() {
|
|
|
78552
78697
|
return acc;
|
|
78553
78698
|
},
|
|
78554
78699
|
[]
|
|
78700
|
+
).addOption(
|
|
78701
|
+
new Option(
|
|
78702
|
+
"--read-only",
|
|
78703
|
+
"Force the generated --task scenario to use the read-only (single-criterion) scaffold, bypassing the heuristic classifier. Only meaningful alongside --task."
|
|
78704
|
+
)
|
|
78555
78705
|
).option("-n, --runs <count>", "Number of runs", String(configDefaults.runs)).option("-t, --timeout <seconds>", "Timeout per run in seconds", String(configDefaults.timeout)).addOption(new Option("-m, --model <model>", "Evaluator model").hideHelp()).addOption(
|
|
78556
78706
|
new Option("-o, --output <format>", "Output format: terminal, json").default("terminal")
|
|
78557
78707
|
).addOption(new Option("--seed <name-or-path>", "Seed name or file path (.json / .md)")).addOption(new Option("--tag <tag>", "Only run if scenario has this tag")).addOption(new Option("-q, --quiet", "Suppress non-error output")).addOption(new Option("-v, --verbose", "Enable debug logging")).addOption(
|
|
78558
78708
|
new Option(
|
|
78559
78709
|
"--harness <path>",
|
|
78560
|
-
"Path to a runnable headless harness (e.g. ./.archal/harness.ts). Omit this flag and Archal discovers a repo-local harness by walking up from cwd for a top-level harness.{ts,js,mjs,cjs}
|
|
78710
|
+
"Path to a runnable headless harness (e.g. ./.archal/harness.ts). Omit this flag and Archal discovers a repo-local harness by walking up from cwd for a top-level harness.{ts,js,mjs,cjs}. You can also set `agent` in .archal.json."
|
|
78561
78711
|
)
|
|
78562
78712
|
).addOption(
|
|
78563
78713
|
new Option(
|
|
@@ -78570,18 +78720,28 @@ function createRunCommand() {
|
|
|
78570
78720
|
"Skip the scenario's named-seed re-apply on a reused session. Use after sideloading twin state with 'archal twin seed --file'. Alias: --no-reseed."
|
|
78571
78721
|
)
|
|
78572
78722
|
).addOption(new Option("--no-reseed").hideHelp()).addOption(
|
|
78723
|
+
new Option(
|
|
78724
|
+
"--fresh-seed",
|
|
78725
|
+
"Force re-apply of the scenario's named seed on a reused session, wiping any existing twin state first. Opposite of --keep-state. Use when an earlier `archal twin start` left state around and you want the scenario's declared seed applied cleanly. Mutually exclusive with --keep-state."
|
|
78726
|
+
)
|
|
78727
|
+
).addOption(
|
|
78573
78728
|
new Option(
|
|
78574
78729
|
"--allow-empty-state",
|
|
78575
78730
|
"Allow the run to proceed when the reused-session state probe fails. Rare debug-only; default off \u2014 a probe failure normally fails the run to protect sideloaded state."
|
|
78576
78731
|
).hideHelp()
|
|
78577
|
-
).addOption(new Option("--rate-limit <count>").hideHelp()).addOption(new Option("--pass-threshold <score>").default("0").hideHelp()).addOption(new Option("--api-key <key>").hideHelp()).addOption(new Option("--engine-endpoint <url>").hideHelp()).addOption(new Option("--engine-token <token>").hideHelp()).addOption(new Option("--agent-model <model>").hideHelp()).addOption(new Option("--engine-twin-urls <path>").hideHelp()).addOption(new Option("--engine-timeout <seconds>").hideHelp()).addOption(new Option("--dockerfile <path>").hideHelp()).addOption(new Option("--api-base-urls <path>").hideHelp()).addOption(new Option("--api-proxy-url <url>").hideHelp()).addOption(new Option("--preflight-only").hideHelp()).addOption(new Option("--seed-cache").hideHelp()).addOption(new Option("--clear-seed-cache").hideHelp()).addOption(new Option("--replay-seed <path>").hideHelp()).addOption(new Option("--save-seed <path>").hideHelp()).addOption(new Option("--no-failure-analysis").hideHelp()).addOption(new Option("--allow-ambiguous-seed").hideHelp()).addOption(new Option("--strict-seed").hideHelp()).addOption(new Option("--run-project-id <id>").hideHelp()).addOption(new Option("--run-project-name <name>").hideHelp()).addOption(new Option("--run-project-description <text>").hideHelp()).addOption(new Option("--sandbox").hideHelp()).addOption(new Option("--openclaw-home <dir>").hideHelp()).addOption(new Option("--workspace <dir>").hideHelp()).addOption(new Option("--openclaw-config <path>").hideHelp()).addOption(new Option("--openclaw-version <version>").hideHelp()).addOption(new Option("--openclaw-eval-mode <mode>").hideHelp()).addOption(new Option("--docker").hideHelp()).addOption(new Option("--no-docker").hideHelp()).addOption(new Option("--proxy", "
|
|
78732
|
+
).addOption(new Option("--rate-limit <count>").hideHelp()).addOption(new Option("--pass-threshold <score>").default("0").hideHelp()).addOption(new Option("--api-key <key>").hideHelp()).addOption(new Option("--engine-endpoint <url>").hideHelp()).addOption(new Option("--engine-token <token>").hideHelp()).addOption(new Option("--agent-model <model>").hideHelp()).addOption(new Option("--engine-twin-urls <path>").hideHelp()).addOption(new Option("--engine-timeout <seconds>").hideHelp()).addOption(new Option("--dockerfile <path>").hideHelp()).addOption(new Option("--api-base-urls <path>").hideHelp()).addOption(new Option("--api-proxy-url <url>").hideHelp()).addOption(new Option("--preflight-only").hideHelp()).addOption(new Option("--seed-cache").hideHelp()).addOption(new Option("--clear-seed-cache").hideHelp()).addOption(new Option("--replay-seed <path>").hideHelp()).addOption(new Option("--save-seed <path>").hideHelp()).addOption(new Option("--no-failure-analysis").hideHelp()).addOption(new Option("--allow-ambiguous-seed").hideHelp()).addOption(new Option("--strict-seed").hideHelp()).addOption(new Option("--run-project-id <id>").hideHelp()).addOption(new Option("--run-project-name <name>").hideHelp()).addOption(new Option("--run-project-description <text>").hideHelp()).addOption(new Option("--sandbox").hideHelp()).addOption(new Option("--openclaw-home <dir>").hideHelp()).addOption(new Option("--workspace <dir>").hideHelp()).addOption(new Option("--openclaw-config <path>").hideHelp()).addOption(new Option("--openclaw-version <version>").hideHelp()).addOption(new Option("--openclaw-eval-mode <mode>").hideHelp()).addOption(new Option("--docker").hideHelp()).addOption(new Option("--no-docker").hideHelp()).addOption(new Option("--proxy", "Force the TLS proxy on (on by default for local harness runs).").hideHelp()).addOption(new Option("--no-proxy", "Disable the TLS proxy for local harness runs.").hideHelp()).addOption(new Option("--advanced").hideHelp()).addHelpText(
|
|
78578
78733
|
"after",
|
|
78579
|
-
'\nExamples:\n archal run # uses .archal.json in current directory\n archal run --config path/to/.archal.json # use specific config\n archal run scenario.md # run a single scenario directly\n archal run scenario.md --runs 5 # run 5 times\n archal
|
|
78734
|
+
'\nExamples:\n archal run # uses .archal.json in current directory\n archal run --config path/to/.archal.json # use specific config\n archal run scenario.md # run a single scenario directly\n archal run scenario.md --runs 5 # run 5 times\n archal run --task "List recent issues" --harness ./.archal/harness.ts --twin github\n # inline task with an explicit harness path\n archal twin start github # then in the same shell:\n archal run scenario.md --reuse-session # runs against the active twin session\n archal run scenario.md --reuse-session ses-abc # targets a specific session id\n\n.archal.json (one valid project-default setup path):\n {\n "title": "my-agent-tests",\n "agent": "npx tsx ./.archal/harness.ts",\n "twins": ["github"],\n "agentModel": "gemini-2.0-flash"\n }\n\nFor full reference: https://docs.archal.ai/cli/run\n'
|
|
78580
78735
|
).action(async (scenarioArg, opts, command) => {
|
|
78581
78736
|
const reseedOpt = opts.reseed;
|
|
78582
78737
|
if (reseedOpt === false) {
|
|
78583
78738
|
opts.keepState = true;
|
|
78584
78739
|
}
|
|
78740
|
+
if (opts.freshSeed && opts.keepState) {
|
|
78741
|
+
command.error(
|
|
78742
|
+
"error: --fresh-seed and --keep-state are mutually exclusive. Pick one: --fresh-seed to wipe and re-apply the scenario's seed, or --keep-state to preserve the existing twin state."
|
|
78743
|
+
);
|
|
78744
|
+
}
|
|
78585
78745
|
const {
|
|
78586
78746
|
archalFile,
|
|
78587
78747
|
configuredAgentConfig,
|
|
@@ -79279,7 +79439,6 @@ function isRunnableScenarioCandidate(filePath) {
|
|
|
79279
79439
|
}
|
|
79280
79440
|
function toLocalScenarioRows(opts) {
|
|
79281
79441
|
const tagFilter = opts.tag?.toLowerCase();
|
|
79282
|
-
const difficultyFilter = opts.difficulty?.toLowerCase();
|
|
79283
79442
|
const rows = [];
|
|
79284
79443
|
const localResolution = findLocalScenariosDir();
|
|
79285
79444
|
const localDir = localResolution.dir;
|
|
@@ -79299,7 +79458,6 @@ function toLocalScenarioRows(opts) {
|
|
|
79299
79458
|
const scenarioTags = scenario.config.tags.map((t) => t.toLowerCase());
|
|
79300
79459
|
if (!scenarioTags.includes(tagFilter)) continue;
|
|
79301
79460
|
}
|
|
79302
|
-
if (difficultyFilter && (scenario.config.difficulty ?? "") !== difficultyFilter) continue;
|
|
79303
79461
|
rows.push({
|
|
79304
79462
|
scenario: scenario.title,
|
|
79305
79463
|
slug: (0, import_node_path49.relative)(localDir, file2).replace(/\\/gu, "/").replace(/\.md$/u, "") || (0, import_node_path49.basename)(file2, ".md"),
|
|
@@ -79313,13 +79471,9 @@ function toLocalScenarioRows(opts) {
|
|
|
79313
79471
|
}
|
|
79314
79472
|
function matchesScenarioFilters(scenario, opts) {
|
|
79315
79473
|
const tagFilter = opts.tag?.toLowerCase();
|
|
79316
|
-
const difficultyFilter = opts.difficulty?.toLowerCase();
|
|
79317
79474
|
if (tagFilter && !scenario.tags.some((tag) => tag.toLowerCase() === tagFilter)) {
|
|
79318
79475
|
return false;
|
|
79319
79476
|
}
|
|
79320
|
-
if (difficultyFilter && (scenario.difficulty ?? "") !== difficultyFilter) {
|
|
79321
|
-
return false;
|
|
79322
|
-
}
|
|
79323
79477
|
return true;
|
|
79324
79478
|
}
|
|
79325
79479
|
async function listScenarios(opts) {
|
|
@@ -79385,7 +79539,7 @@ Found ${rows.length} scenario(s)`);
|
|
|
79385
79539
|
}
|
|
79386
79540
|
function createScenarioCommand() {
|
|
79387
79541
|
const scenario = new Command("scenario").description("Browse and manage scenarios");
|
|
79388
|
-
scenario.command("list").description("List runnable scenarios").option("--tag <tag>", "Filter scenarios by tag").option("--
|
|
79542
|
+
scenario.command("list").description("List runnable scenarios").option("--tag <tag>", "Filter scenarios by tag").option("--json", "Output as JSON").action(async (opts) => {
|
|
79389
79543
|
await listScenarios(opts);
|
|
79390
79544
|
});
|
|
79391
79545
|
return scenario;
|
|
@@ -80440,6 +80594,21 @@ function sortSessionsByCreatedAt(sessions) {
|
|
|
80440
80594
|
/**
 * Reports whether a twin session is still considered active.
 *
 * A session counts as inactive only when its `status` is exactly "ended",
 * "expired" or "completed"; any other status value is treated as active.
 *
 * @param {{ status: unknown }} session - Session record carrying a `status`.
 * @returns {boolean} `false` for the three terminal statuses, otherwise `true`.
 */
function isActiveTwinSession(session) {
  switch (session.status) {
    case "ended":
    case "expired":
    case "completed":
      return false;
    default:
      return true;
  }
}
|
|
80597
|
+
/**
 * Checks a locally saved twin session against the server and classifies it.
 *
 * Fetches the session status via `getSessionStatus`. When the lookup fails
 * with status 404, or the returned session is not active according to
 * `isActiveTwinSession`, the saved session is cleared with `clearSession()`
 * and a "stale" result is returned. Any other lookup failure is raised.
 *
 * @param token - Auth token forwarded to `getSessionStatus`.
 * @param saved - Saved session; its `sessionId` and `controlPlaneBaseUrl`
 *   are forwarded to `getSessionStatus`.
 * @returns {Promise<{state: "alive", response: object} | {state: "stale", reason: string}>}
 *   "alive" with the status payload, or "stale" with either "not found" or
 *   the session's reported status as the reason.
 * @throws {CliRuntimeError} When the status lookup fails with a non-404 status.
 */
async function probeSavedTwinSession(token, saved) {
  const { sessionId, controlPlaneBaseUrl } = saved;
  const lookup = await getSessionStatus(token, sessionId, controlPlaneBaseUrl);
  if (lookup.ok) {
    const session = lookup.data;
    if (isActiveTwinSession(session)) {
      return { state: "alive", response: session };
    }
    // The server knows the session but it has finished: drop the local record.
    clearSession();
    return { state: "stale", reason: session.status };
  }
  if (lookup.status !== 404) {
    throw new CliRuntimeError(`Server error fetching session status: ${lookup.error}`);
  }
  // 404: the server no longer has this session, so the local record is stale.
  clearSession();
  return { state: "stale", reason: "not found" };
}
|
|
80443
80612
|
var WORKER_HEALTH_TIMEOUT_BASE_MS = 6e4;
|
|
80444
80613
|
var WORKER_HEALTH_TIMEOUT_PER_TWIN_MS = 2e4;
|
|
80445
80614
|
var WORKER_HEALTH_TIMEOUT_MAX_MS = 18e4;
|
|
@@ -80759,14 +80928,21 @@ function createTwinCommand() {
|
|
|
80759
80928
|
});
|
|
80760
80929
|
});
|
|
80761
80930
|
command.command("start").argument("[twins...]", "Twins to start").option("--all", "Start all available twins").option("--seed <seeds...>", "Seeds per twin (for example: stripe:small-business github:enterprise-repo)").option("--setup <description>", "Describe desired state in natural language and seed the first twin after startup").option("--setup-file <path>", "Read setup description from a file and seed the first twin after startup").option("--seed-file <path>", "Load a JSON or markdown seed file after the first twin is ready").option("--name <name>", "Custom session name shown on the dashboard (defaults to twin names)").option("--ttl-seconds <seconds>", "Requested twin lifetime in seconds (capped server-side)").option("--fresh", "Bypass idempotency replay \u2014 always create a new session even if request params match a recent one").description("Start a persistent hosted twin session with hosted twin APIs").action(async (twins, opts) => {
|
|
80931
|
+
const token = requireToken();
|
|
80762
80932
|
const existing = loadSession();
|
|
80763
80933
|
if (existing) {
|
|
80764
|
-
|
|
80765
|
-
|
|
80934
|
+
const probe = await probeSavedTwinSession(token, existing);
|
|
80935
|
+
if (probe.state === "alive") {
|
|
80936
|
+
throw new CliRuntimeError(
|
|
80937
|
+
`Active session found: ${existing.sessionId}
|
|
80766
80938
|
Run 'archal twin stop' first, or 'archal twin status' to inspect it.`
|
|
80939
|
+
);
|
|
80940
|
+
}
|
|
80941
|
+
process.stderr.write(
|
|
80942
|
+
`${DIM6}[twin] Clearing stale local session ${existing.sessionId} (${probe.reason}); starting a new one.${RESET6}
|
|
80943
|
+
`
|
|
80767
80944
|
);
|
|
80768
80945
|
}
|
|
80769
|
-
const token = requireToken();
|
|
80770
80946
|
const requestedTwins = await resolveRequestedTwins(token, twins, opts.all);
|
|
80771
80947
|
if (requestedTwins.length === 0) {
|
|
80772
80948
|
if (opts.all) {
|
|
@@ -80843,25 +81019,15 @@ Run 'archal twin stop' first, or 'archal twin status' to inspect it.`
|
|
|
80843
81019
|
`);
|
|
80844
81020
|
return;
|
|
80845
81021
|
}
|
|
80846
|
-
const
|
|
80847
|
-
if (
|
|
80848
|
-
if (result.status === 404) {
|
|
80849
|
-
process.stderr.write(`${DIM6}Session ${saved.sessionId} is no longer active (not found).${RESET6}
|
|
80850
|
-
`);
|
|
80851
|
-
clearSession();
|
|
80852
|
-
return;
|
|
80853
|
-
}
|
|
80854
|
-
throw new CliRuntimeError(`Server error fetching session status: ${result.error}`);
|
|
80855
|
-
}
|
|
80856
|
-
const response = result.data;
|
|
80857
|
-
if (response.status === "ended" || response.status === "expired" || response.status === "completed") {
|
|
81022
|
+
const probe = await probeSavedTwinSession(token, saved);
|
|
81023
|
+
if (probe.state === "stale") {
|
|
80858
81024
|
process.stderr.write(
|
|
80859
|
-
`${DIM6}Session ${saved.sessionId} is no longer active (${
|
|
81025
|
+
`${DIM6}Session ${saved.sessionId} is no longer active (${probe.reason}).${RESET6}
|
|
80860
81026
|
`
|
|
80861
81027
|
);
|
|
80862
|
-
clearSession();
|
|
80863
81028
|
return;
|
|
80864
81029
|
}
|
|
81030
|
+
const response = probe.response;
|
|
80865
81031
|
if (opts.json) {
|
|
80866
81032
|
process.stdout.write(`${JSON.stringify({ ...response, sessionId: saved.sessionId }, null, 2)}
|
|
80867
81033
|
`);
|
|
@@ -26994,7 +26994,6 @@ var scenarioConfigSchema = external_exports3.object({
|
|
|
26994
26994
|
timeout: external_exports3.number().default(120),
|
|
26995
26995
|
runs: external_exports3.number().default(5),
|
|
26996
26996
|
evaluatorModel: external_exports3.string().optional(),
|
|
26997
|
-
difficulty: external_exports3.enum(["easy", "medium", "hard"]).optional(),
|
|
26998
26997
|
tags: external_exports3.array(external_exports3.string()).default([])
|
|
26999
26998
|
});
|
|
27000
26999
|
|
|
@@ -27194,7 +27194,6 @@ var scenarioConfigSchema = external_exports3.object({
|
|
|
27194
27194
|
timeout: external_exports3.number().default(120),
|
|
27195
27195
|
runs: external_exports3.number().default(5),
|
|
27196
27196
|
evaluatorModel: external_exports3.string().optional(),
|
|
27197
|
-
difficulty: external_exports3.enum(["easy", "medium", "hard"]).optional(),
|
|
27198
27197
|
tags: external_exports3.array(external_exports3.string()).default([])
|
|
27199
27198
|
});
|
|
27200
27199
|
|
package/dist/vitest/index.cjs
CHANGED
|
@@ -30086,7 +30086,6 @@ var scenarioConfigSchema = external_exports3.object({
|
|
|
30086
30086
|
timeout: external_exports3.number().default(120),
|
|
30087
30087
|
runs: external_exports3.number().default(5),
|
|
30088
30088
|
evaluatorModel: external_exports3.string().optional(),
|
|
30089
|
-
difficulty: external_exports3.enum(["easy", "medium", "hard"]).optional(),
|
|
30090
30089
|
tags: external_exports3.array(external_exports3.string()).default([])
|
|
30091
30090
|
});
|
|
30092
30091
|
|
package/dist/vitest/index.js
CHANGED
|
@@ -8,11 +8,11 @@ import {
|
|
|
8
8
|
readArchalVitestConfig,
|
|
9
9
|
resetArchalTwins,
|
|
10
10
|
resolveRuntimeModule
|
|
11
|
-
} from "./chunk-
|
|
11
|
+
} from "./chunk-L6HSMJ3F.js";
|
|
12
12
|
import {
|
|
13
13
|
encodeConfig,
|
|
14
14
|
getSessionIdFilePath
|
|
15
|
-
} from "./chunk-
|
|
15
|
+
} from "./chunk-KTMNDJFB.js";
|
|
16
16
|
import {
|
|
17
17
|
__commonJS,
|
|
18
18
|
__toESM
|
|
@@ -27434,7 +27434,6 @@ var scenarioConfigSchema = external_exports3.object({
|
|
|
27434
27434
|
timeout: external_exports3.number().default(120),
|
|
27435
27435
|
runs: external_exports3.number().default(5),
|
|
27436
27436
|
evaluatorModel: external_exports3.string().optional(),
|
|
27437
|
-
difficulty: external_exports3.enum(["easy", "medium", "hard"]).optional(),
|
|
27438
27437
|
tags: external_exports3.array(external_exports3.string()).default([])
|
|
27439
27438
|
});
|
|
27440
27439
|
|