archal 0.9.9 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -31869,7 +31869,6 @@ var init_scenario_schemas = __esm({
31869
31869
  timeout: external_exports3.number().default(120),
31870
31870
  runs: external_exports3.number().default(5),
31871
31871
  evaluatorModel: external_exports3.string().optional(),
31872
- difficulty: external_exports3.enum(["easy", "medium", "hard"]).optional(),
31873
31872
  tags: external_exports3.array(external_exports3.string()).default([])
31874
31873
  });
31875
31874
  }
@@ -58333,8 +58332,7 @@ function loadArchalFile(explicitPath, ...searchFrom) {
58333
58332
  );
58334
58333
  }
58335
58334
  if (typeof c["agentModel"] === "string") r.agentModel = c["agentModel"].trim() || void 0;
58336
- if (typeof c["harness"] === "string") r.harness = c["harness"].trim() || void 0;
58337
- if (typeof c["model"] === "string") r.model = c["model"].trim() || void 0;
58335
+ if (typeof c["evaluatorModel"] === "string") r.evaluatorModel = c["evaluatorModel"].trim() || void 0;
58338
58336
  if (typeof c["runs"] === "number" && Number.isInteger(c["runs"]) && c["runs"] > 0) r.runs = c["runs"];
58339
58337
  if (typeof c["timeout"] === "number" && c["timeout"] > 0) r.timeout = c["timeout"];
58340
58338
  return { config: r, configDir: (0, import_node_path28.dirname)(filePath) };
@@ -60424,21 +60422,12 @@ function parseConfigSection(configText) {
60424
60422
  result.evaluatorModel = value;
60425
60423
  break;
60426
60424
  }
60427
- case "difficulty": {
60428
- const normalized = value.toLowerCase();
60429
- if (normalized === "easy" || normalized === "medium" || normalized === "hard") {
60430
- result.difficulty = normalized;
60431
- } else {
60432
- warn(`Invalid difficulty "${value}" in scenario config \u2014 ignoring. Must be "easy", "medium", or "hard".`);
60433
- }
60434
- break;
60435
- }
60436
60425
  case "tags": {
60437
60426
  result.tags = value.split(",").map((tag) => tag.trim()).filter(Boolean);
60438
60427
  break;
60439
60428
  }
60440
60429
  default: {
60441
- const knownKeys = ["twins", "timeout", "seed", "runs", "evaluator", "evaluator-model", "evaluatormodel", "model", "difficulty", "tags"];
60430
+ const knownKeys = ["twins", "timeout", "seed", "runs", "evaluator", "evaluator-model", "evaluatormodel", "model", "tags"];
60442
60431
  const close = knownKeys.find((k) => k.startsWith(key));
60443
60432
  const hint = close ? ` Did you mean "${close}"?` : ` Known keys: ${knownKeys.join(", ")}.`;
60444
60433
  warn(`Unknown config key "${key}" in scenario \u2014 ignored.${hint}`);
@@ -60502,8 +60491,7 @@ function parseScenarioMarkdown(markdown, sourcePath) {
60502
60491
  timeout: parsedConfig.timeout ?? 180,
60503
60492
  runs: parsedConfig.runs ?? 1,
60504
60493
  evaluatorModel: parsedConfig.evaluatorModel,
60505
- tags: parsedConfig.tags ?? [],
60506
- difficulty: parsedConfig.difficulty
60494
+ tags: parsedConfig.tags ?? []
60507
60495
  };
60508
60496
  debug("Parsed scenario", {
60509
60497
  title: sections.title,
@@ -60813,7 +60801,7 @@ function traceEntriesForTool(trace, toolName) {
60813
60801
  // src/runner/reporter-files.ts
60814
60802
  function writeLastRunLog(report) {
60815
60803
  try {
60816
- const dir = (0, import_node_path8.join)(process.cwd(), ".archal");
60804
+ const dir = (0, import_node_path8.join)(process.cwd(), ".archal", "cache");
60817
60805
  if (!(0, import_node_fs10.existsSync)(dir)) {
60818
60806
  (0, import_node_fs10.mkdirSync)(dir, { recursive: true });
60819
60807
  }
@@ -60839,7 +60827,7 @@ function writeLastRunLog(report) {
60839
60827
  }
60840
60828
  function saveRunTrace(report) {
60841
60829
  try {
60842
- const dir = (0, import_node_path8.join)(process.cwd(), ".archal", "runs");
60830
+ const dir = (0, import_node_path8.join)(process.cwd(), ".archal", "cache", "runs");
60843
60831
  (0, import_node_fs10.mkdirSync)(dir, { recursive: true });
60844
60832
  const ts = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-").slice(0, 19);
60845
60833
  const slug2 = (report.scenarioTitle ?? "untitled").toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 60);
@@ -63404,6 +63392,55 @@ function buildCriterionMaps(scenario) {
63404
63392
  }
63405
63393
  return { criterionDescriptions, criterionTypes };
63406
63394
  }
63395
+ function summarizeExistingState(snapshot) {
63396
+ if (!snapshot) return "(existing state)";
63397
+ const nonEmpty = Object.entries(snapshot).filter(([, entities]) => Array.isArray(entities) && entities.length > 0).map(([collection, entities]) => ({ collection, count: entities.length })).sort((a, b) => b.count - a.count).slice(0, 2);
63398
+ if (nonEmpty.length === 0) return "(existing state)";
63399
+ return nonEmpty.map(({ count, collection }) => `${count} ${collection}`).join(", ");
63400
+ }
63401
+ function emitSkipReapplyBanner(declaredSeed, stateSummary) {
63402
+ const body = `Seed not re-applied: scenario declares seed "${declaredSeed}" but reusing existing
63403
+ session state from an earlier \`archal twin start\` (has: ${stateSummary}).
63404
+ Pass --fresh-seed to reset and re-apply, or run \`archal twin stop\` and retry.`;
63405
+ warn(body);
63406
+ }
63407
+ var RESET_STATE_TIMEOUT_MS = 3e4;
63408
+ var RESET_STATE_RETRIES = 6;
63409
+ var RESET_STATE_BACKOFF_MS = [1500, 2500, 3500, 5e3, 6e3, 7e3];
63410
+ async function resetLiveTwinState(twinApiUrl, bearerToken, adminAuth) {
63411
+ const baseUrl = twinApiUrl.replace(/\/(mcp|api)\/?$/, "");
63412
+ const url2 = `${baseUrl}/state`;
63413
+ const headers = buildTwinHttpHeaders(bearerToken, adminAuth);
63414
+ let response;
63415
+ try {
63416
+ response = await fetchWithRetry2(
63417
+ url2,
63418
+ { method: "DELETE", headers },
63419
+ {
63420
+ maxRetries: RESET_STATE_RETRIES,
63421
+ timeoutMs: RESET_STATE_TIMEOUT_MS,
63422
+ backoffMs: RESET_STATE_BACKOFF_MS,
63423
+ label: "twin-state-reset"
63424
+ }
63425
+ );
63426
+ } catch (error49) {
63427
+ const detail2 = error49 instanceof Error ? error49.message : String(error49);
63428
+ return { ok: false, status: 0, detail: detail2 };
63429
+ }
63430
+ if (response.ok) {
63431
+ return { ok: true };
63432
+ }
63433
+ let detail;
63434
+ try {
63435
+ const raw = await response.text();
63436
+ const parsed = raw.trim() ? JSON.parse(raw) : {};
63437
+ const msg = parsed["error"] ?? parsed["message"];
63438
+ detail = typeof msg === "string" ? msg : raw.slice(0, 200) || void 0;
63439
+ } catch {
63440
+ detail = void 0;
63441
+ }
63442
+ return { ok: false, status: response.status, detail };
63443
+ }
63407
63444
  async function hydrateStaticSeedSelections(seedSelections, seedDescription, options) {
63408
63445
  const autoSelections = generateSeedSelections(
63409
63446
  seedSelections.map((selection) => selection.twinName),
@@ -63463,6 +63500,43 @@ async function hydrateStaticSeedSelections(seedSelections, seedDescription, opti
63463
63500
  continue;
63464
63501
  }
63465
63502
  if (options.isReusedEnvSession && twinHasExistingState(selection.twinName)) {
63503
+ if (options.freshSeed) {
63504
+ const twinUrl2 = options.cloudTwinUrls[selection.twinName];
63505
+ if (!twinUrl2) {
63506
+ throw new Error(
63507
+ `--fresh-seed requested but hosted session is missing the API base URL for twin "${selection.twinName}". Re-provision the hosted session before retrying.`
63508
+ );
63509
+ }
63510
+ info(
63511
+ `--fresh-seed: resetting twin "${selection.twinName}" and re-applying seed "${selection.seedName}".`
63512
+ );
63513
+ const resetResult = await resetLiveTwinState(
63514
+ twinUrl2,
63515
+ options.apiBearerToken,
63516
+ adminAuth
63517
+ );
63518
+ if (!resetResult.ok) {
63519
+ throw new Error(
63520
+ `--fresh-seed: failed to reset state on twin "${selection.twinName}" (HTTP ${resetResult.status}${resetResult.detail ? `: ${resetResult.detail}` : ""}). Stop the twin session with \`archal twin stop\` and retry.`
63521
+ );
63522
+ }
63523
+ const loadResult2 = await loadNamedSeedIntoLiveTwin(
63524
+ twinUrl2,
63525
+ selection.seedName,
63526
+ options.apiBearerToken,
63527
+ adminAuth
63528
+ );
63529
+ if (!loadResult2.ok) {
63530
+ throw new Error(
63531
+ `--fresh-seed: could not load seed "${selection.seedName}" into twin "${selection.twinName}" (HTTP ${loadResult2.status}${loadResult2.detail ? `: ${loadResult2.detail}` : ""}). Re-provision the hosted session with the requested seed, or point --seed at a seed this twin exposes.`
63532
+ );
63533
+ }
63534
+ continue;
63535
+ }
63536
+ const stateSummary = summarizeExistingState(
63537
+ probedSnapshot ? normalizeSeedState(probedSnapshot[selection.twinName]) : null
63538
+ );
63539
+ emitSkipReapplyBanner(selection.seedName, stateSummary);
63466
63540
  info(
63467
63541
  `Using existing state on reused session; scenario seed '${selection.seedName}' not re-applied.`
63468
63542
  );
@@ -64311,40 +64385,27 @@ function resolveMarkdownPromptOrder(markdownFiles) {
64311
64385
  // src/runner/harness-paths.ts
64312
64386
  var import_node_fs24 = require("fs");
64313
64387
  var import_node_path22 = require("path");
64314
- var MANIFEST_FILE = "archal-harness.json";
64315
- var EXPLICIT_HARNESS_ENTRY_CANDIDATES = [
64316
- "harness.ts",
64317
- "harness.js",
64318
- "harness.mjs",
64319
- "harness.cjs",
64320
- "archal-harness.ts",
64321
- "archal-harness.js",
64322
- "archal-harness.mjs",
64323
- "archal-harness.cjs"
64324
- ];
64325
- var PROJECT_ARCHAL_HARNESS_FILENAMES = /* @__PURE__ */ new Set([
64388
+ var MANIFEST_FILE = "harness.json";
64389
+ var HARNESS_ENTRY_CANDIDATES = [
64326
64390
  "harness.ts",
64327
64391
  "harness.js",
64328
64392
  "harness.mjs",
64329
64393
  "harness.cjs"
64330
- ]);
64331
- var PROJECT_ARCHAL_HARNESS_ENTRY_CANDIDATES = [
64332
- "harness.ts",
64333
- "harness.js",
64334
- "harness.mjs",
64335
- "harness.cjs",
64336
- "archal-harness.ts",
64337
- "archal-harness.js",
64338
- "archal-harness.mjs",
64339
- "archal-harness.cjs"
64340
64394
  ];
64395
+ var PROJECT_ARCHAL_HARNESS_FILENAMES = new Set(HARNESS_ENTRY_CANDIDATES);
64341
64396
  var HARNESS_DISCOVERY_BOUNDARY_FILES = [
64342
64397
  ".git",
64343
- ".archal.json"
64398
+ ".archal"
64344
64399
  ];
64345
- function trimToUndefined(value) {
64346
- const trimmed = value?.trim();
64347
- return trimmed ? trimmed : void 0;
64400
+ var PROJECT_ROOT_MARKERS = [
64401
+ "package.json",
64402
+ ".archal.json",
64403
+ ".git"
64404
+ ];
64405
+ function isProperAncestor(candidate, target) {
64406
+ if (candidate === target) return false;
64407
+ const withSep = candidate.endsWith(import_node_path22.sep) ? candidate : candidate + import_node_path22.sep;
64408
+ return target.startsWith(withSep);
64348
64409
  }
64349
64410
  function isProjectArchalHarnessFile(entryPath) {
64350
64411
  return (0, import_node_path22.basename)((0, import_node_path22.dirname)(entryPath)) === ".archal" && PROJECT_ARCHAL_HARNESS_FILENAMES.has((0, import_node_path22.basename)(entryPath).toLowerCase());
@@ -64356,6 +64417,28 @@ function resolveHarnessExecutionRootFromPath(entryPath) {
64356
64417
  }
64357
64418
  return entryDir;
64358
64419
  }
64420
+ function resolveHarnessProjectRoot(startPath, fallback, stopAt) {
64421
+ let current;
64422
+ try {
64423
+ const stats = (0, import_node_fs24.statSync)(startPath);
64424
+ current = stats.isDirectory() ? startPath : (0, import_node_path22.dirname)(startPath);
64425
+ } catch {
64426
+ current = (0, import_node_path22.dirname)(startPath);
64427
+ }
64428
+ const effectiveFallback = fallback ?? current;
64429
+ while (true) {
64430
+ for (const marker of PROJECT_ROOT_MARKERS) {
64431
+ if ((0, import_node_fs24.existsSync)((0, import_node_path22.resolve)(current, marker))) {
64432
+ return current;
64433
+ }
64434
+ }
64435
+ const parent = (0, import_node_path22.dirname)(current);
64436
+ if (parent === current || stopAt !== void 0 && isProperAncestor(current, stopAt)) {
64437
+ return effectiveFallback;
64438
+ }
64439
+ current = parent;
64440
+ }
64441
+ }
64359
64442
  function normalizeHarnessDiscoveryStart(startPath) {
64360
64443
  const trimmed = startPath.trim();
64361
64444
  if (!trimmed) {
@@ -64400,20 +64483,11 @@ function discoverRepoLocalHarness(...startPaths) {
64400
64483
  let current = startDir;
64401
64484
  while (!visited.has(current)) {
64402
64485
  visited.add(current);
64403
- for (const candidate of PROJECT_ARCHAL_HARNESS_ENTRY_CANDIDATES) {
64486
+ for (const candidate of HARNESS_ENTRY_CANDIDATES) {
64404
64487
  if ((0, import_node_fs24.existsSync)((0, import_node_path22.resolve)(current, candidate))) {
64405
- return {
64406
- harnessDir: current,
64407
- reason: candidate.startsWith("archal-") ? "archal-harness" : "harness"
64408
- };
64488
+ return { harnessDir: current };
64409
64489
  }
64410
64490
  }
64411
- if (trimToUndefined(readPackageJson3(current)?.archal?.harness)) {
64412
- return {
64413
- harnessDir: current,
64414
- reason: "package.json#archal.harness"
64415
- };
64416
- }
64417
64491
  const reachedBoundary = HARNESS_DISCOVERY_BOUNDARY_FILES.some((file2) => (0, import_node_fs24.existsSync)((0, import_node_path22.resolve)(current, file2)));
64418
64492
  const parent = (0, import_node_path22.dirname)(current);
64419
64493
  if (reachedBoundary || parent === current) {
@@ -64439,29 +64513,11 @@ function resolveHarnessLocation(harnessInput, visited = /* @__PURE__ */ new Set(
64439
64513
  return {
64440
64514
  executionRoot,
64441
64515
  manifestPath: (0, import_node_path22.resolve)(executionRoot, MANIFEST_FILE),
64442
- entryPath: resolvedInput
64443
- };
64444
- }
64445
- const packageJson = readPackageJson3(resolvedInput);
64446
- const configuredHarnessPath = trimToUndefined(packageJson?.archal?.harness);
64447
- if (configuredHarnessPath) {
64448
- const configuredAbsolutePath = (0, import_node_path22.resolve)(resolvedInput, configuredHarnessPath);
64449
- if (!(0, import_node_fs24.existsSync)(configuredAbsolutePath)) {
64450
- throw new Error(
64451
- `package.json archal.harness points to a missing path: ${configuredAbsolutePath}`
64452
- );
64453
- }
64454
- const configuredStats = (0, import_node_fs24.statSync)(configuredAbsolutePath);
64455
- if (configuredStats.isDirectory()) {
64456
- return resolveHarnessLocation(configuredAbsolutePath, visited);
64457
- }
64458
- return {
64459
- executionRoot: resolvedInput,
64460
- manifestPath: (0, import_node_path22.resolve)(resolvedInput, MANIFEST_FILE),
64461
- entryPath: configuredAbsolutePath
64516
+ entryPath: resolvedInput,
64517
+ projectRoot: resolveHarnessProjectRoot(resolvedInput, executionRoot)
64462
64518
  };
64463
64519
  }
64464
- for (const candidate of EXPLICIT_HARNESS_ENTRY_CANDIDATES) {
64520
+ for (const candidate of HARNESS_ENTRY_CANDIDATES) {
64465
64521
  const candidatePath = (0, import_node_path22.resolve)(resolvedInput, candidate);
64466
64522
  if (!(0, import_node_fs24.existsSync)(candidatePath)) {
64467
64523
  continue;
@@ -64469,12 +64525,14 @@ function resolveHarnessLocation(harnessInput, visited = /* @__PURE__ */ new Set(
64469
64525
  return {
64470
64526
  executionRoot: resolvedInput,
64471
64527
  manifestPath: (0, import_node_path22.resolve)(resolvedInput, MANIFEST_FILE),
64472
- entryPath: candidatePath
64528
+ entryPath: candidatePath,
64529
+ projectRoot: resolvedInput
64473
64530
  };
64474
64531
  }
64475
64532
  return {
64476
64533
  executionRoot: resolvedInput,
64477
- manifestPath: (0, import_node_path22.resolve)(resolvedInput, MANIFEST_FILE)
64534
+ manifestPath: (0, import_node_path22.resolve)(resolvedInput, MANIFEST_FILE),
64535
+ projectRoot: resolvedInput
64478
64536
  };
64479
64537
  }
64480
64538
 
@@ -64496,14 +64554,14 @@ var DIRECT_FILE_RUNNERS = {
64496
64554
  ".mjs": { command: "node", args: [] },
64497
64555
  ".cjs": { command: "node", args: [] }
64498
64556
  };
64499
- function inferLocalCommandFromEntryPath(executionRoot, entryPath) {
64557
+ function inferLocalCommandFromEntryPath(spawnCwd, entryPath) {
64500
64558
  const runner = DIRECT_FILE_RUNNERS[(0, import_node_path23.extname)(entryPath).toLowerCase()];
64501
64559
  if (!runner) {
64502
64560
  throw new Error(
64503
64561
  `Unsupported harness entrypoint extension for ${entryPath}. Use .ts, .js, .mjs, or .cjs.`
64504
64562
  );
64505
64563
  }
64506
- const relativeEntryPath = (0, import_node_path23.relative)(executionRoot, entryPath) || entryPath;
64564
+ const relativeEntryPath = (0, import_node_path23.relative)(spawnCwd, entryPath) || entryPath;
64507
64565
  return {
64508
64566
  command: runner.command,
64509
64567
  args: [...runner.args, relativeEntryPath]
@@ -64520,7 +64578,7 @@ function parseHarnessManifest(manifestPath) {
64520
64578
  );
64521
64579
  }
64522
64580
  }
64523
- function trimToUndefined2(value) {
64581
+ function trimToUndefined(value) {
64524
64582
  const trimmed = value?.trim();
64525
64583
  return trimmed ? trimmed : void 0;
64526
64584
  }
@@ -64530,10 +64588,11 @@ function buildResolvedLocalHarness(location, explicitModel, manifest) {
64530
64588
  command: manifest.local.command,
64531
64589
  args: manifest.local.args,
64532
64590
  env: manifest.local.env
64533
- } : location.entryPath ? inferLocalCommandFromEntryPath(location.executionRoot, location.entryPath) : void 0;
64534
- const model = explicitModel ?? trimToUndefined2(manifest?.defaultModel);
64591
+ } : location.entryPath ? inferLocalCommandFromEntryPath(location.projectRoot, location.entryPath) : void 0;
64592
+ const model = explicitModel ?? trimToUndefined(manifest?.defaultModel);
64535
64593
  return {
64536
64594
  harnessDir: location.executionRoot,
64595
+ projectRoot: location.projectRoot,
64537
64596
  manifestPath: location.manifestPath,
64538
64597
  manifest,
64539
64598
  model,
@@ -64543,7 +64602,7 @@ function buildResolvedLocalHarness(location, explicitModel, manifest) {
64543
64602
  }
64544
64603
  function resolveLocalHarness(harnessDirInput, explicitModel) {
64545
64604
  const location = resolveHarnessLocation(harnessDirInput);
64546
- const explicit = trimToUndefined2(explicitModel);
64605
+ const explicit = trimToUndefined(explicitModel);
64547
64606
  if (!(0, import_node_fs25.existsSync)(location.manifestPath)) {
64548
64607
  if (location.entryPath) {
64549
64608
  return buildResolvedLocalHarness(location, explicit);
@@ -64677,7 +64736,7 @@ function resolveEngineConfiguration(scenario, options, timeoutSeconds, projectCo
64677
64736
  model: rawEngineModel?.trim() || void 0,
64678
64737
  command: "docker",
64679
64738
  args: [],
64680
- cwd: harnessDir,
64739
+ cwd: resolveHarnessProjectRoot(harnessDir),
64681
64740
  promptContext: void 0
64682
64741
  };
64683
64742
  } else {
@@ -64705,8 +64764,12 @@ function resolveEngineConfiguration(scenario, options, timeoutSeconds, projectCo
64705
64764
  model: resolvedHarness.model,
64706
64765
  command: commandConfig.command,
64707
64766
  args: commandConfig.args,
64708
- env: commandConfig.env,
64709
- cwd: resolvedHarness.harnessDir,
64767
+ env: {
64768
+ ...commandConfig.env,
64769
+ ARCHAL_PROJECT_ROOT: resolvedHarness.projectRoot,
64770
+ ARCHAL_HARNESS_ENTRY_DIR: resolvedHarness.harnessDir
64771
+ },
64772
+ cwd: resolvedHarness.projectRoot,
64710
64773
  promptContext: resolvedHarness.promptContext
64711
64774
  };
64712
64775
  }
@@ -65107,7 +65170,7 @@ function parseBatchJudgeResponse(text, criteriaIds) {
65107
65170
  if (results.size > 0) {
65108
65171
  const missing = criteriaIds.filter((id) => !results.has(id));
65109
65172
  if (missing.length > 0) {
65110
- warn(`Batch parse: ${missing.length} criteria missing from response (will fall back to fail/0.3): ${missing.join(", ")}`);
65173
+ warn(`Batch parse: ${missing.length} criteria missing from response (sequential fallback will be attempted, #2551): ${missing.join(", ")}`);
65111
65174
  }
65112
65175
  return results;
65113
65176
  }
@@ -66219,6 +66282,28 @@ function resolveJudgeProvider(config2) {
66219
66282
  }
66220
66283
  return { provider, apiKey, canUseManagedRouting };
66221
66284
  }
66285
+ function accumulateTokenUsageSummaries(primary, secondary) {
66286
+ if (!primary && !secondary) return null;
66287
+ const a = primary ?? { inputTokens: 0, outputTokens: 0 };
66288
+ const b = secondary ?? { inputTokens: 0, outputTokens: 0 };
66289
+ const callCount = typeof a.llmCallCount === "number" || typeof b.llmCallCount === "number" ? (a.llmCallCount ?? 0) + (b.llmCallCount ?? 0) : void 0;
66290
+ const combined = {
66291
+ inputTokens: a.inputTokens + b.inputTokens,
66292
+ outputTokens: a.outputTokens + b.outputTokens,
66293
+ ...callCount !== void 0 ? { llmCallCount: callCount } : {}
66294
+ };
66295
+ return combined;
66296
+ }
66297
+ async function evaluateMissingCriteriaSequentially(missingCriteria, context, config2) {
66298
+ const results = /* @__PURE__ */ new Map();
66299
+ let tokenUsage = null;
66300
+ for (const criterion of missingCriteria) {
66301
+ const singleResult = await evaluateWithLlm(criterion, context, config2);
66302
+ results.set(criterion.id, singleResult.evaluation);
66303
+ tokenUsage = accumulateTokenUsageSummaries(tokenUsage, singleResult.tokenUsage);
66304
+ }
66305
+ return { results, tokenUsage };
66306
+ }
66222
66307
  async function evaluateBatchWithLlm(criteria, context, config2) {
66223
66308
  const resolved = resolveJudgeProvider(config2);
66224
66309
  if (!resolved) {
@@ -66242,7 +66327,8 @@ async function evaluateBatchWithLlm(criteria, context, config2) {
66242
66327
  provider,
66243
66328
  traceLength: String(context.trace.length)
66244
66329
  });
66245
- const maxTokens = Math.min(256 + criteria.length * 512, 4096);
66330
+ const BATCH_MAX_TOKENS_CEILING = 32768;
66331
+ const maxTokens = Math.min(256 + criteria.length * 512, BATCH_MAX_TOKENS_CEILING);
66246
66332
  try {
66247
66333
  const response = await callLlmWithUsage({
66248
66334
  provider,
@@ -66269,6 +66355,17 @@ async function evaluateBatchWithLlm(criteria, context, config2) {
66269
66355
  const text = response.text;
66270
66356
  const criteriaIds = criteria.map((c) => c.id);
66271
66357
  const parsed = parseBatchJudgeResponse(text, criteriaIds);
66358
+ const missingCriteria = criteria.filter((c) => !parsed.has(c.id));
66359
+ let fallbackResults = /* @__PURE__ */ new Map();
66360
+ let fallbackTokenUsage = null;
66361
+ if (missingCriteria.length > 0) {
66362
+ warn(
66363
+ `Batch evaluation missing ${String(missingCriteria.length)}/${String(criteria.length)} criteria \u2014 falling back to sequential single-criterion evaluation (#2551)`
66364
+ );
66365
+ const fallback = await evaluateMissingCriteriaSequentially(missingCriteria, context, config2);
66366
+ fallbackResults = fallback.results;
66367
+ fallbackTokenUsage = fallback.tokenUsage;
66368
+ }
66272
66369
  return {
66273
66370
  evaluations: criteria.map((c) => {
66274
66371
  const result = parsed.get(c.id);
@@ -66285,15 +66382,24 @@ async function evaluateBatchWithLlm(criteria, context, config2) {
66285
66382
  explanation: result.explanation
66286
66383
  };
66287
66384
  }
66288
- warn(`Batch response missing result for criterion "${c.id}" \u2014 defaulting to fail`);
66385
+ const fallback = fallbackResults.get(c.id);
66386
+ if (fallback) {
66387
+ debug("LLM judge batch fallback (sequential)", {
66388
+ criterion: c.id,
66389
+ status: fallback.status,
66390
+ confidence: fallback.confidence.toFixed(2)
66391
+ });
66392
+ return fallback;
66393
+ }
66394
+ warn(`Batch + sequential fallback both missing result for criterion "${c.id}" \u2014 defaulting to fail`);
66289
66395
  return {
66290
66396
  criterionId: c.id,
66291
66397
  status: "fail",
66292
66398
  confidence: 0.3,
66293
- explanation: "Criterion not found in batch evaluation response"
66399
+ explanation: "Criterion not found in batch or sequential evaluation response"
66294
66400
  };
66295
66401
  }),
66296
- tokenUsage: response.usage
66402
+ tokenUsage: accumulateTokenUsageSummaries(response.usage, fallbackTokenUsage)
66297
66403
  };
66298
66404
  } catch (err) {
66299
66405
  const message = errorMessage(err);
@@ -68749,7 +68855,7 @@ async function assertLocalHarnessBootable(harnessInput, explicitModel, extraEnv)
68749
68855
  command: harness.localCommand.command,
68750
68856
  args: harness.localCommand.args,
68751
68857
  timeoutMs,
68752
- cwd: harness.harnessDir,
68858
+ cwd: harness.projectRoot,
68753
68859
  env: {
68754
68860
  ...harness.localCommand.env,
68755
68861
  ...extraEnv,
@@ -68762,19 +68868,21 @@ async function assertLocalHarnessBootable(harnessInput, explicitModel, extraEnv)
68762
68868
  ARCHAL_REST_CONFIG: restConfigPath,
68763
68869
  ARCHAL_METRICS_FILE: metricsFilePath,
68764
68870
  ARCHAL_AGENT_TRACE_FILE: traceFilePath,
68765
- ARCHAL_PREFLIGHT: "1"
68871
+ ARCHAL_PREFLIGHT: "1",
68872
+ ARCHAL_PROJECT_ROOT: harness.projectRoot,
68873
+ ARCHAL_HARNESS_ENTRY_DIR: harness.harnessDir
68766
68874
  }
68767
68875
  });
68768
68876
  if (result.timedOut) {
68769
68877
  throw new Error(formatLocalHarnessFailure(
68770
68878
  `Harness boot timed out after ${Math.round(timeoutMs / 1e3)}s.`,
68771
- { cwd: harness.harnessDir, phase: "preflight" }
68879
+ { cwd: harness.projectRoot, phase: "preflight" }
68772
68880
  ));
68773
68881
  }
68774
68882
  if (result.exitCode !== 0) {
68775
68883
  const detail = trimSnippet(result.stderr) || trimSnippet(result.stdout) || `Harness exited with code ${result.exitCode}.`;
68776
68884
  throw new Error(formatLocalHarnessFailure(detail, {
68777
- cwd: harness.harnessDir,
68885
+ cwd: harness.projectRoot,
68778
68886
  phase: "preflight"
68779
68887
  }));
68780
68888
  }
@@ -68783,7 +68891,7 @@ async function assertLocalHarnessBootable(harnessInput, explicitModel, extraEnv)
68783
68891
  throw err;
68784
68892
  }
68785
68893
  throw new Error(formatLocalHarnessFailure(errorMessage(err), {
68786
- cwd: harness.harnessDir,
68894
+ cwd: harness.projectRoot,
68787
68895
  phase: "preflight"
68788
68896
  }));
68789
68897
  } finally {
@@ -71576,11 +71684,11 @@ function resolveEngineMode(opts) {
71576
71684
  }
71577
71685
  if (opts.task && opts.task.trim()) {
71578
71686
  throw new CliUsageError(
71579
- 'No agent configured.\n --task still needs a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Next step: archal harness check ./.archal/harness.ts\n Example: archal run --task "List recent issues" --harness ./.archal/harness.ts --twin github\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
71687
+ 'No agent configured.\n --task still needs a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Example: archal run --task "List recent issues" --harness ./.archal/harness.ts --twin github\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
71580
71688
  );
71581
71689
  }
71582
71690
  throw new CliUsageError(
71583
- 'No agent configured.\n Archal could not find a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Next step: archal harness check ./.archal/harness.ts\n Example: archal run scenario.md --harness ./.archal/harness.ts\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
71691
+ 'No agent configured.\n Archal could not find a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Example: archal run scenario.md --harness ./.archal/harness.ts\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
71584
71692
  );
71585
71693
  }
71586
71694
  function resolveRequestedEngineModel(opts) {
@@ -71839,19 +71947,17 @@ function resolveHarnessAndEngine(opts, timeout, resolved) {
71839
71947
  if (opts.task && opts.task.trim()) {
71840
71948
  validationError(
71841
71949
  "No agent configured.",
71842
- '--task still needs a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Next step: archal harness check ./.archal/harness.ts\n Example: archal run --task "List recent issues" --harness ./.archal/harness.ts --twin github\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
71950
+ '--task still needs a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Example: archal run --task "List recent issues" --harness ./.archal/harness.ts --twin github\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
71843
71951
  );
71844
71952
  }
71845
71953
  validationError(
71846
71954
  "No agent configured.",
71847
- 'Archal could not find a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Next step: archal harness check ./.archal/harness.ts\n Example: archal run scenario.md --harness ./.archal/harness.ts\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
71955
+ 'Archal could not find a runnable agent path.\n Archal looked for: --harness, repo-local harness discovery, or .archal.json with "agent".\n Example: archal run scenario.md --harness ./.archal/harness.ts\n Or add: { "agent": "npx tsx ./.archal/harness.ts", "twins": ["github"] }'
71848
71956
  );
71849
71957
  }
71850
71958
  inferredRepoHarness = true;
71851
71959
  opts.harnessDir = inferredHarness.harnessDir;
71852
- info(
71853
- `Using repo-local harness from ${inferredHarness.harnessDir} (${inferredHarness.reason}).`
71854
- );
71960
+ info(`Using repo-local harness from ${inferredHarness.harnessDir}.`);
71855
71961
  }
71856
71962
  }
71857
71963
  assertDockerAndSandboxAreCompatible(dockerExplicitlyRequested, opts.sandbox);
@@ -71924,6 +72030,21 @@ function resolveHarnessAndEngine(opts, timeout, resolved) {
71924
72030
  if (opts.sandbox) {
71925
72031
  return engine;
71926
72032
  }
72033
+ if (engine.mode === "local" && opts.proxy === void 0) {
72034
+ opts.proxy = true;
72035
+ info("TLS proxy enabled by default for local harness runs. Use --no-proxy to disable.");
72036
+ const existingProxy = process.env["HTTPS_PROXY"]?.trim() || process.env["https_proxy"]?.trim();
72037
+ const existingCa = process.env["NODE_EXTRA_CA_CERTS"]?.trim();
72038
+ if (existingProxy || existingCa) {
72039
+ const collisions = [
72040
+ existingProxy ? "HTTPS_PROXY" : null,
72041
+ existingCa ? "NODE_EXTRA_CA_CERTS" : null
72042
+ ].filter(Boolean).join(" / ");
72043
+ warn(
72044
+ `Auto-enabled --proxy will override your shell's ${collisions} for the agent process. If you rely on a corporate CA or custom proxy, pass --no-proxy and wire twin base URLs via ARCHAL_<TWIN>_BASE_URL env vars instead.`
72045
+ );
72046
+ }
72047
+ }
71927
72048
  if (engine.mode === "local" && !process.env["ARCHAL_ENGINE_API_KEY"]) {
71928
72049
  const requestedModel = firstNonEmpty(
71929
72050
  engine.model,
@@ -72260,9 +72381,9 @@ function resolveRunConfigWithSources(scenarioPath, opts, sources, preloaded) {
72260
72381
  detail: "ARCHAL_MODEL"
72261
72382
  },
72262
72383
  {
72263
- value: toTrimmedString(archalConfig?.config.model),
72384
+ value: toTrimmedString(archalConfig?.config.evaluatorModel),
72264
72385
  source: "project",
72265
- detail: ".archal.json:model"
72386
+ detail: ".archal.json:evaluatorModel"
72266
72387
  },
72267
72388
  {
72268
72389
  value: toTrimmedString(dashboardConfig["runEvaluatorModel"]),
@@ -72293,11 +72414,6 @@ function resolveRunConfigWithSources(scenarioPath, opts, sources, preloaded) {
72293
72414
  source: "env",
72294
72415
  detail: "ARCHAL_DEFAULT_HARNESS"
72295
72416
  },
72296
- {
72297
- value: archalConfig?.config.harness ? (0, import_node_path38.resolve)(archalConfig.configDir, archalConfig.config.harness) : void 0,
72298
- source: "project",
72299
- detail: ".archal.json:harness"
72300
- },
72301
72417
  {
72302
72418
  value: toTrimmedString(dashboardConfig["runHarness"]),
72303
72419
  source: "project",
@@ -72583,9 +72699,6 @@ function applyResolvedRunConfigToOptions(opts, resolved) {
72583
72699
  if (resolved.model.source === "project") {
72584
72700
  opts.model = resolved.model.value;
72585
72701
  }
72586
- if (resolved.harness.value && resolved.harness.source === "project" && resolved.harness.detail === ".archal.json:harness" && !opts.harness) {
72587
- opts.harness = resolved.harness.value;
72588
- }
72589
72702
  const hasExplicitLocalHarness = (() => {
72590
72703
  if (firstNonEmpty(opts.harnessDir, process.env["ARCHAL_HARNESS_DIR"])) {
72591
72704
  return true;
@@ -78088,6 +78201,7 @@ async function executeRunForScenario(scenarioArg, opts, command, configDefaults,
78088
78201
  hostedSessionId: ctx.backendSessionId,
78089
78202
  isReusedEnvSession: ctx.isReusedEnvSession,
78090
78203
  keepState: opts.keepState,
78204
+ freshSeed: opts.freshSeed,
78091
78205
  allowEmptyState: opts.allowEmptyState,
78092
78206
  noSeedCache: !opts.seedCache,
78093
78207
  staticSeed: hasStaticSeed,
@@ -78191,8 +78305,33 @@ function deriveTitle(task) {
78191
78305
  const lastSpace = truncated.lastIndexOf(" ");
78192
78306
  return lastSpace > 20 ? truncated.slice(0, lastSpace) : truncated;
78193
78307
  }
78308
+ var LEADING_CONTEXT_PATTERNS = [
78309
+ /^using\s+[^,]+?,\s+/,
78310
+ /^(?:please|kindly)\s+/,
78311
+ /^(?:can|could|would|will)\s+you\s+(?:please\s+)?/,
78312
+ /^(?:i\s+want\s+(?:you\s+)?to|i\s+(?:would\s+)?(?:'d\s+)?like\s+(?:you\s+)?to|i\s+need\s+(?:you\s+)?to|i'?d\s+like\s+(?:you\s+)?to)\s+/,
78313
+ /^help\s+me\s+(?:please\s+)?/,
78314
+ /^for\s+me,\s+/
78315
+ ];
78316
+ function stripLeadingContext(input) {
78317
+ let cur = input;
78318
+ for (let i = 0; i < 4; i++) {
78319
+ let changed = false;
78320
+ for (const pattern of LEADING_CONTEXT_PATTERNS) {
78321
+ const next = cur.replace(pattern, "");
78322
+ if (next !== cur) {
78323
+ cur = next;
78324
+ changed = true;
78325
+ }
78326
+ }
78327
+ if (!changed) break;
78328
+ }
78329
+ return cur;
78330
+ }
78194
78331
  function isReadOnlyTask(task) {
78195
- const normalized = task.trim().toLowerCase().replace(/\s+/g, " ").replace(/^(please|can you|could you|would you|help me)\s+/, "");
78332
+ const normalized = stripLeadingContext(
78333
+ task.trim().toLowerCase().replace(/\s+/g, " ")
78334
+ );
78196
78335
  const writePrefixes = [
78197
78336
  "mark ",
78198
78337
  "star ",
@@ -78298,10 +78437,10 @@ function isReadOnlyTask(task) {
78298
78437
  return true;
78299
78438
  }
78300
78439
  function generateTaskScenario(options) {
78301
- const { task, twins } = options;
78440
+ const { task, twins, readOnlyOverride } = options;
78302
78441
  const title = deriveTitle(task);
78303
78442
  const twinsConfig = twins.join(", ");
78304
- const readOnly = isReadOnlyTask(task);
78443
+ const readOnly = typeof readOnlyOverride === "boolean" ? readOnlyOverride : isReadOnlyTask(task);
78305
78444
  const criteria = [];
78306
78445
  if (readOnly) {
78307
78446
  criteria.push(
@@ -78474,7 +78613,11 @@ async function resolveRunCommandScenarios(scenarioArg, opts, command) {
78474
78613
  if (resolvedTwins.length === 0) {
78475
78614
  throw new CliUsageError("--task requires --twin (could not auto-detect).");
78476
78615
  }
78477
- taskScenarioPath = generateTaskScenario({ task: opts.task, twins: resolvedTwins });
78616
+ taskScenarioPath = generateTaskScenario({
78617
+ task: opts.task,
78618
+ twins: resolvedTwins,
78619
+ readOnlyOverride: opts.readOnly === true ? true : void 0
78620
+ });
78478
78621
  scenariosToRun.push(taskScenarioPath);
78479
78622
  if (!opts.seed && !opts.seedOverrides && !opts.fileSeedPaths) {
78480
78623
  opts.seed = "empty";
@@ -78516,6 +78659,7 @@ function applyRunHelpVisibility(cmd, showAdvancedHelp) {
78516
78659
  "--config",
78517
78660
  "--task",
78518
78661
  "--twin",
78662
+ "--read-only",
78519
78663
  "--runs",
78520
78664
  "--timeout",
78521
78665
  "--output",
@@ -78526,6 +78670,7 @@ function applyRunHelpVisibility(cmd, showAdvancedHelp) {
78526
78670
  "--verbose",
78527
78671
  "--reuse-session",
78528
78672
  "--keep-state",
78673
+ "--fresh-seed",
78529
78674
  "--help"
78530
78675
  ]);
78531
78676
  for (const option of cmd.options) {
@@ -78552,12 +78697,17 @@ function createRunCommand() {
78552
78697
  return acc;
78553
78698
  },
78554
78699
  []
78700
+ ).addOption(
78701
+ new Option(
78702
+ "--read-only",
78703
+ "Force the generated --task scenario to use the read-only (single-criterion) scaffold, bypassing the heuristic classifier. Only meaningful alongside --task."
78704
+ )
78555
78705
  ).option("-n, --runs <count>", "Number of runs", String(configDefaults.runs)).option("-t, --timeout <seconds>", "Timeout per run in seconds", String(configDefaults.timeout)).addOption(new Option("-m, --model <model>", "Evaluator model").hideHelp()).addOption(
78556
78706
  new Option("-o, --output <format>", "Output format: terminal, json").default("terminal")
78557
78707
  ).addOption(new Option("--seed <name-or-path>", "Seed name or file path (.json / .md)")).addOption(new Option("--tag <tag>", "Only run if scenario has this tag")).addOption(new Option("-q, --quiet", "Suppress non-error output")).addOption(new Option("-v, --verbose", "Enable debug logging")).addOption(
78558
78708
  new Option(
78559
78709
  "--harness <path>",
78560
- "Path to a runnable headless harness (e.g. ./.archal/harness.ts). Omit this flag and Archal discovers a repo-local harness by walking up from cwd for a top-level harness.{ts,js,mjs,cjs}, archal-harness.{ts,js,mjs,cjs}, or package.json#archal.harness. You can also set `agent` in .archal.json."
78710
+ "Path to a runnable headless harness (e.g. ./.archal/harness.ts). Omit this flag and Archal discovers a repo-local harness by walking up from cwd for a top-level harness.{ts,js,mjs,cjs}. You can also set `agent` in .archal.json."
78561
78711
  )
78562
78712
  ).addOption(
78563
78713
  new Option(
@@ -78570,18 +78720,28 @@ function createRunCommand() {
78570
78720
  "Skip the scenario's named-seed re-apply on a reused session. Use after sideloading twin state with 'archal twin seed --file'. Alias: --no-reseed."
78571
78721
  )
78572
78722
  ).addOption(new Option("--no-reseed").hideHelp()).addOption(
78723
+ new Option(
78724
+ "--fresh-seed",
78725
+ "Force re-apply of the scenario's named seed on a reused session, wiping any existing twin state first. Opposite of --keep-state. Use when an earlier `archal twin start` left state around and you want the scenario's declared seed applied cleanly. Mutually exclusive with --keep-state."
78726
+ )
78727
+ ).addOption(
78573
78728
  new Option(
78574
78729
  "--allow-empty-state",
78575
78730
  "Allow the run to proceed when the reused-session state probe fails. Rare debug-only; default off \u2014 a probe failure normally fails the run to protect sideloaded state."
78576
78731
  ).hideHelp()
78577
- ).addOption(new Option("--rate-limit <count>").hideHelp()).addOption(new Option("--pass-threshold <score>").default("0").hideHelp()).addOption(new Option("--api-key <key>").hideHelp()).addOption(new Option("--engine-endpoint <url>").hideHelp()).addOption(new Option("--engine-token <token>").hideHelp()).addOption(new Option("--agent-model <model>").hideHelp()).addOption(new Option("--engine-twin-urls <path>").hideHelp()).addOption(new Option("--engine-timeout <seconds>").hideHelp()).addOption(new Option("--dockerfile <path>").hideHelp()).addOption(new Option("--api-base-urls <path>").hideHelp()).addOption(new Option("--api-proxy-url <url>").hideHelp()).addOption(new Option("--preflight-only").hideHelp()).addOption(new Option("--seed-cache").hideHelp()).addOption(new Option("--clear-seed-cache").hideHelp()).addOption(new Option("--replay-seed <path>").hideHelp()).addOption(new Option("--save-seed <path>").hideHelp()).addOption(new Option("--no-failure-analysis").hideHelp()).addOption(new Option("--allow-ambiguous-seed").hideHelp()).addOption(new Option("--strict-seed").hideHelp()).addOption(new Option("--run-project-id <id>").hideHelp()).addOption(new Option("--run-project-name <name>").hideHelp()).addOption(new Option("--run-project-description <text>").hideHelp()).addOption(new Option("--sandbox").hideHelp()).addOption(new Option("--openclaw-home <dir>").hideHelp()).addOption(new Option("--workspace <dir>").hideHelp()).addOption(new Option("--openclaw-config <path>").hideHelp()).addOption(new Option("--openclaw-version <version>").hideHelp()).addOption(new Option("--openclaw-eval-mode <mode>").hideHelp()).addOption(new Option("--docker").hideHelp()).addOption(new Option("--no-docker").hideHelp()).addOption(new Option("--proxy", "Route agent HTTP traffic through TLS proxy to twins")).addOption(new Option("--advanced").hideHelp()).addHelpText(
78732
+ ).addOption(new Option("--rate-limit <count>").hideHelp()).addOption(new Option("--pass-threshold <score>").default("0").hideHelp()).addOption(new Option("--api-key <key>").hideHelp()).addOption(new Option("--engine-endpoint <url>").hideHelp()).addOption(new Option("--engine-token <token>").hideHelp()).addOption(new Option("--agent-model <model>").hideHelp()).addOption(new Option("--engine-twin-urls <path>").hideHelp()).addOption(new Option("--engine-timeout <seconds>").hideHelp()).addOption(new Option("--dockerfile <path>").hideHelp()).addOption(new Option("--api-base-urls <path>").hideHelp()).addOption(new Option("--api-proxy-url <url>").hideHelp()).addOption(new Option("--preflight-only").hideHelp()).addOption(new Option("--seed-cache").hideHelp()).addOption(new Option("--clear-seed-cache").hideHelp()).addOption(new Option("--replay-seed <path>").hideHelp()).addOption(new Option("--save-seed <path>").hideHelp()).addOption(new Option("--no-failure-analysis").hideHelp()).addOption(new Option("--allow-ambiguous-seed").hideHelp()).addOption(new Option("--strict-seed").hideHelp()).addOption(new Option("--run-project-id <id>").hideHelp()).addOption(new Option("--run-project-name <name>").hideHelp()).addOption(new Option("--run-project-description <text>").hideHelp()).addOption(new Option("--sandbox").hideHelp()).addOption(new Option("--openclaw-home <dir>").hideHelp()).addOption(new Option("--workspace <dir>").hideHelp()).addOption(new Option("--openclaw-config <path>").hideHelp()).addOption(new Option("--openclaw-version <version>").hideHelp()).addOption(new Option("--openclaw-eval-mode <mode>").hideHelp()).addOption(new Option("--docker").hideHelp()).addOption(new Option("--no-docker").hideHelp()).addOption(new Option("--proxy", "Force the TLS proxy on (on by default for local harness runs).").hideHelp()).addOption(new Option("--no-proxy", "Disable the TLS proxy for local harness runs.").hideHelp()).addOption(new Option("--advanced").hideHelp()).addHelpText(
78578
78733
  "after",
78579
- '\nExamples:\n archal run # uses .archal.json in current directory\n archal run --config path/to/.archal.json # use specific config\n archal run scenario.md # run a single scenario directly\n archal run scenario.md --runs 5 # run 5 times\n archal harness check ./.archal/harness.ts # verify a repo-local headless harness\n archal run --task "List recent issues" --harness ./.archal/harness.ts --twin github\n # inline task with an explicit harness path\n archal twin start github # then in the same shell:\n archal run scenario.md --reuse-session # runs against the active twin session\n archal run scenario.md --reuse-session ses-abc # targets a specific session id\n\n.archal.json (one valid project-default setup path):\n {\n "title": "my-agent-tests",\n "agent": "npx tsx ./.archal/harness.ts",\n "twins": ["github"],\n "agentModel": "gemini-2.0-flash"\n }\n\nFor full reference: https://docs.archal.ai/cli/run\n'
78734
+ '\nExamples:\n archal run # uses .archal.json in current directory\n archal run --config path/to/.archal.json # use specific config\n archal run scenario.md # run a single scenario directly\n archal run scenario.md --runs 5 # run 5 times\n archal run --task "List recent issues" --harness ./.archal/harness.ts --twin github\n # inline task with an explicit harness path\n archal twin start github # then in the same shell:\n archal run scenario.md --reuse-session # runs against the active twin session\n archal run scenario.md --reuse-session ses-abc # targets a specific session id\n\n.archal.json (one valid project-default setup path):\n {\n "title": "my-agent-tests",\n "agent": "npx tsx ./.archal/harness.ts",\n "twins": ["github"],\n "agentModel": "gemini-2.0-flash"\n }\n\nFor full reference: https://docs.archal.ai/cli/run\n'
78580
78735
  ).action(async (scenarioArg, opts, command) => {
78581
78736
  const reseedOpt = opts.reseed;
78582
78737
  if (reseedOpt === false) {
78583
78738
  opts.keepState = true;
78584
78739
  }
78740
+ if (opts.freshSeed && opts.keepState) {
78741
+ command.error(
78742
+ "error: --fresh-seed and --keep-state are mutually exclusive. Pick one: --fresh-seed to wipe and re-apply the scenario's seed, or --keep-state to preserve the existing twin state."
78743
+ );
78744
+ }
78585
78745
  const {
78586
78746
  archalFile,
78587
78747
  configuredAgentConfig,
@@ -79279,7 +79439,6 @@ function isRunnableScenarioCandidate(filePath) {
79279
79439
  }
79280
79440
  function toLocalScenarioRows(opts) {
79281
79441
  const tagFilter = opts.tag?.toLowerCase();
79282
- const difficultyFilter = opts.difficulty?.toLowerCase();
79283
79442
  const rows = [];
79284
79443
  const localResolution = findLocalScenariosDir();
79285
79444
  const localDir = localResolution.dir;
@@ -79299,7 +79458,6 @@ function toLocalScenarioRows(opts) {
79299
79458
  const scenarioTags = scenario.config.tags.map((t) => t.toLowerCase());
79300
79459
  if (!scenarioTags.includes(tagFilter)) continue;
79301
79460
  }
79302
- if (difficultyFilter && (scenario.config.difficulty ?? "") !== difficultyFilter) continue;
79303
79461
  rows.push({
79304
79462
  scenario: scenario.title,
79305
79463
  slug: (0, import_node_path49.relative)(localDir, file2).replace(/\\/gu, "/").replace(/\.md$/u, "") || (0, import_node_path49.basename)(file2, ".md"),
@@ -79313,13 +79471,9 @@ function toLocalScenarioRows(opts) {
79313
79471
  }
79314
79472
  function matchesScenarioFilters(scenario, opts) {
79315
79473
  const tagFilter = opts.tag?.toLowerCase();
79316
- const difficultyFilter = opts.difficulty?.toLowerCase();
79317
79474
  if (tagFilter && !scenario.tags.some((tag) => tag.toLowerCase() === tagFilter)) {
79318
79475
  return false;
79319
79476
  }
79320
- if (difficultyFilter && (scenario.difficulty ?? "") !== difficultyFilter) {
79321
- return false;
79322
- }
79323
79477
  return true;
79324
79478
  }
79325
79479
  async function listScenarios(opts) {
@@ -79385,7 +79539,7 @@ Found ${rows.length} scenario(s)`);
79385
79539
  }
79386
79540
  function createScenarioCommand() {
79387
79541
  const scenario = new Command("scenario").description("Browse and manage scenarios");
79388
- scenario.command("list").description("List runnable scenarios").option("--tag <tag>", "Filter scenarios by tag").option("--difficulty <level>", "Filter by difficulty (easy, medium, hard)").option("--json", "Output as JSON").action(async (opts) => {
79542
+ scenario.command("list").description("List runnable scenarios").option("--tag <tag>", "Filter scenarios by tag").option("--json", "Output as JSON").action(async (opts) => {
79389
79543
  await listScenarios(opts);
79390
79544
  });
79391
79545
  return scenario;
@@ -80440,6 +80594,21 @@ function sortSessionsByCreatedAt(sessions) {
80440
80594
  function isActiveTwinSession(session) {
80441
80595
  return session.status !== "ended" && session.status !== "expired" && session.status !== "completed";
80442
80596
  }
80597
+ async function probeSavedTwinSession(token, saved) {
80598
+ const result = await getSessionStatus(token, saved.sessionId, saved.controlPlaneBaseUrl);
80599
+ if (!result.ok) {
80600
+ if (result.status === 404) {
80601
+ clearSession();
80602
+ return { state: "stale", reason: "not found" };
80603
+ }
80604
+ throw new CliRuntimeError(`Server error fetching session status: ${result.error}`);
80605
+ }
80606
+ if (!isActiveTwinSession(result.data)) {
80607
+ clearSession();
80608
+ return { state: "stale", reason: result.data.status };
80609
+ }
80610
+ return { state: "alive", response: result.data };
80611
+ }
80443
80612
  var WORKER_HEALTH_TIMEOUT_BASE_MS = 6e4;
80444
80613
  var WORKER_HEALTH_TIMEOUT_PER_TWIN_MS = 2e4;
80445
80614
  var WORKER_HEALTH_TIMEOUT_MAX_MS = 18e4;
@@ -80759,14 +80928,21 @@ function createTwinCommand() {
80759
80928
  });
80760
80929
  });
80761
80930
  command.command("start").argument("[twins...]", "Twins to start").option("--all", "Start all available twins").option("--seed <seeds...>", "Seeds per twin (for example: stripe:small-business github:enterprise-repo)").option("--setup <description>", "Describe desired state in natural language and seed the first twin after startup").option("--setup-file <path>", "Read setup description from a file and seed the first twin after startup").option("--seed-file <path>", "Load a JSON or markdown seed file after the first twin is ready").option("--name <name>", "Custom session name shown on the dashboard (defaults to twin names)").option("--ttl-seconds <seconds>", "Requested twin lifetime in seconds (capped server-side)").option("--fresh", "Bypass idempotency replay \u2014 always create a new session even if request params match a recent one").description("Start a persistent hosted twin session with hosted twin APIs").action(async (twins, opts) => {
80931
+ const token = requireToken();
80762
80932
  const existing = loadSession();
80763
80933
  if (existing) {
80764
- throw new CliRuntimeError(
80765
- `Active session found: ${existing.sessionId}
80934
+ const probe = await probeSavedTwinSession(token, existing);
80935
+ if (probe.state === "alive") {
80936
+ throw new CliRuntimeError(
80937
+ `Active session found: ${existing.sessionId}
80766
80938
  Run 'archal twin stop' first, or 'archal twin status' to inspect it.`
80939
+ );
80940
+ }
80941
+ process.stderr.write(
80942
+ `${DIM6}[twin] Clearing stale local session ${existing.sessionId} (${probe.reason}); starting a new one.${RESET6}
80943
+ `
80767
80944
  );
80768
80945
  }
80769
- const token = requireToken();
80770
80946
  const requestedTwins = await resolveRequestedTwins(token, twins, opts.all);
80771
80947
  if (requestedTwins.length === 0) {
80772
80948
  if (opts.all) {
@@ -80843,25 +81019,15 @@ Run 'archal twin stop' first, or 'archal twin status' to inspect it.`
80843
81019
  `);
80844
81020
  return;
80845
81021
  }
80846
- const result = await getSessionStatus(token, saved.sessionId, saved.controlPlaneBaseUrl);
80847
- if (!result.ok) {
80848
- if (result.status === 404) {
80849
- process.stderr.write(`${DIM6}Session ${saved.sessionId} is no longer active (not found).${RESET6}
80850
- `);
80851
- clearSession();
80852
- return;
80853
- }
80854
- throw new CliRuntimeError(`Server error fetching session status: ${result.error}`);
80855
- }
80856
- const response = result.data;
80857
- if (response.status === "ended" || response.status === "expired" || response.status === "completed") {
81022
+ const probe = await probeSavedTwinSession(token, saved);
81023
+ if (probe.state === "stale") {
80858
81024
  process.stderr.write(
80859
- `${DIM6}Session ${saved.sessionId} is no longer active (${response.status}).${RESET6}
81025
+ `${DIM6}Session ${saved.sessionId} is no longer active (${probe.reason}).${RESET6}
80860
81026
  `
80861
81027
  );
80862
- clearSession();
80863
81028
  return;
80864
81029
  }
81030
+ const response = probe.response;
80865
81031
  if (opts.json) {
80866
81032
  process.stdout.write(`${JSON.stringify({ ...response, sessionId: saved.sessionId }, null, 2)}
80867
81033
  `);
@@ -26994,7 +26994,6 @@ var scenarioConfigSchema = external_exports3.object({
26994
26994
  timeout: external_exports3.number().default(120),
26995
26995
  runs: external_exports3.number().default(5),
26996
26996
  evaluatorModel: external_exports3.string().optional(),
26997
- difficulty: external_exports3.enum(["easy", "medium", "hard"]).optional(),
26998
26997
  tags: external_exports3.array(external_exports3.string()).default([])
26999
26998
  });
27000
26999
 
@@ -27194,7 +27194,6 @@ var scenarioConfigSchema = external_exports3.object({
27194
27194
  timeout: external_exports3.number().default(120),
27195
27195
  runs: external_exports3.number().default(5),
27196
27196
  evaluatorModel: external_exports3.string().optional(),
27197
- difficulty: external_exports3.enum(["easy", "medium", "hard"]).optional(),
27198
27197
  tags: external_exports3.array(external_exports3.string()).default([])
27199
27198
  });
27200
27199
 
@@ -18,7 +18,7 @@ import {
18
18
  requestedSeedsMatchSession,
19
19
  sleep,
20
20
  trimEnv
21
- } from "./chunk-YV6BH6DO.js";
21
+ } from "./chunk-KTMNDJFB.js";
22
22
 
23
23
  // src/runtime-module-resolution.ts
24
24
  import { dirname, extname, resolve } from "path";
@@ -30086,7 +30086,6 @@ var scenarioConfigSchema = external_exports3.object({
30086
30086
  timeout: external_exports3.number().default(120),
30087
30087
  runs: external_exports3.number().default(5),
30088
30088
  evaluatorModel: external_exports3.string().optional(),
30089
- difficulty: external_exports3.enum(["easy", "medium", "hard"]).optional(),
30090
30089
  tags: external_exports3.array(external_exports3.string()).default([])
30091
30090
  });
30092
30091
 
@@ -8,11 +8,11 @@ import {
8
8
  readArchalVitestConfig,
9
9
  resetArchalTwins,
10
10
  resolveRuntimeModule
11
- } from "./chunk-RKYS44AS.js";
11
+ } from "./chunk-L6HSMJ3F.js";
12
12
  import {
13
13
  encodeConfig,
14
14
  getSessionIdFilePath
15
- } from "./chunk-YV6BH6DO.js";
15
+ } from "./chunk-KTMNDJFB.js";
16
16
  import {
17
17
  __commonJS,
18
18
  __toESM
@@ -27434,7 +27434,6 @@ var scenarioConfigSchema = external_exports3.object({
27434
27434
  timeout: external_exports3.number().default(120),
27435
27435
  runs: external_exports3.number().default(5),
27436
27436
  evaluatorModel: external_exports3.string().optional(),
27437
- difficulty: external_exports3.enum(["easy", "medium", "hard"]).optional(),
27438
27437
  tags: external_exports3.array(external_exports3.string()).default([])
27439
27438
  });
27440
27439
 
@@ -4,7 +4,7 @@ import {
4
4
  createHostedAuthLease,
5
5
  parsePositiveInteger,
6
6
  runHostedSessionReaper
7
- } from "../chunk-YV6BH6DO.js";
7
+ } from "../chunk-KTMNDJFB.js";
8
8
  import "../chunk-YJICENME.js";
9
9
 
10
10
  // src/runtime/hosted-session-reaper.ts
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  bootstrapArchalVitestRouting
3
- } from "../chunk-RKYS44AS.js";
4
- import "../chunk-YV6BH6DO.js";
3
+ } from "../chunk-L6HSMJ3F.js";
4
+ import "../chunk-KTMNDJFB.js";
5
5
  import "../chunk-YJICENME.js";
6
6
 
7
7
  // src/runtime/setup-files.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "archal",
3
- "version": "0.9.9",
3
+ "version": "0.9.10",
4
4
  "description": "Test your agents & integrations against digital twins",
5
5
  "type": "module",
6
6
  "main": "dist/index.cjs",