@archal/cli 0.7.9 → 0.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -233,6 +233,7 @@ function parseCriterionLine(line, index) {
233
233
  } else {
234
234
  type = inferCriterionType(description);
235
235
  }
236
+ if (!description) return null;
236
237
  return {
237
238
  id: `criterion-${index + 1}`,
238
239
  description,
@@ -333,7 +334,11 @@ ${expectedBehavior}`.toLowerCase();
333
334
  github: ["github", "repository", "pull request", "create_issue", "create_pull_request", "merge_pull_request"],
334
335
  slack: ["slack", "slack channel", "send_message", "slack message", "direct message"],
335
336
  linear: ["linear", "linear ticket", "linear project", "linear cycle"],
336
- jira: ["jira", "jira sprint", "jira epic", "jira board"]
337
+ jira: ["jira", "jira sprint", "jira epic", "jira board"],
338
+ stripe: ["stripe", "payment", "refund", "subscription", "invoice", "charge"],
339
+ supabase: ["supabase", "database", "sql query", "database table"],
340
+ "google-workspace": ["google workspace", "gmail", "google calendar", "google drive", "google docs"],
341
+ browser: ["browser", "web page", "navigate to", "click on", "web content"]
337
342
  };
338
343
  for (const [twin, keywords] of Object.entries(twinKeywords)) {
339
344
  if (keywords.some((kw) => combined.includes(kw))) {
@@ -425,7 +430,9 @@ function validateScenario(scenario) {
425
430
  }
426
431
  }
427
432
  if (scenario.config.twins.length === 0) {
428
- errors.push("Scenario does not reference any known twins (specify in Config section or mention services in Setup/Expected Behavior)");
433
+ errors.push(
434
+ 'Scenario does not reference any known twins. Add a "## Config" section with "twins: github" (or slack, linear, jira, stripe, supabase, google-workspace, browser). Alternatively, mention the service name in ## Setup or ## Expected Behavior.'
435
+ );
429
436
  }
430
437
  if (scenario.config.timeout <= 0) {
431
438
  errors.push("Timeout must be a positive number");
@@ -1107,6 +1114,8 @@ var HTTP_RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([408, 425, 429, 500, 5
1107
1114
  var HTTP_PUSH_TIMEOUT_MS = 2e4;
1108
1115
  var HTTP_PUSH_MAX_RETRIES = 6;
1109
1116
  var HTTP_PUSH_BACKOFF_MS = [1e3, 2e3, 3e3, 5e3, 5e3, 5e3];
1117
+ var HTTP_PUSH_WARMUP_RETRIES = 6;
1118
+ var HTTP_PUSH_WARMUP_BACKOFF_MS = [1500, 2500, 3500, 5e3, 6e3, 7e3];
1110
1119
  function resolveRetryDelay(backoffMs, attempt, fallbackMs) {
1111
1120
  const indexed = backoffMs[attempt];
1112
1121
  if (typeof indexed === "number" && Number.isFinite(indexed) && indexed >= 0) {
@@ -1157,6 +1166,10 @@ async function fetchWithRetry(url, options, retryOptions) {
1157
1166
  function twinBasePath(url) {
1158
1167
  return url.replace(/\/(mcp|api)\/?$/, "");
1159
1168
  }
1169
/**
 * Tells whether an HTTP response looks like a twin worker that is still
 * warming up: status 503 whose body mentions one of the known warm-up phrases
 * (matched case-insensitively).
 *
 * @param {number} status - HTTP status code of the response.
 * @param {string} body - Response body text.
 * @returns {boolean} True only for a 503 carrying a warm-up message.
 */
function isTwinWorkerWarmupResponse(status, body) {
  const warmupMessage = /twin worker endpoint not available|session is busy|retry shortly/i;
  return status === 503 && warmupMessage.test(body);
}
1160
1173
  async function collectStateFromHttp(twinUrls, bearerToken, adminAuth) {
1161
1174
  const state = {};
1162
1175
  const failures = [];
@@ -1201,25 +1214,44 @@ async function pushStateToCloud(twinUrls, seedSelections, bearerToken, adminAuth
1201
1214
  }
1202
1215
  const url = `${twinBasePath(baseUrl)}/state`;
1203
1216
  debug(`Pushing dynamic seed to ${sel.twinName}`, { url });
1204
- const response = await fetchWithRetry(
1205
- url,
1206
- {
1207
- method: "PUT",
1208
- headers,
1209
- body: JSON.stringify(sel.seedData)
1210
- },
1211
- {
1212
- retries: HTTP_PUSH_MAX_RETRIES,
1213
- timeoutMs: HTTP_PUSH_TIMEOUT_MS,
1214
- backoffMs: HTTP_PUSH_BACKOFF_MS
1217
+ const payload = JSON.stringify(sel.seedData);
1218
+ let pushed = false;
1219
+ for (let warmupAttempt = 0; warmupAttempt <= HTTP_PUSH_WARMUP_RETRIES; warmupAttempt++) {
1220
+ const response = await fetchWithRetry(
1221
+ url,
1222
+ {
1223
+ method: "PUT",
1224
+ headers,
1225
+ body: payload
1226
+ },
1227
+ {
1228
+ retries: HTTP_PUSH_MAX_RETRIES,
1229
+ timeoutMs: HTTP_PUSH_TIMEOUT_MS,
1230
+ backoffMs: HTTP_PUSH_BACKOFF_MS
1231
+ }
1232
+ );
1233
+ if (response.ok) {
1234
+ pushed = true;
1235
+ break;
1215
1236
  }
1216
- );
1217
- if (!response.ok) {
1218
1237
  const text = await response.text().catch(() => "");
1238
+ const isWarmup = isTwinWorkerWarmupResponse(response.status, text);
1239
+ if (isWarmup && warmupAttempt < HTTP_PUSH_WARMUP_RETRIES) {
1240
+ const delay = resolveRetryDelay(HTTP_PUSH_WARMUP_BACKOFF_MS, warmupAttempt, 5e3);
1241
+ warn(
1242
+ `Twin "${sel.twinName}" not ready for state push (HTTP 503), retrying in ${delay}ms`,
1243
+ { attempt: `${warmupAttempt + 1}/${HTTP_PUSH_WARMUP_RETRIES + 1}` }
1244
+ );
1245
+ await new Promise((resolve12) => setTimeout(resolve12, delay));
1246
+ continue;
1247
+ }
1219
1248
  throw new Error(
1220
1249
  `Failed to push dynamic seed to twin "${sel.twinName}": HTTP ${response.status}${text ? ` (${text})` : ""}`
1221
1250
  );
1222
1251
  }
1252
+ if (!pushed) {
1253
+ throw new Error(`Failed to push dynamic seed to twin "${sel.twinName}": worker warmup did not complete in time`);
1254
+ }
1223
1255
  debug(`Pushed dynamic seed to ${sel.twinName} successfully`);
1224
1256
  }
1225
1257
  }
@@ -3072,7 +3104,7 @@ async function callLlmViaArchal(options) {
3072
3104
  debug("Archal backend response", { model: actualModel, remaining: String(result.data.remaining ?? "unknown") });
3073
3105
  const isSeedGen = options.intent === "seed-generate";
3074
3106
  if (!modelMismatchWarned && !isSeedGen && options.model && actualModel && !actualModel.includes(options.model) && !options.model.includes(actualModel)) {
3075
- warn(`Requested model "${options.model}" but Archal backend used "${actualModel}". To use a specific model, set provider to "direct" with your own API key.`);
3107
+ debug(`Archal backend used "${actualModel}" (requested "${options.model}"). To use a specific model, set provider to "direct" with your own API key.`);
3076
3108
  modelMismatchWarned = true;
3077
3109
  }
3078
3110
  return result.data.text;
@@ -3195,6 +3227,47 @@ async function callAnthropic(options) {
3195
3227
  if (!textBlock?.text) throw new Error("Anthropic returned no text content");
3196
3228
  return textBlock.text;
3197
3229
  }
3230
/**
 * Extracts the assistant's text from an OpenAI-style chat completion payload.
 *
 * `message.content` may be a plain string or an array of parts. A part may be
 * a string, `{ text: string }`, `{ text: { value: string } }` or
 * `{ value: string }`. Every usable part is trimmed, empty parts are dropped,
 * and the rest are joined with newlines.
 *
 * When the content yields no text (missing, empty string, or an array without
 * usable parts) a non-empty `message.refusal` is returned instead, so callers
 * surface the refusal rather than a generic "no content" error.
 *
 * @param {unknown} data - Parsed JSON body of the completion response. May be
 *   null/undefined or lack `choices`; those cases return null.
 * @returns {string | null} Trimmed text, or null when nothing usable exists.
 */
function extractOpenAiTextContent(data) {
  // `data?.` guards against a response body that parsed to null.
  const message = data?.choices?.[0]?.message;
  if (!message) return null;

  // Trim a candidate value; anything that is not a non-blank string is null.
  const toNonEmptyText = (candidate) => {
    if (typeof candidate !== "string") return null;
    const trimmed = candidate.trim();
    return trimmed.length > 0 ? trimmed : null;
  };

  // Pick the raw string carried by one content part. A string `text` field
  // wins even when blank; `part.value` is only consulted when `text` is
  // neither a string nor an object holding a string `value`.
  const rawPartText = (part) => {
    if (typeof part === "string") return part;
    if (!part || typeof part !== "object") return null;
    const { text } = part;
    if (typeof text === "string") return text;
    if (text && typeof text === "object" && typeof text.value === "string") {
      return text.value;
    }
    return typeof part.value === "string" ? part.value : null;
  };

  const { content } = message;
  if (typeof content === "string") {
    const text = toNonEmptyText(content);
    if (text !== null) return text;
    // Blank string content: fall through so a refusal can still be reported.
  } else if (Array.isArray(content)) {
    const segments = [];
    for (const part of content) {
      const text = toNonEmptyText(rawPartText(part));
      if (text !== null) segments.push(text);
    }
    if (segments.length > 0) return segments.join("\n");
  }

  return toNonEmptyText(message.refusal);
}
3198
3271
  function usesMaxCompletionTokens(model) {
3199
3272
  return model.startsWith("gpt-5") || model.startsWith("o1-") || model.startsWith("o2-") || model.startsWith("o3-") || model.startsWith("o4-");
3200
3273
  }
@@ -3222,7 +3295,7 @@ async function callOpenAi(options) {
3222
3295
  throw new LlmApiError("OpenAI", response.status, errorText.slice(0, 200));
3223
3296
  }
3224
3297
  const data = await response.json();
3225
- const content = data.choices?.[0]?.message?.content;
3298
+ const content = extractOpenAiTextContent(data);
3226
3299
  if (!content) throw new Error("OpenAI returned no content");
3227
3300
  return content;
3228
3301
  }
@@ -3256,7 +3329,7 @@ async function callOpenAiCompatible(options) {
3256
3329
  throw new LlmApiError(`OpenAI-compatible (${options.baseUrl})`, response.status, errorText.slice(0, 200));
3257
3330
  }
3258
3331
  const data = await response.json();
3259
- const content = data.choices?.[0]?.message?.content;
3332
+ const content = extractOpenAiTextContent(data);
3260
3333
  if (!content) throw new Error("OpenAI-compatible API returned no content");
3261
3334
  return content;
3262
3335
  }
@@ -3281,13 +3354,15 @@ ${CYAN}${BOLD}archal${RESET} ${DIM}|${RESET} ${scenarioTitle}
3281
3354
  `);
3282
3355
  }
3283
3356
  }
3284
- function printRunProgress(runIndex, totalRuns, score, error2) {
3357
+ function printRunProgress(runIndex, totalRuns, score, error2, outcome) {
3285
3358
  const { quiet } = getLoggerOptions();
3286
3359
  if (quiet || activeOutputFormat !== "terminal") return;
3287
3360
  const dots = ".".repeat(Math.max(1, 20 - String(runIndex + 1).length - String(totalRuns).length));
3288
3361
  if (error2) {
3289
3362
  const shortError = error2.length > MAX_ERROR_PREVIEW_CHARS ? error2.slice(0, MAX_ERROR_PREVIEW_CHARS - 1) + "\u2026" : error2;
3290
- process.stderr.write(` run ${runIndex + 1}/${totalRuns} ${DIM}${dots}${RESET} ${RED}ERROR${RESET} ${DIM}(${shortError})${RESET}
3363
+ const inconclusive = outcome === "inconclusive_infrastructure" || outcome === "inconclusive_seed";
3364
+ const label = inconclusive ? `${YELLOW}INCONCLUSIVE${RESET}` : `${RED}ERROR${RESET}`;
3365
+ process.stderr.write(` run ${runIndex + 1}/${totalRuns} ${DIM}${dots}${RESET} ${label} ${DIM}(${shortError})${RESET}
3291
3366
  `);
3292
3367
  return;
3293
3368
  }
@@ -4193,8 +4268,35 @@ function filterByPredicate(items, predicate) {
4193
4268
  if (knownMatches.length > 0) {
4194
4269
  return { items: knownMatches, recognized: true };
4195
4270
  }
4271
+ const ACTION_VERBS = /* @__PURE__ */ new Set([
4272
+ "listed",
4273
+ "fetched",
4274
+ "retrieved",
4275
+ "found",
4276
+ "searched",
4277
+ "queried",
4278
+ "posted",
4279
+ "sent",
4280
+ "received",
4281
+ "notified",
4282
+ "alerted",
4283
+ "reviewed",
4284
+ "analyzed",
4285
+ "inspected",
4286
+ "checked",
4287
+ "verified",
4288
+ "triaged",
4289
+ "escalated",
4290
+ "assigned",
4291
+ "tagged",
4292
+ "labeled",
4293
+ "updated",
4294
+ "edited",
4295
+ "patched",
4296
+ "migrated"
4297
+ ]);
4196
4298
  const isSingleWord = !lowerPredicate.includes(" ");
4197
- if (isSingleWord) {
4299
+ if (isSingleWord && !ACTION_VERBS.has(lowerPredicate)) {
4198
4300
  const hasKnownField = items.some((item) => {
4199
4301
  if (typeof item !== "object" || item === null) return false;
4200
4302
  const obj = item;
@@ -5466,24 +5568,46 @@ ${JSON.stringify(context.stateDiff, null, 2)}
5466
5568
  ## Agent Trace Evidence
5467
5569
  ${traceEvidence}`;
5468
5570
  }
5571
/**
 * Rough token estimate for a value: the length of its JSON serialization
 * divided by four, rounded up.
 *
 * `JSON.stringify` returns `undefined` (not a string) for `undefined`,
 * functions and symbols; such values contribute nothing to a serialized
 * prompt, so they are counted as 0 tokens instead of throwing.
 *
 * @param {unknown} value - Any JSON-serializable value.
 * @returns {number} Estimated token count (non-negative integer).
 */
function estimateTokens(value) {
  const json = JSON.stringify(value);
  if (typeof json !== "string") return 0;
  return Math.ceil(json.length / 4);
}
5575
+ var MAX_STATE_TOKENS = 4e4;
5469
5576
  function summarizeState(state) {
5470
5577
  const flat = flattenTwinState(state);
5471
5578
  const summary = {};
5472
5579
  for (const [key, value] of Object.entries(flat)) {
5473
5580
  if (Array.isArray(value)) {
5474
- if (value.length <= 100) {
5581
+ if (value.length <= 50) {
5475
5582
  summary[key] = value;
5476
5583
  } else {
5477
5584
  summary[key] = {
5478
5585
  _count: value.length,
5479
- _first20: value.slice(0, 20),
5480
- _last20: value.slice(-20)
5586
+ _first10: value.slice(0, 10),
5587
+ _last10: value.slice(-10)
5481
5588
  };
5482
5589
  }
5483
5590
  } else {
5484
5591
  summary[key] = value;
5485
5592
  }
5486
5593
  }
5594
+ let totalTokens = estimateTokens(summary);
5595
+ if (totalTokens > MAX_STATE_TOKENS) {
5596
+ const collectionSizes = Object.entries(summary).map(([key, value]) => ({ key, tokens: estimateTokens(value) })).sort((a, b) => b.tokens - a.tokens);
5597
+ for (const { key } of collectionSizes) {
5598
+ if (totalTokens <= MAX_STATE_TOKENS) break;
5599
+ const value = summary[key];
5600
+ if (!Array.isArray(value)) continue;
5601
+ const before = estimateTokens(value);
5602
+ summary[key] = {
5603
+ _count: value.length,
5604
+ _first5: value.slice(0, 5),
5605
+ _last5: value.slice(-5),
5606
+ _truncated: "Collection too large for evaluation \u2014 showing subset"
5607
+ };
5608
+ totalTokens -= before - estimateTokens(summary[key]);
5609
+ }
5610
+ }
5487
5611
  return summary;
5488
5612
  }
5489
5613
  function parseJudgeResponse(text) {
@@ -5583,6 +5707,15 @@ async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAf
5583
5707
  };
5584
5708
  }
5585
5709
  const message = err instanceof Error ? err.message : String(err);
5710
+ if (err instanceof LlmApiError && err.status === 400 && message.includes("too long")) {
5711
+ warn(`LLM judge prompt too large for criterion "${criterion.id}" \u2014 twin state may be too large for evaluation`);
5712
+ return {
5713
+ criterionId: criterion.id,
5714
+ status: "fail",
5715
+ confidence: 0,
5716
+ explanation: "LLM evaluation skipped: prompt exceeded model context window. The scenario state is too large for probabilistic evaluation. Consider using deterministic [D] criteria for this scenario."
5717
+ };
5718
+ }
5586
5719
  error(`LLM judge call failed: ${message}`);
5587
5720
  return {
5588
5721
  criterionId: criterion.id,
@@ -5809,6 +5942,17 @@ function buildFailureAnalysisPrompt(input) {
5809
5942
  );
5810
5943
  sections.push(`## Passed Criteria (${input.passedCriteria.length})`);
5811
5944
  sections.push(input.passedCriteria.map((c) => `- ${sanitizeForPrompt(c.description, 300)}`).join("\n"));
5945
+ if (input.agentError || input.agentLog) {
5946
+ sections.push(`## Agent Execution Context`);
5947
+ if (input.agentError) {
5948
+ sections.push(`Error: ${sanitizeForPrompt(input.agentError, 300)}`);
5949
+ }
5950
+ if (input.agentLog) {
5951
+ const logTail = input.agentLog.length > 800 ? input.agentLog.slice(-800) : input.agentLog;
5952
+ sections.push(`Agent log (tail):
5953
+ ${sanitizeForPrompt(logTail, 800)}`);
5954
+ }
5955
+ }
5812
5956
  sections.push(`## Agent Trace (${input.trace.length} tool calls)`);
5813
5957
  sections.push(
5814
5958
  input.trace.length === 0 ? "(Agent made no tool calls - likely crashed or timed out)" : JSON.stringify(traceFormatted, null, 2)
@@ -6552,7 +6696,7 @@ function resolveTelemetryEndpointFromEnv() {
6552
6696
  if (!fallbackBaseUrl) {
6553
6697
  return null;
6554
6698
  }
6555
- return `${fallbackBaseUrl}/api/traces`;
6699
+ return `${fallbackBaseUrl}/v1/traces`;
6556
6700
  }
6557
6701
  function resolveIngestToken() {
6558
6702
  return process.env["ARCHAL_TELEMETRY_TOKEN"]?.trim();
@@ -6701,8 +6845,26 @@ function isTelemetryEnabled() {
6701
6845
  if (consent !== "pending") return consent === "granted";
6702
6846
  return loadConfig().telemetry;
6703
6847
  }
6704
- function buildStructuredRunError(runIndex, error2) {
6848
+ function buildStructuredRunError(runIndex, error2, outcome) {
6705
6849
  const message = error2.trim();
6850
+ if (outcome === "inconclusive_seed") {
6851
+ return {
6852
+ runIndex,
6853
+ message,
6854
+ category: "seed_setup",
6855
+ code: "SEED_SETUP_ERROR",
6856
+ retryable: true
6857
+ };
6858
+ }
6859
+ if (outcome === "inconclusive_infrastructure") {
6860
+ return {
6861
+ runIndex,
6862
+ message,
6863
+ category: "infrastructure",
6864
+ code: "INFRASTRUCTURE_ERROR",
6865
+ retryable: true
6866
+ };
6867
+ }
6706
6868
  if (message.startsWith("Agent not found:")) {
6707
6869
  return {
6708
6870
  runIndex,
@@ -6944,7 +7106,7 @@ function buildMetadata(report, totalEntries) {
6944
7106
  },
6945
7107
  agentInternals: {
6946
7108
  runDurationsMs: report.runs.map((run) => run.durationMs),
6947
- runErrors: report.runs.filter((run) => typeof run.error === "string" && run.error.length > 0).map((run) => buildStructuredRunError(run.runIndex, run.error)),
7109
+ runErrors: report.runs.filter((run) => typeof run.error === "string" && run.error.length > 0).map((run) => buildStructuredRunError(run.runIndex, run.error, run.outcome)),
6948
7110
  evaluationCounts: { pass: passCount, partial: partialCount, fail: failCount },
6949
7111
  runSummaries: report.runs.map((run) => ({
6950
7112
  runIndex: run.runIndex,
@@ -7119,6 +7281,7 @@ async function uploadIfEnabled(traceId, report) {
7119
7281
  }
7120
7282
 
7121
7283
  // src/runner/dynamic-seed-generator.ts
7284
+ import { createHash as createHash4 } from "crypto";
7122
7285
  import { z as z4 } from "zod";
7123
7286
 
7124
7287
  // src/runner/seed-schemas/seed-schema-inference.ts
@@ -8240,7 +8403,8 @@ var RELATIONSHIP_RULES = {
8240
8403
  { sourceCollection: "disputes", sourceField: "paymentIntentId", targetCollection: "paymentIntents", targetField: "paymentIntentId", optional: true }
8241
8404
  ],
8242
8405
  jira: [
8243
- { sourceCollection: "issues", sourceField: "projectId", targetCollection: "projects", targetField: "id" }
8406
+ { sourceCollection: "issues", sourceField: "projectId", targetCollection: "projects", targetField: "id" },
8407
+ { sourceCollection: "projects", sourceField: "leadAccountId", targetCollection: "users", targetField: "accountId" }
8244
8408
  ],
8245
8409
  linear: [
8246
8410
  { sourceCollection: "issues", sourceField: "teamId", targetCollection: "teams", targetField: "id" },
@@ -8484,15 +8648,20 @@ function autoFillMissingFKs(seed, twinName) {
8484
8648
  const targetEntities = result[rule.targetCollection];
8485
8649
  if (!sourceEntities || !targetEntities || targetEntities.length === 0) continue;
8486
8650
  const targetValues = targetEntities.map((e) => e[rule.targetField]).filter((v) => v !== void 0 && v !== null);
8487
- if (targetValues.length !== 1) continue;
8488
- const singleTarget = targetValues[0];
8651
+ if (targetValues.length === 0) continue;
8652
+ const validTargetSet = new Set(targetValues.map(String));
8653
+ let fillIndex = 0;
8489
8654
  for (const entity of sourceEntities) {
8490
8655
  const e = entity;
8491
- if (e[rule.sourceField] === void 0 || e[rule.sourceField] === null) {
8492
- warn(
8493
- `Auto-filling ${rule.sourceCollection}.${rule.sourceField} = ${String(singleTarget)} (only one ${rule.targetCollection} exists)`
8656
+ const currentValue = e[rule.sourceField];
8657
+ const needsFill = currentValue === void 0 || currentValue === null || !validTargetSet.has(String(currentValue));
8658
+ if (needsFill) {
8659
+ const fillValue = targetValues[fillIndex % targetValues.length];
8660
+ fillIndex++;
8661
+ debug(
8662
+ `Auto-filling ${rule.sourceCollection}.${rule.sourceField} = ${String(fillValue)} (from ${targetValues.length} ${rule.targetCollection})` + (currentValue != null ? ` (was ${String(currentValue)} \u2014 not in targets)` : "")
8494
8663
  );
8495
- e[rule.sourceField] = singleTarget;
8664
+ e[rule.sourceField] = fillValue;
8496
8665
  }
8497
8666
  }
8498
8667
  }
@@ -8526,12 +8695,36 @@ function normalizeSeedData(seed, twinName) {
8526
8695
  }
8527
8696
  }
8528
8697
  }
8698
+ const collectionSchema = schema[collection];
8699
+ if (collectionSchema) {
8700
+ for (const [field, fieldDef] of Object.entries(collectionSchema)) {
8701
+ if (!(field in e) || e[field] === null || e[field] === void 0) continue;
8702
+ const expectedType = fieldDef.type.split("|")[0].trim();
8703
+ if (expectedType === "string" && typeof e[field] === "object" && e[field] !== null && !Array.isArray(e[field])) {
8704
+ const obj = e[field];
8705
+ const extracted = obj["login"] ?? obj["name"] ?? obj["value"] ?? obj["key"] ?? obj["id"] ?? obj["displayName"];
8706
+ if (typeof extracted === "string") {
8707
+ debug(`Seed normalization: coerced ${collection}.${field} from object to string "${extracted}"`);
8708
+ e[field] = extracted;
8709
+ } else {
8710
+ const firstStr = Object.values(obj).find((v) => typeof v === "string");
8711
+ if (firstStr) {
8712
+ debug(`Seed normalization: coerced ${collection}.${field} from object to string "${firstStr}" (fallback)`);
8713
+ e[field] = firstStr;
8714
+ } else {
8715
+ debug(`Seed normalization: could not coerce ${collection}.${field} from object to string, removing`);
8716
+ delete e[field];
8717
+ }
8718
+ }
8719
+ }
8720
+ }
8721
+ }
8529
8722
  if (collectionDefaults) {
8530
8723
  for (const [field, defaultValue] of Object.entries(collectionDefaults)) {
8531
8724
  if (!(field in e)) {
8532
8725
  e[field] = structuredClone(defaultValue);
8533
8726
  } else if (e[field] === null && defaultValue !== null) {
8534
- const fieldDef = schema[collection]?.[field];
8727
+ const fieldDef = collectionSchema?.[field];
8535
8728
  if (fieldDef && !fieldDef.type.includes("null")) {
8536
8729
  e[field] = structuredClone(defaultValue);
8537
8730
  }
@@ -8540,6 +8733,15 @@ function normalizeSeedData(seed, twinName) {
8540
8733
  }
8541
8734
  }
8542
8735
  }
8736
+ if (twinName === "github" && result["repos"]) {
8737
+ for (const entity of result["repos"]) {
8738
+ const e = entity;
8739
+ if ((!e["fullName"] || typeof e["fullName"] !== "string") && typeof e["owner"] === "string" && typeof e["name"] === "string") {
8740
+ e["fullName"] = `${e["owner"]}/${e["name"]}`;
8741
+ debug(`Seed normalization: derived repos.fullName = "${e["fullName"]}"`);
8742
+ }
8743
+ }
8744
+ }
8543
8745
  return result;
8544
8746
  }
8545
8747
 
@@ -8551,6 +8753,7 @@ var KIND_COLLECTION_HINTS = {
8551
8753
  channel: ["channels"],
8552
8754
  user: ["users"],
8553
8755
  ticket: ["issues"],
8756
+ project: ["projects"],
8554
8757
  table: ["tables"],
8555
8758
  site: ["sites", "domains"],
8556
8759
  file: ["files"],
@@ -8560,6 +8763,9 @@ var KIND_COLLECTION_HINTS = {
8560
8763
  var ENTITY_KEY_ALIASES = {
8561
8764
  "repo.owner": ["ownerLogin", "owner_login", "login", "owner.login", "owner.name"],
8562
8765
  "issue.key": ["identifier"],
8766
+ "project.key": ["key", "projectKey"],
8767
+ "ticket.key": ["identifier", "key"],
8768
+ "stripe_entity.id": ["id", "charge", "chargeId", "paymentIntentId", "invoiceId", "customerId", "disputeId"],
8563
8769
  "email.address": ["email", "from", "to", "cc", "bcc"],
8564
8770
  "file.name": ["title", "fileName", "filename", "subject", "summary"]
8565
8771
  };
@@ -8715,10 +8921,28 @@ function validateSeedCoverage(intent, mergedSeed) {
8715
8921
  const entityIssues = [];
8716
8922
  const quoteErrors = [];
8717
8923
  const quoteWarnings = [];
8718
- const CORE_ENTITY_KEYS = /* @__PURE__ */ new Set(["owner", "name", "fullName", "channel_name", "key", "identifier", "number"]);
8924
+ const CORE_ENTITY_KEYS = /* @__PURE__ */ new Set(["owner", "name", "fullName", "channel_name", "key", "identifier", "number", "id"]);
8925
+ const CONTRACT_REQUIRED_KINDS = /* @__PURE__ */ new Set([
8926
+ "repo",
8927
+ "pullRequest",
8928
+ "issue",
8929
+ "channel",
8930
+ "user",
8931
+ "ticket",
8932
+ "project",
8933
+ "table"
8934
+ ]);
8719
8935
  const entityWarnings = [];
8720
8936
  for (const entity of intent.entities) {
8721
8937
  if (typeof entity.value === "boolean") continue;
8938
+ const candidateCollections = toCollectionCandidates(mergedSeed, entity.kind, entity.value);
8939
+ if (CONTRACT_REQUIRED_KINDS.has(entity.kind) && candidateCollections.length === 0) {
8940
+ entityIssues.push({
8941
+ type: "missing_entity",
8942
+ message: `Scenario entity contract mismatch: no collections match ${entity.kind}.${entity.key}=${String(entity.value)}`
8943
+ });
8944
+ continue;
8945
+ }
8722
8946
  if (!valueExistsInCollections(mergedSeed, entity.kind, entity.key, entity.value)) {
8723
8947
  const issue = {
8724
8948
  type: "missing_entity",
@@ -8816,7 +9040,25 @@ var NON_SUBJECT_STARTS = /* @__PURE__ */ new Set([
8816
9040
  "could",
8817
9041
  "would",
8818
9042
  "may",
8819
- "might"
9043
+ "might",
9044
+ "for",
9045
+ "with",
9046
+ "in",
9047
+ "at",
9048
+ "to",
9049
+ "from",
9050
+ "by",
9051
+ "on",
9052
+ "per",
9053
+ "via",
9054
+ "into",
9055
+ "onto",
9056
+ "over",
9057
+ "under",
9058
+ "after",
9059
+ "before",
9060
+ "during",
9061
+ "as"
8820
9062
  ]);
8821
9063
  function isReasonableCountSubject(subject, expected) {
8822
9064
  if (expected > MAX_REASONABLE_COUNT) return false;
@@ -8827,38 +9069,96 @@ function isReasonableCountSubject(subject, expected) {
8827
9069
  if (/\b(?:have|has|had|were|was|are|is|been|being|do|does|did|can|could|should|will|would|may|might)\b/.test(subject.toLowerCase())) return false;
8828
9070
  return true;
8829
9071
  }
9072
/**
 * Tells whether the number starting at `numberStart` looks like the minutes
 * part of a clock time, i.e. it is immediately preceded by `<digit(s)>:` as in
 * `10:30` or `9:30`.
 *
 * Only the three characters before the number are inspected. The check is
 * anchored at the end of that window only, so a single-digit hour preceded by
 * any other character (`"at 9:30"` gives the window `" 9:"`) is recognised
 * too; anchoring at the start of a fixed-width window would reject it.
 *
 * @param {string} text - Full text being scanned.
 * @param {number} numberStart - Index in `text` where the number begins.
 * @returns {boolean} True when the number directly follows `<digit>:`.
 */
function appearsToBeClockSuffix(text, numberStart) {
  // At least "d:" must fit before the number.
  if (!Number.isInteger(numberStart) || numberStart < 2) return false;
  const window = text.slice(Math.max(0, numberStart - 3), numberStart);
  return /\d{1,2}:$/.test(window);
}
9076
/**
 * Tells whether the match starting at `matchIndex` is the fractional part of
 * a decimal number, i.e. it is directly preceded by `.` which in turn is
 * directly preceded by a digit (as the `5` in `1.5`).
 *
 * @param {string} text - Full text being scanned.
 * @param {number} matchIndex - Index in `text` where the match begins.
 * @returns {boolean} True for `<digit>.<match>`, false otherwise.
 */
function isDecimalFragment(text, matchIndex) {
  const followsDot = matchIndex > 0 && text[matchIndex - 1] === ".";
  if (!followsDot) return false;
  // A dot at the very start of the text has no digit in front of it.
  return matchIndex >= 2 && /\d/.test(text[matchIndex - 2]);
}
9084
/**
 * Resolves a count subject (e.g. "open issues") to a collection in the
 * flattened state, also reporting which key it matched.
 *
 * Each candidate from `buildSubjectCandidates2` is compared, with whitespace
 * removed and lower-cased, against every array-valued key of `flat`; a key
 * matches when it equals the candidate or the candidate plus "s". Candidates
 * are tried in order and, within a candidate, keys in `Object.entries` order.
 * If nothing matches, `resolveSubjectInState` is used and the key is reported
 * as an empty string.
 *
 * @param {string} subject - Subject text extracted from the setup prose.
 * @param {Record<string, unknown>} flat - Flattened twin state.
 * @returns {{ items: unknown[], key: string } | null} Match, or null.
 */
function resolveSubjectWithKey(subject, flat) {
  const squash = (text) => text.replace(/\s+/g, "").toLowerCase();
  const candidates = buildSubjectCandidates2(subject);

  // Normalize every array-valued key once instead of once per candidate.
  const arrayCollections = Object.entries(flat)
    .filter(([, value]) => Array.isArray(value))
    .map(([key, value]) => ({ key, items: value, normalizedKey: squash(key) }));

  for (const candidate of candidates) {
    const wanted = squash(candidate);
    const hit = arrayCollections.find(
      ({ normalizedKey }) => normalizedKey === wanted || normalizedKey === wanted + "s"
    );
    if (hit) {
      return { items: hit.items, key: hit.key };
    }
  }

  const fallbackItems = resolveSubjectInState(subject, flat);
  if (!fallbackItems) return null;
  return { items: fallbackItems, key: "" };
}
9098
/**
 * Produces lookup candidates for a count subject, in priority order:
 * the subject itself; its singular/plural counterpart (a trailing "s" is
 * dropped when the subject ends in "s" and is longer than three characters,
 * otherwise an "s" is appended); and, for multi-word subjects, the first and
 * the last word.
 *
 * @param {string} subject - Subject text extracted from the setup prose.
 * @returns {string[]} Candidate names; duplicates are not removed.
 */
function buildSubjectCandidates2(subject) {
  const looksPlural = subject.endsWith("s") && subject.length > 3;
  const numberVariant = looksPlural ? subject.slice(0, -1) : subject + "s";

  const words = subject.split(/\s+/);
  const edgeWords = words.length > 1 ? [words[0], words[words.length - 1]] : [];

  return [subject, numberVariant, ...edgeWords];
}
8830
9112
  function verifySeedCounts(setupText, seedState) {
8831
9113
  const mismatches = [];
8832
9114
  const flat = flattenTwinState(seedState);
8833
9115
  const countPattern = /\b(\d+)\s+([\w\s]+?)(?:\s+(?:that|which|are|with|in|labeled|assigned)\b)/gi;
8834
9116
  for (const match of setupText.matchAll(countPattern)) {
9117
+ if (isDecimalFragment(setupText, match.index)) continue;
8835
9118
  const expected = parseInt(match[1], 10);
8836
9119
  const subject = match[2].trim();
9120
+ if (match.index !== void 0 && appearsToBeClockSuffix(setupText, match.index)) continue;
8837
9121
  if (!subject || expected <= 0) continue;
8838
9122
  if (!isReasonableCountSubject(subject, expected)) continue;
8839
- const resolved = resolveSubjectInState(subject, flat);
8840
- if (resolved && resolved.length !== expected) {
8841
- mismatches.push({ subject, expected, actual: resolved.length });
9123
+ const resolved = resolveSubjectWithKey(subject, flat);
9124
+ if (resolved && resolved.items.length !== expected) {
9125
+ mismatches.push({ subject, expected, actual: resolved.items.length, collectionKey: resolved.key || void 0 });
8842
9126
  }
8843
9127
  }
8844
9128
  const simplePattern = /\b(\d+)\s+([\w\s]+?)(?:[.,;:)]|$)/gm;
8845
9129
  const seenSubjects = new Set(mismatches.map((m) => m.subject.toLowerCase()));
8846
9130
  for (const match of setupText.matchAll(simplePattern)) {
9131
+ if (isDecimalFragment(setupText, match.index)) continue;
8847
9132
  const expected = parseInt(match[1], 10);
8848
9133
  const subject = match[2].trim();
9134
+ if (match.index !== void 0 && appearsToBeClockSuffix(setupText, match.index)) continue;
8849
9135
  if (!subject || expected <= 0 || seenSubjects.has(subject.toLowerCase())) continue;
8850
9136
  if (!isReasonableCountSubject(subject, expected)) continue;
8851
- const resolved = resolveSubjectInState(subject, flat);
8852
- if (resolved && resolved.length !== expected) {
8853
- mismatches.push({ subject, expected, actual: resolved.length });
9137
+ const resolved = resolveSubjectWithKey(subject, flat);
9138
+ if (resolved && resolved.items.length !== expected) {
9139
+ mismatches.push({ subject, expected, actual: resolved.items.length, collectionKey: resolved.key || void 0 });
8854
9140
  seenSubjects.add(subject.toLowerCase());
8855
9141
  }
8856
9142
  }
8857
9143
  return mismatches;
8858
9144
  }
9145
/**
 * Trims over-full seed collections down to the counts the scenario expects.
 *
 * For every mismatch whose `actual` exceeds `expected` and whose
 * `collectionKey` names an existing entry in `seed`, the collection is
 * replaced by its first `expected` items. `seed` is modified in place.
 *
 * @param {Record<string, unknown[]>} seed - Seed data keyed by collection.
 * @param {Array<{expected: number, actual: number, collectionKey?: string}>} mismatches
 *   Count mismatches to act on.
 * @returns {number} Total number of entities removed across all collections.
 */
function trimSeedToExpectedCounts(seed, mismatches) {
  return mismatches.reduce((removedSoFar, { actual, expected, collectionKey }) => {
    if (actual <= expected) return removedSoFar;
    if (!collectionKey || !seed[collectionKey]) return removedSoFar;

    const entries = seed[collectionKey];
    // Re-check against the live length: an earlier mismatch may have trimmed it.
    if (!(entries.length > expected)) return removedSoFar;

    const surplus = entries.length - expected;
    seed[collectionKey] = entries.slice(0, expected);
    return removedSoFar + surplus;
  }, 0);
}
8859
9159
 
8860
9160
  // src/runner/seed-cache.ts
8861
- var CACHE_VERSION = 3;
9161
+ var CACHE_VERSION = 4;
8862
9162
  var NEGATIVE_CACHE_VERSION = 2;
8863
9163
  var NEGATIVE_PREFIX = "neg-";
8864
9164
  var CACHE_DIR = join7(homedir2(), ".archal", "seed-cache");
@@ -9110,7 +9410,7 @@ ${setupText}
9110
9410
  Extract the seed blueprint as JSON.`;
9111
9411
  try {
9112
9412
  const provider = detectProvider(config.model);
9113
- const apiKey = resolveProviderApiKey(config.apiKey, provider);
9413
+ const apiKey = config.providerMode === "archal" ? "" : resolveProviderApiKey(config.apiKey ?? "", provider);
9114
9414
  const responseText = await callLlm({
9115
9415
  provider,
9116
9416
  model: config.model,
@@ -9129,12 +9429,26 @@ Extract the seed blueprint as JSON.`;
9129
9429
  }
9130
9430
  const parsed = parseBlueprint(responseText, twinName);
9131
9431
  if (!parsed) return null;
9432
+ const validCollections = new Set(availableCollections);
9433
+ parsed.collections = parsed.collections.filter((col) => {
9434
+ if (validCollections.has(col.name)) return true;
9435
+ warn(`Blueprint references unknown collection "${col.name}" for ${twinName} \u2014 dropping`);
9436
+ return false;
9437
+ });
9132
9438
  for (const col of parsed.collections) {
9133
9439
  const groupSum = col.groups.reduce((sum, g) => sum + g.count, 0);
9134
9440
  if (groupSum !== col.totalCount) {
9135
9441
  debug(`Blueprint group count mismatch for ${col.name}: groups sum to ${groupSum}, totalCount is ${col.totalCount}. Adjusting.`);
9136
9442
  col.totalCount = groupSum;
9137
9443
  }
9444
+ if (col.totalCount === 0) {
9445
+ debug(`Blueprint collection ${col.name} has 0 entities \u2014 dropping`);
9446
+ }
9447
+ }
9448
+ parsed.collections = parsed.collections.filter((col) => col.totalCount > 0);
9449
+ if (parsed.collections.length === 0 && parsed.identities.length === 0) {
9450
+ warn("Blueprint extracted no valid collections or identities");
9451
+ return null;
9138
9452
  }
9139
9453
  return parsed;
9140
9454
  } catch (err) {
@@ -9356,7 +9670,13 @@ function buildSeedFromBlueprint(blueprint, baseSeed) {
9356
9670
  for (const identity of blueprint.identities) {
9357
9671
  processIdentity(identity, seed, warnings);
9358
9672
  }
9673
+ const baseCollections = new Set(Object.keys(baseSeed));
9359
9674
  for (const spec of blueprint.collections) {
9675
+ if (!baseCollections.has(spec.name) && !seed[spec.name]) {
9676
+ warnings.push(`Blueprint references unknown collection "${spec.name}" \u2014 skipping`);
9677
+ warn(`Blueprint references unknown collection "${spec.name}" for ${blueprint.twin} twin \u2014 skipping`);
9678
+ continue;
9679
+ }
9360
9680
  processCollection(spec, seed, blueprint.twin, existingLabels, warnings, now);
9361
9681
  }
9362
9682
  return { seed, warnings };
@@ -9612,9 +9932,16 @@ function buildSlackEntity(collection, id, props, seed, index, temporal, contentH
9612
9932
  }
9613
9933
  case "messages": {
9614
9934
  const channels = seed["channels"] ?? [];
9615
- const channelId = channels.length > 0 ? String(channels[index % channels.length]["channel_id"] ?? "C0001AAAA") : "C0001AAAA";
9935
+ const targetChannel = channels.length > 0 ? channels[index % channels.length] : null;
9936
+ const channelId = targetChannel ? String(targetChannel["channel_id"] ?? "C0001AAAA") : "C0001AAAA";
9937
+ const channelMembers = targetChannel ? targetChannel["members"] ?? [] : [];
9616
9938
  const users = seed["users"] ?? [];
9617
- const userId = users.length > 0 ? String(users[index % users.length]["user_id"] ?? "U0001AAAA") : "U0001AAAA";
9939
+ let userId;
9940
+ if (channelMembers.length > 0) {
9941
+ userId = channelMembers[index % channelMembers.length];
9942
+ } else {
9943
+ userId = users.length > 0 ? String(users[index % users.length]["user_id"] ?? "U0001AAAA") : "U0001AAAA";
9944
+ }
9618
9945
  const baseTs = Math.floor(new Date(temporal.createdAt).getTime() / 1e3);
9619
9946
  const ts = generateSlackTs(baseTs, index);
9620
9947
  return {
@@ -10303,9 +10630,19 @@ function extractHybridPatch(obj) {
10303
10630
  }
10304
10631
  return null;
10305
10632
  }
10306
- function buildSeedCacheContext(twinName, intent, context) {
10633
+ function hashText(text) {
10634
+ return createHash4("sha256").update(text).digest("hex").slice(0, 16);
10635
+ }
10636
+ function buildSeedCacheContext(twinName, config, intent, context) {
10307
10637
  return {
10308
10638
  twinName,
10639
+ generator: {
10640
+ model: config.model,
10641
+ providerMode: config.providerMode ?? "direct",
10642
+ baseUrl: config.baseUrl ?? null,
10643
+ systemPromptHash: hashText(SYSTEM_PROMPT2),
10644
+ promptTemplateVersion: 2
10645
+ },
10309
10646
  intent: intent ?? null,
10310
10647
  scenario: context ?? null
10311
10648
  };
@@ -10660,10 +10997,13 @@ async function tryBlueprintPath(twinName, baseSeedData, setupDescription, availa
10660
10997
  finalSeed = autoFillMissingFKs(finalSeed, twinName);
10661
10998
  const relValidation = validateSeedRelationships(finalSeed, twinName);
10662
10999
  if (!relValidation.valid) {
10663
- warn("Blueprint seed failed relationship validation", {
10664
- errors: relValidation.errors.slice(0, 5).join("; ")
10665
- });
10666
- return null;
11000
+ finalSeed = autoFillMissingFKs(finalSeed, twinName);
11001
+ const secondValidation = validateSeedRelationships(finalSeed, twinName);
11002
+ if (!secondValidation.valid) {
11003
+ warn("Blueprint seed has unresolved FK references (continuing anyway)", {
11004
+ errors: secondValidation.errors.slice(0, 5).join("; ")
11005
+ });
11006
+ }
10667
11007
  }
10668
11008
  if (intent) {
10669
11009
  const coverage = validateSeedCoverage(intent, finalSeed);
@@ -10678,9 +11018,16 @@ async function tryBlueprintPath(twinName, baseSeedData, setupDescription, availa
10678
11018
  flatForVerify[twinName] = finalSeed;
10679
11019
  const countMismatches = verifySeedCounts(setupDescription, flatForVerify);
10680
11020
  if (countMismatches.length > 0) {
10681
- debug("Blueprint seed has count mismatches (acceptable)", {
10682
- mismatches: countMismatches.map((m) => `${m.subject}: ${m.expected} vs ${m.actual}`).join("; ")
10683
- });
11021
+ const trimmed = trimSeedToExpectedCounts(finalSeed, countMismatches);
11022
+ if (trimmed > 0) {
11023
+ debug(`Blueprint seed: trimmed ${trimmed} excess entities to match setup counts`);
11024
+ }
11025
+ const remaining = countMismatches.filter((m) => m.actual > m.expected && !m.collectionKey);
11026
+ if (remaining.length > 0) {
11027
+ debug("Blueprint seed has unresolvable count mismatches", {
11028
+ mismatches: remaining.map((m) => `${m.subject}: ${m.expected} vs ${m.actual}`).join("; ")
11029
+ });
11030
+ }
10684
11031
  }
10685
11032
  const syntheticPatch = {
10686
11033
  add: {}
@@ -10710,7 +11057,7 @@ async function tryBlueprintPath(twinName, baseSeedData, setupDescription, availa
10710
11057
  async function generateDynamicSeed(twinName, baseSeedName, baseSeedData, setupDescription, config, intent, context) {
10711
11058
  const cacheScope = {
10712
11059
  baseSeedData,
10713
- cacheContext: buildSeedCacheContext(twinName, intent, context)
11060
+ cacheContext: buildSeedCacheContext(twinName, config, intent, context)
10714
11061
  };
10715
11062
  if (!config.noCache) {
10716
11063
  const cached = getCachedSeed(twinName, baseSeedName, setupDescription, cacheScope);
@@ -10741,7 +11088,7 @@ async function generateDynamicSeed(twinName, baseSeedName, baseSeedData, setupDe
10741
11088
  if (blueprintResult) {
10742
11089
  info("Dynamic seed generated via blueprint", { twin: twinName });
10743
11090
  if (!config.noCache) {
10744
- const cacheContext = buildSeedCacheContext(twinName, intent, context);
11091
+ const cacheContext = buildSeedCacheContext(twinName, config, intent, context);
10745
11092
  cacheSeed(twinName, baseSeedName, setupDescription, blueprintResult.seed, blueprintResult.patch, {
10746
11093
  baseSeedData,
10747
11094
  cacheContext
@@ -10787,7 +11134,7 @@ Fix these issues:
10787
11134
  validationAttempt: String(validationAttempts + 1)
10788
11135
  });
10789
11136
  const provider = detectProvider(config.model);
10790
- const apiKey = resolveProviderApiKey(config.apiKey, provider);
11137
+ const apiKey = effectiveMode === "archal" ? "" : resolveProviderApiKey(config.apiKey, provider);
10791
11138
  const responseText = await callLlm({
10792
11139
  provider,
10793
11140
  model: config.model,
@@ -10796,7 +11143,7 @@ Fix these issues:
10796
11143
  userPrompt: promptWithFeedback,
10797
11144
  maxTokens: 16384,
10798
11145
  baseUrl: config.baseUrl,
10799
- providerMode: config.providerMode,
11146
+ providerMode: effectiveMode,
10800
11147
  intent: "seed-generate",
10801
11148
  responseFormat: "json"
10802
11149
  });
@@ -10872,14 +11219,19 @@ Fix these issues:
10872
11219
  const relationshipValidation = validateSeedRelationships(mergedSeed, twinName);
10873
11220
  if (!relationshipValidation.valid) {
10874
11221
  const topErrors = relationshipValidation.errors.slice(0, 10);
10875
- warn(`Dynamic seed relationship validation failed (attempt ${attempt + 1})`, {
11222
+ if (validationAttempts < MAX_ATTEMPTS - 1) {
11223
+ warn(`Dynamic seed relationship validation failed (attempt ${attempt + 1})`, {
11224
+ errors: topErrors.join("; ")
11225
+ });
11226
+ lastErrors = topErrors;
11227
+ patch = null;
11228
+ mergedSeed = null;
11229
+ validationAttempts++;
11230
+ continue;
11231
+ }
11232
+ warn(`Dynamic seed has unresolved FK references (accepting on final attempt)`, {
10876
11233
  errors: topErrors.join("; ")
10877
11234
  });
10878
- lastErrors = topErrors;
10879
- patch = null;
10880
- mergedSeed = null;
10881
- validationAttempts++;
10882
- continue;
10883
11235
  }
10884
11236
  if (intent) {
10885
11237
  debug("Seed intent coverage summary", {
@@ -10938,6 +11290,15 @@ Fix these issues:
10938
11290
  }
10939
11291
  mergedSeed = autoFillMissingFKs(mergedSeed, twinName);
10940
11292
  mergedSeed = ensureSlackScenarioChannelAccess(mergedSeed, intent);
11293
+ if (setupDescription) {
11294
+ const flatForTrim = {};
11295
+ flatForTrim[twinName] = mergedSeed;
11296
+ const finalMismatches = verifySeedCounts(setupDescription, flatForTrim);
11297
+ const trimmed = trimSeedToExpectedCounts(mergedSeed, finalMismatches);
11298
+ if (trimmed > 0) {
11299
+ debug(`Trimmed ${trimmed} excess seed entities to match setup counts`);
11300
+ }
11301
+ }
10941
11302
  if (!config.noCache) {
10942
11303
  cacheSeed(twinName, baseSeedName, setupDescription, mergedSeed, patch, cacheScope);
10943
11304
  }
@@ -11085,10 +11446,23 @@ function githubIntent(setup) {
11085
11446
  entities.push({ kind: "repo", key: "fullName", value: fullName });
11086
11447
  }
11087
11448
  if (!primaryRepoSet) {
11088
- const orgMatch = setup.match(/\bgithub\s+(?:organization|org)\s+"([a-z][a-z0-9._-]*)"/i);
11449
+ const orgMatch = setup.match(
11450
+ /\b(?:github\s+)?(?:organization|org)\s+(?:named\s+)?["']?([a-z][a-z0-9._-]*)["']?/i
11451
+ );
11089
11452
  if (orgMatch?.[1]) {
11090
- extractedSlots["repo.owner"] = orgMatch[1];
11091
- entities.push({ kind: "repo", key: "owner", value: orgMatch[1] });
11453
+ extractedSlots["repo.owner"] = orgMatch[1].toLowerCase();
11454
+ entities.push({ kind: "repo", key: "owner", value: orgMatch[1].toLowerCase() });
11455
+ const repoName = setup.match(/\b(?:repository|repo)\s+(?:named\s+)?["']?([a-z][a-z0-9._-]{1,99})["']?/i)?.[1];
11456
+ if (repoName) {
11457
+ const normalizedName = repoName.toLowerCase();
11458
+ extractedSlots["repo.name"] = normalizedName;
11459
+ entities.push({ kind: "repo", key: "name", value: normalizedName });
11460
+ entities.push({
11461
+ kind: "repo",
11462
+ key: "fullName",
11463
+ value: `${String(extractedSlots["repo.owner"])}/${normalizedName}`
11464
+ });
11465
+ }
11092
11466
  } else {
11093
11467
  missingSlots.push({
11094
11468
  slot: "repo.owner/repo.name",
@@ -11279,6 +11653,18 @@ function stripeIntent(setup) {
11279
11653
  });
11280
11654
  }
11281
11655
  }
11656
+ const idRegex = /\b((?:acct|cus|prod|price|pi|ch|re|in|sub|dp|pm|payout|tr|tok|evt)_[a-zA-Z0-9]+)\b/g;
11657
+ const seenIds = /* @__PURE__ */ new Set();
11658
+ let idMatch;
11659
+ while ((idMatch = idRegex.exec(setup)) !== null) {
11660
+ const id = idMatch[1];
11661
+ if (seenIds.has(id)) continue;
11662
+ seenIds.add(id);
11663
+ entities.push({ kind: "stripe_entity", key: "id", value: id });
11664
+ if (!extractedSlots["stripe.primary_id"]) {
11665
+ extractedSlots["stripe.primary_id"] = id;
11666
+ }
11667
+ }
11282
11668
  if (missingSlots.length > 0) {
11283
11669
  return { intent: null, missingSlots };
11284
11670
  }
@@ -11372,6 +11758,30 @@ function jiraIntent(setup) {
11372
11758
  }
11373
11759
  entities.push({ kind: "ticket", key: "key", value: key });
11374
11760
  }
11761
+ const seenProjects = /* @__PURE__ */ new Set();
11762
+ const addProject = (projectKey) => {
11763
+ const normalized = projectKey.toUpperCase();
11764
+ if (!/^[A-Z][A-Z0-9]{1,9}$/.test(normalized)) return;
11765
+ if (seenProjects.has(normalized)) return;
11766
+ seenProjects.add(normalized);
11767
+ entities.push({ kind: "project", key: "key", value: normalized });
11768
+ if (!extractedSlots["project.key"]) {
11769
+ extractedSlots["project.key"] = normalized;
11770
+ }
11771
+ };
11772
+ for (const key of seenKeys) {
11773
+ addProject(key.split("-", 1)[0] ?? "");
11774
+ }
11775
+ const projectRegexes = [
11776
+ /\b(?:jira\s+)?project\s+(?:key\s*)?[:=]?\s*["']?([A-Z][A-Z0-9]{1,9})["']?/gi,
11777
+ /\bproject\s+["'][^"'\n]+["']\s*\(\s*([A-Z][A-Z0-9]{1,9})\s*\)/gi
11778
+ ];
11779
+ for (const regex of projectRegexes) {
11780
+ let projectMatch;
11781
+ while ((projectMatch = regex.exec(setup)) !== null) {
11782
+ addProject(projectMatch[1] ?? "");
11783
+ }
11784
+ }
11375
11785
  return {
11376
11786
  intent: {
11377
11787
  twinName: "jira",
@@ -11386,6 +11796,7 @@ function jiraIntent(setup) {
11386
11796
  }
11387
11797
  function supabaseIntent(setup) {
11388
11798
  const extractedSlots = {};
11799
+ const entities = [];
11389
11800
  const missingSlots = [];
11390
11801
  const requiredSlots = ["database.target"];
11391
11802
  const seenTables = /* @__PURE__ */ new Set();
@@ -11418,6 +11829,9 @@ function supabaseIntent(setup) {
11418
11829
  const hasEnvVarTokens = /\b[A-Z][A-Z0-9_]{2,}\b/.test(setup);
11419
11830
  if (seenTables.size > 0 || mentionsProject || mentionsLogsOrService || mentionsEnvVars && hasEnvVarTokens) {
11420
11831
  extractedSlots["database.target"] = true;
11832
+ for (const table2 of seenTables) {
11833
+ entities.push({ kind: "table", key: "name", value: table2 });
11834
+ }
11421
11835
  } else {
11422
11836
  missingSlots.push({
11423
11837
  slot: "database.target",
@@ -11434,10 +11848,7 @@ function supabaseIntent(setup) {
11434
11848
  setupSummary: setupSummary(setup),
11435
11849
  requiredSlots,
11436
11850
  extractedSlots,
11437
- // Supabase table names in setup can describe conceptual data sources
11438
- // that are not materialized in the base SQL schema. Keep intent broad
11439
- // to avoid false-hard failures in seed generation.
11440
- entities: [],
11851
+ entities,
11441
11852
  quotedStrings: []
11442
11853
  },
11443
11854
  missingSlots: []
@@ -11897,11 +12308,21 @@ function parseSqlSeed(sql) {
11897
12308
  function loadSeedStateFromPath(seedRoot, seedName) {
11898
12309
  const jsonPath = resolve4(seedRoot, `${seedName}.json`);
11899
12310
  if (existsSync10(jsonPath)) {
11900
- return JSON.parse(readFileSync12(jsonPath, "utf-8"));
12311
+ try {
12312
+ return JSON.parse(readFileSync12(jsonPath, "utf-8"));
12313
+ } catch (err) {
12314
+ const detail = err instanceof Error ? err.message : String(err);
12315
+ throw new Error(`Failed to parse seed file ${jsonPath}: ${detail}`);
12316
+ }
11901
12317
  }
11902
12318
  const sqlPath = resolve4(seedRoot, `${seedName}.sql`);
11903
12319
  if (existsSync10(sqlPath)) {
11904
- return parseSqlSeed(readFileSync12(sqlPath, "utf-8"));
12320
+ try {
12321
+ return parseSqlSeed(readFileSync12(sqlPath, "utf-8"));
12322
+ } catch (err) {
12323
+ const detail = err instanceof Error ? err.message : String(err);
12324
+ throw new Error(`Failed to parse seed file ${sqlPath}: ${detail}`);
12325
+ }
11905
12326
  }
11906
12327
  return null;
11907
12328
  }
@@ -11951,12 +12372,24 @@ function loadBaseSeedFromDisk(twinName, seedName) {
11951
12372
  }
11952
12373
  function categorizeRunError(message) {
11953
12374
  if (/Failed to spawn|ENOENT/.test(message)) {
11954
- return `Agent not found: ${message}. Check that your agent command is installed and in PATH.`;
12375
+ return {
12376
+ message: `Agent not found: ${message}. Check that your agent command is installed and in PATH.`,
12377
+ outcome: "failed_agent"
12378
+ };
12379
+ }
12380
+ if (/Dynamic seed generation failed|Missing dynamic seed state|seed generation|seed setup/i.test(message)) {
12381
+ return {
12382
+ message: `Seed generation error: ${message}`,
12383
+ outcome: "inconclusive_seed"
12384
+ };
11955
12385
  }
11956
12386
  if (/HTTP [45]\d\d|ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ECONNRESET|cloud session|fetch failed|AbortError|TimeoutError|operation was aborted|timed?\s*out/i.test(message)) {
11957
- return `Infrastructure error: ${message}. Check your network or try again.`;
12387
+ return {
12388
+ message: `Infrastructure error: ${message}. Check your network or try again.`,
12389
+ outcome: "inconclusive_infrastructure"
12390
+ };
11958
12391
  }
11959
- return message;
12392
+ return { message, outcome: "failed_agent" };
11960
12393
  }
11961
12394
  async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections, evaluatorConfig, timeoutSeconds, apiEngine, localEngine, remoteTwinUrlOverrides, apiRouting, cloudTwinUrls, hostedSessionId, apiBearerToken, adminAuth) {
11962
12395
  const startTime = Date.now();
@@ -12094,7 +12527,8 @@ ${baseTaskMessage}` : baseTaskMessage;
12094
12527
  stateDiff: diff,
12095
12528
  agentLog: agentResult.stderr || void 0,
12096
12529
  agentTrace: agentResult.agentTrace,
12097
- tokenUsage
12530
+ tokenUsage,
12531
+ outcome: "failed_agent"
12098
12532
  };
12099
12533
  }
12100
12534
  if (agentResult.exitCode !== 0 && agentResult.exitCode !== null) {
@@ -12133,11 +12567,14 @@ ${baseTaskMessage}` : baseTaskMessage;
12133
12567
  stateDiff: diff,
12134
12568
  agentLog: agentResult.stderr || void 0,
12135
12569
  agentTrace: agentResult.agentTrace,
12136
- tokenUsage
12570
+ tokenUsage,
12571
+ outcome: "failed_agent"
12137
12572
  };
12138
12573
  }
12139
12574
  if (trace.length === 0) {
12140
- warn(`Agent made no tool calls on run ${runIndex + 1}. The agent may have failed to act \u2014 check agent logs and task prompt.`);
12575
+ warn(
12576
+ `Agent made no tool calls on run ${runIndex + 1}. This usually means the model is too weak for this scenario. Try a more capable model (e.g. --engine-model claude-sonnet-4-6 or --engine-model gemini-2.5-pro). If using a custom agent, check that it correctly processes tool schemas and calls tools.`
12577
+ );
12141
12578
  }
12142
12579
  progress(`Evaluating run ${runIndex + 1}...`);
12143
12580
  const evaluationResult = await evaluateRun(
@@ -12163,12 +12600,13 @@ ${baseTaskMessage}` : baseTaskMessage;
12163
12600
  stateDiff: diff,
12164
12601
  agentLog: agentResult.stderr || void 0,
12165
12602
  agentTrace: agentResult.agentTrace,
12166
- tokenUsage
12603
+ tokenUsage,
12604
+ outcome: "completed"
12167
12605
  };
12168
12606
  } catch (err) {
12169
12607
  const message = err instanceof Error ? err.message : String(err);
12170
12608
  const categorized = categorizeRunError(message);
12171
- error(`Run ${runIndex + 1} failed: ${categorized}`);
12609
+ error(`Run ${runIndex + 1} failed: ${categorized.message}`);
12172
12610
  const durationMs = Date.now() - startTime;
12173
12611
  return {
12174
12612
  runIndex,
@@ -12176,12 +12614,13 @@ ${baseTaskMessage}` : baseTaskMessage;
12176
12614
  criterionId: c.id,
12177
12615
  status: "fail",
12178
12616
  confidence: 1,
12179
- explanation: `Run failed: ${categorized}`
12617
+ explanation: `Run failed: ${categorized.message}`
12180
12618
  })),
12181
12619
  overallScore: 0,
12182
12620
  trace: [],
12183
12621
  durationMs,
12184
- error: categorized,
12622
+ error: categorized.message,
12623
+ outcome: categorized.outcome,
12185
12624
  stateBefore: beforeState,
12186
12625
  stateAfter: beforeState,
12187
12626
  stateDiff: { added: {}, modified: {}, removed: {} }
@@ -12258,9 +12697,20 @@ function preflightCheck(scenario, apiKey, model, baseUrl, evaluatorProvider, see
12258
12697
  }
12259
12698
  }
12260
12699
  if (seedModel) {
12700
+ const mode = seedProviderMode ?? "auto";
12701
+ const provider = detectProvider(seedModel);
12702
+ const resolvedKey = resolveProviderApiKey(apiKey, provider);
12261
12703
  const creds = getCredentials();
12262
12704
  const hasArchalAuth = Boolean(creds?.token);
12263
- if (!hasArchalAuth) {
12705
+ if (provider === "openai-compatible" && !baseUrl && mode === "direct") {
12706
+ errors.push({
12707
+ check: "seed.baseUrl",
12708
+ message: `Seed model "${seedModel}" requires a base URL for the OpenAI-compatible endpoint`,
12709
+ detail: "Set via: export ARCHAL_EVALUATOR_BASE_URL=<url> or archal config set evaluator.baseUrl <url>",
12710
+ warning: true
12711
+ });
12712
+ }
12713
+ if (mode === "archal" && !hasArchalAuth) {
12264
12714
  errors.push({
12265
12715
  check: "archal-auth-seed",
12266
12716
  message: "Dynamic seed generation requires Archal authentication",
@@ -12268,6 +12718,32 @@ function preflightCheck(scenario, apiKey, model, baseUrl, evaluatorProvider, see
12268
12718
  warning: true
12269
12719
  });
12270
12720
  }
12721
+ if (mode === "direct" && !resolvedKey) {
12722
+ errors.push({
12723
+ check: getProviderEnvVar(provider),
12724
+ message: `Seed provider is "direct" but no API key is configured for ${provider}`,
12725
+ detail: `Set via: export ${getProviderEnvVar(provider)}=<your-key> or archal config set evaluator.apiKey <key>`,
12726
+ warning: true
12727
+ });
12728
+ }
12729
+ if (mode === "auto" && !resolvedKey && !hasArchalAuth) {
12730
+ errors.push({
12731
+ check: getProviderEnvVar(provider),
12732
+ message: 'Dynamic seed generation has no available provider in "auto" mode',
12733
+ detail: `Set ${getProviderEnvVar(provider)} (or evaluator.apiKey) for direct mode, or run archal login for Archal backend mode`,
12734
+ warning: true
12735
+ });
12736
+ }
12737
+ if (resolvedKey && (mode === "direct" || mode === "auto")) {
12738
+ const mismatch = validateKeyForProvider(resolvedKey, provider);
12739
+ if (mismatch) {
12740
+ errors.push({
12741
+ check: "seed-key-provider-mismatch",
12742
+ message: mismatch,
12743
+ warning: true
12744
+ });
12745
+ }
12746
+ }
12271
12747
  }
12272
12748
  return errors;
12273
12749
  }
@@ -12316,6 +12792,35 @@ async function runScenario(options) {
12316
12792
  'cloudTwinUrls is required. Local twin execution has been removed; use "archal run" to provision a hosted session.'
12317
12793
  );
12318
12794
  }
12795
+ const criterionDescriptions = {};
12796
+ const criterionTypes = {};
12797
+ for (const c of scenario.successCriteria) {
12798
+ criterionDescriptions[c.id] = c.description;
12799
+ criterionTypes[c.id] = c.type;
12800
+ }
12801
+ const buildInconclusiveSeedReport = (message) => ({
12802
+ scenarioTitle: scenario.title,
12803
+ satisfactionScore: 0,
12804
+ criterionDescriptions,
12805
+ criterionTypes,
12806
+ twinNames: scenario.config.twins,
12807
+ runs: [{
12808
+ runIndex: 0,
12809
+ evaluations: scenario.successCriteria.map((criterion) => ({
12810
+ criterionId: criterion.id,
12811
+ status: "fail",
12812
+ confidence: 1,
12813
+ explanation: `Run not scored due to seed setup failure: ${message}`
12814
+ })),
12815
+ overallScore: 0,
12816
+ trace: [],
12817
+ durationMs: 0,
12818
+ error: message,
12819
+ outcome: "inconclusive_seed"
12820
+ }],
12821
+ summary: `Inconclusive (seed setup): ${message}`,
12822
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
12823
+ });
12319
12824
  const preflightErrors = preflightCheck(
12320
12825
  scenario,
12321
12826
  config.apiKey,
@@ -12406,7 +12911,7 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
12406
12911
  cacheContext: seedPromptContext
12407
12912
  });
12408
12913
  }
12409
- throw new Error(message);
12914
+ return buildInconclusiveSeedReport(message);
12410
12915
  }
12411
12916
  warn(message);
12412
12917
  generationTargets.push(sel);
@@ -12415,12 +12920,11 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
12415
12920
  if (generationTargets.length > 0) {
12416
12921
  progress("Generating dynamic seeds from setup description...");
12417
12922
  const dynamicConfig = {
12418
- apiKey: "",
12419
- // Seed gen always routes through Archal backend
12923
+ apiKey: config.apiKey,
12420
12924
  model: config.seedModel,
12421
12925
  baseUrl: config.baseUrl,
12422
12926
  noCache: options.noSeedCache,
12423
- providerMode: "archal"
12927
+ providerMode: config.seedProvider
12424
12928
  };
12425
12929
  let cloudSeedSnapshotByTwin = null;
12426
12930
  const adminAuth = options.apiAdminToken ? { token: options.apiAdminToken, userId: options.apiAdminUserId } : void 0;
@@ -12438,20 +12942,28 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
12438
12942
  baseSeedData = normalizeSeedState(cloudSeedSnapshotByTwin[sel.twinName]);
12439
12943
  }
12440
12944
  if (!baseSeedData || Object.keys(baseSeedData).length === 0) {
12441
- throw new Error(
12945
+ return buildInconclusiveSeedReport(
12442
12946
  `Could not load base seed "${sel.seedName}" for twin "${sel.twinName}" from disk. Ensure the seed file exists at twins/${sel.twinName}/seeds/${sel.seedName}.json or .sql, or that the hosted twin /state endpoint is reachable.`
12443
12947
  );
12444
12948
  }
12445
12949
  progress(`Generating dynamic seed for ${sel.twinName}...`);
12446
- const result = await generateDynamicSeed(
12447
- sel.twinName,
12448
- sel.seedName,
12449
- baseSeedData,
12450
- scenario.setup,
12451
- dynamicConfig,
12452
- extractedIntentByTwin.get(sel.twinName),
12453
- seedPromptContext
12454
- );
12950
+ let result;
12951
+ try {
12952
+ result = await generateDynamicSeed(
12953
+ sel.twinName,
12954
+ sel.seedName,
12955
+ baseSeedData,
12956
+ scenario.setup,
12957
+ dynamicConfig,
12958
+ extractedIntentByTwin.get(sel.twinName),
12959
+ seedPromptContext
12960
+ );
12961
+ } catch (error2) {
12962
+ const detail = error2 instanceof Error ? error2.message : String(error2);
12963
+ return buildInconclusiveSeedReport(
12964
+ `Dynamic seed generation failed for twin "${sel.twinName}": ${detail}`
12965
+ );
12966
+ }
12455
12967
  sel.seedData = result.seed;
12456
12968
  if (result.fromCache) {
12457
12969
  cachedSeedTwins.push(sel.twinName);
@@ -12467,15 +12979,21 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
12467
12979
  }
12468
12980
  const missingDynamicSeeds = seedSelections.filter((sel) => !sel.seedData);
12469
12981
  if (missingDynamicSeeds.length > 0) {
12470
- throw new Error(
12982
+ return buildInconclusiveSeedReport(
12471
12983
  `Missing dynamic seed state for twin(s): ${missingDynamicSeeds.map((sel) => sel.twinName).join(", ")}`
12472
12984
  );
12473
12985
  }
12474
12986
  for (const sel of seedSelections) {
12475
12987
  const mismatches = verifySeedCounts(scenario.setup, sel.seedData);
12476
12988
  if (mismatches.length === 0) continue;
12989
+ const significantMismatches = mismatches.filter((m) => {
12990
+ const delta = Math.abs(m.expected - m.actual);
12991
+ const ratio = m.expected > 0 ? delta / m.expected : delta;
12992
+ return delta > 5 || ratio > 0.5;
12993
+ });
12994
+ if (significantMismatches.length === 0) continue;
12477
12995
  warn(
12478
- `Seed count mismatch for ${sel.twinName}: ${mismatches.map((m) => `${m.subject}: expected ${m.expected}, got ${m.actual}`).join("; ")}`
12996
+ `Seed count mismatch for ${sel.twinName}: ${significantMismatches.map((m) => `${m.subject}: expected ${m.expected}, got ${m.actual}`).join("; ")}`
12479
12997
  );
12480
12998
  }
12481
12999
  const scenarioDir = dirname2(resolve4(options.scenarioPath));
@@ -12656,8 +13174,8 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
12656
13174
  return {
12657
13175
  scenarioTitle: scenario.title,
12658
13176
  satisfactionScore: 100,
12659
- criterionDescriptions: {},
12660
- criterionTypes: {},
13177
+ criterionDescriptions,
13178
+ criterionTypes,
12661
13179
  twinNames: scenario.config.twins,
12662
13180
  runs: [],
12663
13181
  summary: "Preflight checks passed",
@@ -12667,7 +13185,7 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
12667
13185
  printHeader(scenario.title, seedSelections);
12668
13186
  const evaluatorProvider = detectProvider(model);
12669
13187
  const configProvider = detectProvider(config.model);
12670
- const evaluatorApiKey = options.model && evaluatorProvider !== configProvider ? resolveProviderApiKey("", evaluatorProvider) : resolveProviderApiKey(config.apiKey, evaluatorProvider);
13188
+ const evaluatorApiKey = config.evaluatorProvider === "archal" ? "" : options.model && evaluatorProvider !== configProvider ? resolveProviderApiKey("", evaluatorProvider) : resolveProviderApiKey(config.apiKey, evaluatorProvider);
12671
13189
  const evaluatorConfig = {
12672
13190
  apiKey: evaluatorApiKey,
12673
13191
  model,
@@ -12696,8 +13214,8 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
12696
13214
  adminAuth
12697
13215
  );
12698
13216
  runs.push(result);
12699
- printRunProgress(i, numRuns, result.overallScore, result.error);
12700
- if (result.error) {
13217
+ printRunProgress(i, numRuns, result.overallScore, result.error, result.outcome);
13218
+ if (result.outcome === "inconclusive_infrastructure" || result.outcome === "inconclusive_seed") {
12701
13219
  consecutiveInfraErrors++;
12702
13220
  if (consecutiveInfraErrors >= EARLY_ABORT_THRESHOLD && i < numRuns - 1) {
12703
13221
  warn(`${consecutiveInfraErrors} consecutive run errors \u2014 aborting remaining ${numRuns - i - 1} run(s) to avoid wasting quota.`);
@@ -12707,19 +13225,17 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
12707
13225
  consecutiveInfraErrors = 0;
12708
13226
  }
12709
13227
  }
12710
- const runScores = runs.map((r) => r.overallScore);
13228
+ const scoredRuns = runs.filter(
13229
+ (run) => run.outcome !== "inconclusive_infrastructure" && run.outcome !== "inconclusive_seed"
13230
+ );
13231
+ const runScores = scoredRuns.map((r) => r.overallScore);
12711
13232
  const satisfactionScore = aggregateSatisfaction(runScores);
12712
- const allEvaluations = runs.map((r) => r.evaluations);
12713
- const summary = generateSummary(allEvaluations, satisfactionScore);
12714
- const criterionDescriptions = {};
12715
- const criterionTypes = {};
12716
- for (const c of scenario.successCriteria) {
12717
- criterionDescriptions[c.id] = c.description;
12718
- criterionTypes[c.id] = c.type;
12719
- }
13233
+ const allEvaluations = scoredRuns.map((r) => r.evaluations);
13234
+ const inconclusiveRuns = runs.length - scoredRuns.length;
13235
+ const summary = scoredRuns.length > 0 ? generateSummary(allEvaluations, satisfactionScore) : `Inconclusive: no scored runs (${inconclusiveRuns} infrastructure/seed setup run failure${inconclusiveRuns === 1 ? "" : "s"}).`;
12720
13236
  let failureAnalysis;
12721
- if (satisfactionScore < 100 && runs.length > 0 && !options.noFailureAnalysis) {
12722
- const representativeRun = runs.reduce(
13237
+ if (satisfactionScore < 100 && scoredRuns.length > 0 && !options.noFailureAnalysis) {
13238
+ const representativeRun = scoredRuns.reduce(
12723
13239
  (worst, r) => r.overallScore < worst.overallScore ? r : worst
12724
13240
  );
12725
13241
  const failedCriteria = representativeRun.evaluations.filter((e) => e.status !== "pass").map((e) => ({
@@ -12742,7 +13258,9 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
12742
13258
  stateDiff: representativeRun.stateDiff ?? { added: {}, modified: {}, removed: {} },
12743
13259
  stateBefore: representativeRun.stateBefore ?? {},
12744
13260
  stateAfter: representativeRun.stateAfter ?? {},
12745
- satisfactionScore
13261
+ satisfactionScore,
13262
+ agentLog: representativeRun.agentLog,
13263
+ agentError: representativeRun.error
12746
13264
  },
12747
13265
  evaluatorConfig
12748
13266
  );
@@ -13521,7 +14039,21 @@ function createRunCommand() {
13521
14039
  }
13522
14040
  }
13523
14041
  if (!process.env["ARCHAL_ENGINE_API_KEY"] && userConfig.engineApiKey) {
13524
- process.env["ARCHAL_ENGINE_API_KEY"] = userConfig.engineApiKey;
14042
+ const configKey = userConfig.engineApiKey;
14043
+ const requestedModel = firstNonEmpty(
14044
+ opts.engineModel,
14045
+ process.env["ARCHAL_ENGINE_MODEL"],
14046
+ opts.model
14047
+ // -m also defaults the engine model for local harnesses
14048
+ );
14049
+ if (requestedModel) {
14050
+ const modelProvider = detectProvider(requestedModel);
14051
+ if (!validateKeyForProvider(configKey, modelProvider)) {
14052
+ process.env["ARCHAL_ENGINE_API_KEY"] = configKey;
14053
+ }
14054
+ } else {
14055
+ process.env["ARCHAL_ENGINE_API_KEY"] = configKey;
14056
+ }
13525
14057
  }
13526
14058
  }
13527
14059
  inferEngineModelFromEvaluatorModel(opts);
@@ -13572,8 +14104,17 @@ function createRunCommand() {
13572
14104
  }
13573
14105
  }
13574
14106
  if (engine.mode === "local" && !process.env["ARCHAL_ENGINE_API_KEY"]) {
14107
+ const requestedModel = firstNonEmpty(
14108
+ opts.engineModel,
14109
+ process.env["ARCHAL_ENGINE_MODEL"]
14110
+ );
14111
+ const provider = requestedModel ? detectProvider(requestedModel) : null;
14112
+ const providerHint = provider ? `
14113
+ Hint: You requested model "${requestedModel}" (${provider}) but no ${provider} API key is available.
14114
+ Set ${getProviderEnvVar(provider)} or pass --engine-key <${provider}-key>
14115
+ ` : "";
13575
14116
  process.stderr.write(
13576
- "Error: No API key found. The agent harness needs an API key to call the model.\nSet one of:\n GEMINI_API_KEY, OPENAI_API_KEY, or ANTHROPIC_API_KEY env var\n archal config set engine.apiKey <key>\n ARCHAL_ENGINE_API_KEY env var\n"
14117
+ "Error: No API key found. The agent harness needs an API key to call the model.\nSet one of:\n GEMINI_API_KEY, OPENAI_API_KEY, or ANTHROPIC_API_KEY env var\n archal config set engine.apiKey <key>\n ARCHAL_ENGINE_API_KEY env var\n" + providerHint
13577
14118
  );
13578
14119
  process.exit(2);
13579
14120
  }
@@ -13643,12 +14184,14 @@ function createRunCommand() {
13643
14184
  })();
13644
14185
  const SESSION_READY_TIMEOUT_MS = Math.max(12e4, configuredReadyTimeoutMs);
13645
14186
  const SESSION_POLL_INTERVAL_MS = 2e3;
13646
- const STATUS_READY_GRACE_MS = 5e3;
13647
14187
  const readyDeadline = Date.now() + SESSION_READY_TIMEOUT_MS;
13648
14188
  let sessionReady = false;
13649
14189
  let lastPollIssue;
13650
- let statusReadySinceMs = null;
13651
14190
  const isRetryablePollFailure = (result) => result.offline || typeof result.status === "number" && result.status >= 500;
14191
+ const workersAllReady = (workers) => {
14192
+ if (!workers || Object.keys(workers).length === 0) return true;
14193
+ return Object.values(workers).every((value) => value === "ready");
14194
+ };
13652
14195
  const sleepForPollInterval = async () => new Promise((resolve12) => setTimeout(resolve12, SESSION_POLL_INTERVAL_MS));
13653
14196
  if (!opts.quiet) process.stderr.write("Starting cloud session...\n");
13654
14197
  let pollCount = 0;
@@ -13703,26 +14246,19 @@ function createRunCommand() {
13703
14246
  }
13704
14247
  const healthAlive = healthResult.ok && healthResult.data.alive;
13705
14248
  const statusAlive = statusResult.data.alive || status === "ready";
13706
- if (statusAlive && healthAlive) {
14249
+ const statusWorkersReady = workersAllReady(
14250
+ statusResult.data.twins ?? statusResult.data.workers
14251
+ );
14252
+ const healthWorkersReady = workersAllReady(healthResult.data.twins);
14253
+ if (statusAlive && healthAlive && statusWorkersReady && healthWorkersReady) {
13707
14254
  sessionReady = true;
13708
14255
  break;
13709
14256
  }
13710
- if (statusAlive && !healthAlive) {
13711
- if (statusReadySinceMs === null) {
13712
- statusReadySinceMs = Date.now();
13713
- }
13714
- const readyForMs = Date.now() - statusReadySinceMs;
13715
- if (readyForMs >= STATUS_READY_GRACE_MS) {
13716
- debug(
13717
- `Session ${backendSessionId} proceeded after health endpoint warmup (${readyForMs}ms).`
13718
- );
13719
- sessionReady = true;
13720
- break;
13721
- }
13722
- } else {
13723
- statusReadySinceMs = null;
13724
- }
13725
- lastPollIssue = `session still starting (status=${status}, health=${healthAlive ? "alive" : "starting"})`;
14257
+ const statusTwinStates = Object.entries(
14258
+ statusResult.data.twins ?? statusResult.data.workers ?? {}
14259
+ ).map(([twin, twinStatus]) => `${twin}:${twinStatus}`).join(", ");
14260
+ const healthTwinStates = Object.entries(healthResult.data.twins ?? {}).map(([twin, twinStatus]) => `${twin}:${twinStatus}`).join(", ");
14261
+ lastPollIssue = `session still starting (status=${status}, health=${healthAlive ? "alive" : "starting"}, statusTwins=[${statusTwinStates || "n/a"}], healthTwins=[${healthTwinStates || "n/a"}])`;
13726
14262
  await sleepForPollInterval();
13727
14263
  }
13728
14264
  if (sessionReady) {
@@ -14123,6 +14659,7 @@ function buildEvidenceArtifacts(report) {
14123
14659
  overallScore: run.overallScore,
14124
14660
  durationMs: run.durationMs,
14125
14661
  error: run.error ?? null,
14662
+ outcome: run.outcome ?? null,
14126
14663
  evaluations: (run.evaluations ?? []).map((ev) => ({
14127
14664
  criterionId: ev.criterionId,
14128
14665
  status: ev.status,
@@ -14442,7 +14979,7 @@ import { createInterface as createInterface2 } from "readline";
14442
14979
  import { Command as Command5 } from "commander";
14443
14980
 
14444
14981
  // src/telemetry/anonymizer.ts
14445
- import { createHash as createHash4 } from "crypto";
14982
+ import { createHash as createHash5 } from "crypto";
14446
14983
  var API_KEY_PATTERNS = [
14447
14984
  /(?:api[_-]?key|token|secret|password|authorization|bearer|credential)\s*[:=]\s*["']?([a-zA-Z0-9_\-/.+=]{16,})["']?/gi,
14448
14985
  /sk-[a-zA-Z0-9]{20,}/g,
@@ -14492,7 +15029,7 @@ var USERNAME_FIELDS = /* @__PURE__ */ new Set([
14492
15029
  "maintainer"
14493
15030
  ]);
14494
15031
  function hashValue2(value, salt = "archal") {
14495
- return `anon_${createHash4("sha256").update(`${salt}:${value}`).digest("hex").slice(0, 12)}`;
15032
+ return `anon_${createHash5("sha256").update(`${salt}:${value}`).digest("hex").slice(0, 12)}`;
14496
15033
  }
14497
15034
  function anonymizeForEnterprise(entries) {
14498
15035
  debug("Enterprise anonymization", { entryCount: String(entries.length) });
@@ -15468,7 +16005,7 @@ function createDoctorCommand() {
15468
16005
  // src/commands/login.ts
15469
16006
  import { Command as Command8 } from "commander";
15470
16007
  import { exec } from "child_process";
15471
- import { createHash as createHash5, randomBytes as randomBytes2 } from "crypto";
16008
+ import { createHash as createHash6, randomBytes as randomBytes2 } from "crypto";
15472
16009
  import { createServer } from "http";
15473
16010
  var START_PORT = 51423;
15474
16011
  var LOGIN_TIMEOUT_MS = 5 * 60 * 1e3;
@@ -15489,7 +16026,7 @@ function openBrowser(url) {
15489
16026
  }
15490
16027
  function createPkcePair() {
15491
16028
  const codeVerifier = randomBytes2(32).toString("base64url");
15492
- const codeChallenge = createHash5("sha256").update(codeVerifier).digest("base64url");
16029
+ const codeChallenge = createHash6("sha256").update(codeVerifier).digest("base64url");
15493
16030
  return { codeVerifier, codeChallenge };
15494
16031
  }
15495
16032
  function isPlan2(value) {
@@ -16219,11 +16756,25 @@ function detectProviderName(model) {
16219
16756
  if (normalized.startsWith("gpt-") || normalized.startsWith("o1-") || normalized.startsWith("o3-") || normalized.startsWith("o4-")) return "OpenAI";
16220
16757
  return "OpenAI-compatible";
16221
16758
  }
16222
- function resolveEngineApiKey(explicitKey) {
16759
+ function resolveEngineApiKey(explicitKey, model) {
16223
16760
  if (explicitKey?.trim()) return explicitKey.trim();
16224
16761
  if (process.env["ARCHAL_ENGINE_API_KEY"]?.trim()) return process.env["ARCHAL_ENGINE_API_KEY"].trim();
16762
+ const modelProvider = model ? detectProvider(model) : null;
16225
16763
  const config = loadConfig();
16226
- if (config.engineApiKey) return config.engineApiKey;
16764
+ if (config.engineApiKey) {
16765
+ if (!modelProvider || !validateKeyForProvider(config.engineApiKey, modelProvider)) {
16766
+ return config.engineApiKey;
16767
+ }
16768
+ }
16769
+ const providerEnvVars = {
16770
+ gemini: "GEMINI_API_KEY",
16771
+ openai: "OPENAI_API_KEY",
16772
+ anthropic: "ANTHROPIC_API_KEY"
16773
+ };
16774
+ if (modelProvider && providerEnvVars[modelProvider]) {
16775
+ const val = process.env[providerEnvVars[modelProvider]]?.trim();
16776
+ if (val) return val;
16777
+ }
16227
16778
  for (const envVar of ["GEMINI_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY"]) {
16228
16779
  const val = process.env[envVar]?.trim();
16229
16780
  if (val) return val;
@@ -16272,7 +16823,7 @@ function createDemoCommand() {
16272
16823
  process.exit(1);
16273
16824
  }
16274
16825
  const providerName = detectProviderName(opts.model);
16275
- const engineApiKey = resolveEngineApiKey(opts.apiKey);
16826
+ const engineApiKey = resolveEngineApiKey(opts.apiKey, opts.model);
16276
16827
  if (!engineApiKey) {
16277
16828
  process.stderr.write(
16278
16829
  `Error: No API key found for model "${opts.model}" (${providerName}).