@archal/cli 0.7.9 → 0.7.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +697 -146
- package/harnesses/_lib/providers.mjs +29 -7
- package/harnesses/hardened/agent.mjs +42 -109
- package/harnesses/naive/agent.mjs +15 -3
- package/harnesses/react/agent.mjs +36 -10
- package/harnesses/zero-shot/agent.mjs +15 -3
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -233,6 +233,7 @@ function parseCriterionLine(line, index) {
|
|
|
233
233
|
} else {
|
|
234
234
|
type = inferCriterionType(description);
|
|
235
235
|
}
|
|
236
|
+
if (!description) return null;
|
|
236
237
|
return {
|
|
237
238
|
id: `criterion-${index + 1}`,
|
|
238
239
|
description,
|
|
@@ -333,7 +334,11 @@ ${expectedBehavior}`.toLowerCase();
|
|
|
333
334
|
github: ["github", "repository", "pull request", "create_issue", "create_pull_request", "merge_pull_request"],
|
|
334
335
|
slack: ["slack", "slack channel", "send_message", "slack message", "direct message"],
|
|
335
336
|
linear: ["linear", "linear ticket", "linear project", "linear cycle"],
|
|
336
|
-
jira: ["jira", "jira sprint", "jira epic", "jira board"]
|
|
337
|
+
jira: ["jira", "jira sprint", "jira epic", "jira board"],
|
|
338
|
+
stripe: ["stripe", "payment", "refund", "subscription", "invoice", "charge"],
|
|
339
|
+
supabase: ["supabase", "database", "sql query", "database table"],
|
|
340
|
+
"google-workspace": ["google workspace", "gmail", "google calendar", "google drive", "google docs"],
|
|
341
|
+
browser: ["browser", "web page", "navigate to", "click on", "web content"]
|
|
337
342
|
};
|
|
338
343
|
for (const [twin, keywords] of Object.entries(twinKeywords)) {
|
|
339
344
|
if (keywords.some((kw) => combined.includes(kw))) {
|
|
@@ -425,7 +430,9 @@ function validateScenario(scenario) {
|
|
|
425
430
|
}
|
|
426
431
|
}
|
|
427
432
|
if (scenario.config.twins.length === 0) {
|
|
428
|
-
errors.push(
|
|
433
|
+
errors.push(
|
|
434
|
+
'Scenario does not reference any known twins. Add a "## Config" section with "twins: github" (or slack, linear, jira, stripe, supabase, google-workspace, browser). Alternatively, mention the service name in ## Setup or ## Expected Behavior.'
|
|
435
|
+
);
|
|
429
436
|
}
|
|
430
437
|
if (scenario.config.timeout <= 0) {
|
|
431
438
|
errors.push("Timeout must be a positive number");
|
|
@@ -1107,6 +1114,8 @@ var HTTP_RETRYABLE_STATUS_CODES = /* @__PURE__ */ new Set([408, 425, 429, 500, 5
|
|
|
1107
1114
|
var HTTP_PUSH_TIMEOUT_MS = 2e4;
|
|
1108
1115
|
var HTTP_PUSH_MAX_RETRIES = 6;
|
|
1109
1116
|
var HTTP_PUSH_BACKOFF_MS = [1e3, 2e3, 3e3, 5e3, 5e3, 5e3];
|
|
1117
|
+
var HTTP_PUSH_WARMUP_RETRIES = 6;
|
|
1118
|
+
var HTTP_PUSH_WARMUP_BACKOFF_MS = [1500, 2500, 3500, 5e3, 6e3, 7e3];
|
|
1110
1119
|
function resolveRetryDelay(backoffMs, attempt, fallbackMs) {
|
|
1111
1120
|
const indexed = backoffMs[attempt];
|
|
1112
1121
|
if (typeof indexed === "number" && Number.isFinite(indexed) && indexed >= 0) {
|
|
@@ -1157,6 +1166,10 @@ async function fetchWithRetry(url, options, retryOptions) {
|
|
|
1157
1166
|
/**
 * Removes one trailing `/mcp` or `/api` path segment (optionally followed by
 * a final slash) from a twin URL. Callers append their own path to the
 * result, e.g. `/state`.
 *
 * @param {string} url - Twin endpoint URL.
 * @returns {string} `url` without the trailing segment, or `url` unchanged
 *   when it does not end in `/mcp` or `/api`.
 */
function twinBasePath(url) {
  const trailingEndpoint = /\/(mcp|api)\/?$/;
  return url.replace(trailingEndpoint, "");
}
|
|
1169
|
+
/**
 * Decides whether a failed HTTP response is one the state-push loop should
 * wait out and retry: the status must be 503 and the body must contain one of
 * the known phrases (case-insensitive).
 *
 * @param {number} status - HTTP status code of the response.
 * @param {string} body - Response body text.
 * @returns {boolean} `true` only for a 503 whose body matches a known phrase.
 */
function isTwinWorkerWarmupResponse(status, body) {
  const warmupPhrases = /twin worker endpoint not available|session is busy|retry shortly/i;
  return status === 503 && warmupPhrases.test(body);
}
|
|
1160
1173
|
async function collectStateFromHttp(twinUrls, bearerToken, adminAuth) {
|
|
1161
1174
|
const state = {};
|
|
1162
1175
|
const failures = [];
|
|
@@ -1201,25 +1214,44 @@ async function pushStateToCloud(twinUrls, seedSelections, bearerToken, adminAuth
|
|
|
1201
1214
|
}
|
|
1202
1215
|
const url = `${twinBasePath(baseUrl)}/state`;
|
|
1203
1216
|
debug(`Pushing dynamic seed to ${sel.twinName}`, { url });
|
|
1204
|
-
const
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1217
|
+
const payload = JSON.stringify(sel.seedData);
|
|
1218
|
+
let pushed = false;
|
|
1219
|
+
for (let warmupAttempt = 0; warmupAttempt <= HTTP_PUSH_WARMUP_RETRIES; warmupAttempt++) {
|
|
1220
|
+
const response = await fetchWithRetry(
|
|
1221
|
+
url,
|
|
1222
|
+
{
|
|
1223
|
+
method: "PUT",
|
|
1224
|
+
headers,
|
|
1225
|
+
body: payload
|
|
1226
|
+
},
|
|
1227
|
+
{
|
|
1228
|
+
retries: HTTP_PUSH_MAX_RETRIES,
|
|
1229
|
+
timeoutMs: HTTP_PUSH_TIMEOUT_MS,
|
|
1230
|
+
backoffMs: HTTP_PUSH_BACKOFF_MS
|
|
1231
|
+
}
|
|
1232
|
+
);
|
|
1233
|
+
if (response.ok) {
|
|
1234
|
+
pushed = true;
|
|
1235
|
+
break;
|
|
1215
1236
|
}
|
|
1216
|
-
);
|
|
1217
|
-
if (!response.ok) {
|
|
1218
1237
|
const text = await response.text().catch(() => "");
|
|
1238
|
+
const isWarmup = isTwinWorkerWarmupResponse(response.status, text);
|
|
1239
|
+
if (isWarmup && warmupAttempt < HTTP_PUSH_WARMUP_RETRIES) {
|
|
1240
|
+
const delay = resolveRetryDelay(HTTP_PUSH_WARMUP_BACKOFF_MS, warmupAttempt, 5e3);
|
|
1241
|
+
warn(
|
|
1242
|
+
`Twin "${sel.twinName}" not ready for state push (HTTP 503), retrying in ${delay}ms`,
|
|
1243
|
+
{ attempt: `${warmupAttempt + 1}/${HTTP_PUSH_WARMUP_RETRIES + 1}` }
|
|
1244
|
+
);
|
|
1245
|
+
await new Promise((resolve12) => setTimeout(resolve12, delay));
|
|
1246
|
+
continue;
|
|
1247
|
+
}
|
|
1219
1248
|
throw new Error(
|
|
1220
1249
|
`Failed to push dynamic seed to twin "${sel.twinName}": HTTP ${response.status}${text ? ` (${text})` : ""}`
|
|
1221
1250
|
);
|
|
1222
1251
|
}
|
|
1252
|
+
if (!pushed) {
|
|
1253
|
+
throw new Error(`Failed to push dynamic seed to twin "${sel.twinName}": worker warmup did not complete in time`);
|
|
1254
|
+
}
|
|
1223
1255
|
debug(`Pushed dynamic seed to ${sel.twinName} successfully`);
|
|
1224
1256
|
}
|
|
1225
1257
|
}
|
|
@@ -3072,7 +3104,7 @@ async function callLlmViaArchal(options) {
|
|
|
3072
3104
|
debug("Archal backend response", { model: actualModel, remaining: String(result.data.remaining ?? "unknown") });
|
|
3073
3105
|
const isSeedGen = options.intent === "seed-generate";
|
|
3074
3106
|
if (!modelMismatchWarned && !isSeedGen && options.model && actualModel && !actualModel.includes(options.model) && !options.model.includes(actualModel)) {
|
|
3075
|
-
|
|
3107
|
+
debug(`Archal backend used "${actualModel}" (requested "${options.model}"). To use a specific model, set provider to "direct" with your own API key.`);
|
|
3076
3108
|
modelMismatchWarned = true;
|
|
3077
3109
|
}
|
|
3078
3110
|
return result.data.text;
|
|
@@ -3195,6 +3227,47 @@ async function callAnthropic(options) {
|
|
|
3195
3227
|
if (!textBlock?.text) throw new Error("Anthropic returned no text content");
|
|
3196
3228
|
return textBlock.text;
|
|
3197
3229
|
}
|
|
3230
|
+
/**
 * Pulls the assistant text out of an OpenAI-style chat completion payload.
 *
 * Only `choices[0].message` is inspected:
 * - a string `content` is trimmed and returned (an empty one yields `null`
 *   without consulting `refusal`);
 * - an array `content` contributes one segment per part, where a part is a
 *   string or an object carrying a string in `text`, `text.value`, or `value`
 *   (checked in that order); non-empty trimmed segments are joined with "\n";
 * - otherwise a non-empty trimmed `message.refusal` is returned.
 *
 * @param {unknown} data - Parsed JSON response body.
 * @returns {string | null} The extracted text, or `null` when nothing usable
 *   is present.
 */
function extractOpenAiTextContent(data) {
  // A parsed JSON body can legally be `null`; treat that like a missing
  // message instead of throwing a TypeError on `data.choices`.
  const message = data?.choices?.[0]?.message;
  if (!message) return null;

  // Trim a string and collapse "nothing left" to null.
  const trimmedOrNull = (raw) => {
    const trimmed = raw.trim();
    return trimmed.length > 0 ? trimmed : null;
  };

  // Pick the raw string a content part carries, or null when it has none.
  // The first matching shape wins even if its text turns out to be blank.
  const rawTextOfPart = (part) => {
    if (typeof part === "string") return part;
    if (!part || typeof part !== "object") return null;
    const partText = part.text;
    if (typeof partText === "string") return partText;
    if (partText && typeof partText === "object" && typeof partText.value === "string") {
      return partText.value;
    }
    return typeof part.value === "string" ? part.value : null;
  };

  const { content, refusal } = message;

  if (typeof content === "string") {
    return trimmedOrNull(content);
  }

  if (Array.isArray(content)) {
    const textSegments = [];
    for (const part of content) {
      const raw = rawTextOfPart(part);
      if (raw === null) continue;
      const segment = trimmedOrNull(raw);
      if (segment !== null) textSegments.push(segment);
    }
    if (textSegments.length > 0) {
      return textSegments.join("\n");
    }
  }

  return typeof refusal === "string" ? trimmedOrNull(refusal) : null;
}
|
|
3198
3271
|
/**
 * Reports whether a model id belongs to a family this file sends the
 * `max_completion_tokens`-style request to: any id starting with "gpt-5", or
 * an o-series id ("o1" through "o4").
 *
 * The o-series check accepts both the bare id ("o1", "o3") and dash-suffixed
 * variants ("o1-mini", "o4-mini"). Previously only the dash-suffixed form
 * matched, so bare ids were classified with the legacy models.
 *
 * @param {string} model - Model identifier as sent to the API.
 * @returns {boolean} `true` for gpt-5* and o1..o4 family ids.
 */
function usesMaxCompletionTokens(model) {
  if (model.startsWith("gpt-5")) return true;
  // `(?:-|$)` keeps unrelated ids such as "o10" or "o1x" from matching.
  return /^o[1-4](?:-|$)/.test(model);
}
|
|
@@ -3222,7 +3295,7 @@ async function callOpenAi(options) {
|
|
|
3222
3295
|
throw new LlmApiError("OpenAI", response.status, errorText.slice(0, 200));
|
|
3223
3296
|
}
|
|
3224
3297
|
const data = await response.json();
|
|
3225
|
-
const content = data
|
|
3298
|
+
const content = extractOpenAiTextContent(data);
|
|
3226
3299
|
if (!content) throw new Error("OpenAI returned no content");
|
|
3227
3300
|
return content;
|
|
3228
3301
|
}
|
|
@@ -3256,7 +3329,7 @@ async function callOpenAiCompatible(options) {
|
|
|
3256
3329
|
throw new LlmApiError(`OpenAI-compatible (${options.baseUrl})`, response.status, errorText.slice(0, 200));
|
|
3257
3330
|
}
|
|
3258
3331
|
const data = await response.json();
|
|
3259
|
-
const content = data
|
|
3332
|
+
const content = extractOpenAiTextContent(data);
|
|
3260
3333
|
if (!content) throw new Error("OpenAI-compatible API returned no content");
|
|
3261
3334
|
return content;
|
|
3262
3335
|
}
|
|
@@ -3281,13 +3354,15 @@ ${CYAN}${BOLD}archal${RESET} ${DIM}|${RESET} ${scenarioTitle}
|
|
|
3281
3354
|
`);
|
|
3282
3355
|
}
|
|
3283
3356
|
}
|
|
3284
|
-
function printRunProgress(runIndex, totalRuns, score, error2) {
|
|
3357
|
+
function printRunProgress(runIndex, totalRuns, score, error2, outcome) {
|
|
3285
3358
|
const { quiet } = getLoggerOptions();
|
|
3286
3359
|
if (quiet || activeOutputFormat !== "terminal") return;
|
|
3287
3360
|
const dots = ".".repeat(Math.max(1, 20 - String(runIndex + 1).length - String(totalRuns).length));
|
|
3288
3361
|
if (error2) {
|
|
3289
3362
|
const shortError = error2.length > MAX_ERROR_PREVIEW_CHARS ? error2.slice(0, MAX_ERROR_PREVIEW_CHARS - 1) + "\u2026" : error2;
|
|
3290
|
-
|
|
3363
|
+
const inconclusive = outcome === "inconclusive_infrastructure" || outcome === "inconclusive_seed";
|
|
3364
|
+
const label = inconclusive ? `${YELLOW}INCONCLUSIVE${RESET}` : `${RED}ERROR${RESET}`;
|
|
3365
|
+
process.stderr.write(` run ${runIndex + 1}/${totalRuns} ${DIM}${dots}${RESET} ${label} ${DIM}(${shortError})${RESET}
|
|
3291
3366
|
`);
|
|
3292
3367
|
return;
|
|
3293
3368
|
}
|
|
@@ -4193,8 +4268,35 @@ function filterByPredicate(items, predicate) {
|
|
|
4193
4268
|
if (knownMatches.length > 0) {
|
|
4194
4269
|
return { items: knownMatches, recognized: true };
|
|
4195
4270
|
}
|
|
4271
|
+
const ACTION_VERBS = /* @__PURE__ */ new Set([
|
|
4272
|
+
"listed",
|
|
4273
|
+
"fetched",
|
|
4274
|
+
"retrieved",
|
|
4275
|
+
"found",
|
|
4276
|
+
"searched",
|
|
4277
|
+
"queried",
|
|
4278
|
+
"posted",
|
|
4279
|
+
"sent",
|
|
4280
|
+
"received",
|
|
4281
|
+
"notified",
|
|
4282
|
+
"alerted",
|
|
4283
|
+
"reviewed",
|
|
4284
|
+
"analyzed",
|
|
4285
|
+
"inspected",
|
|
4286
|
+
"checked",
|
|
4287
|
+
"verified",
|
|
4288
|
+
"triaged",
|
|
4289
|
+
"escalated",
|
|
4290
|
+
"assigned",
|
|
4291
|
+
"tagged",
|
|
4292
|
+
"labeled",
|
|
4293
|
+
"updated",
|
|
4294
|
+
"edited",
|
|
4295
|
+
"patched",
|
|
4296
|
+
"migrated"
|
|
4297
|
+
]);
|
|
4196
4298
|
const isSingleWord = !lowerPredicate.includes(" ");
|
|
4197
|
-
if (isSingleWord) {
|
|
4299
|
+
if (isSingleWord && !ACTION_VERBS.has(lowerPredicate)) {
|
|
4198
4300
|
const hasKnownField = items.some((item) => {
|
|
4199
4301
|
if (typeof item !== "object" || item === null) return false;
|
|
4200
4302
|
const obj = item;
|
|
@@ -5466,24 +5568,46 @@ ${JSON.stringify(context.stateDiff, null, 2)}
|
|
|
5466
5568
|
## Agent Trace Evidence
|
|
5467
5569
|
${traceEvidence}`;
|
|
5468
5570
|
}
|
|
5571
|
+
/**
 * Rough token estimate for a value: the length of its JSON serialization
 * divided by four, rounded up.
 *
 * @param {unknown} value - Any JSON-serializable value.
 * @returns {number} Estimated token count; 0 when the value has no JSON
 *   representation.
 */
function estimateTokens(value) {
  // JSON.stringify returns `undefined` (not a string) for `undefined`,
  // functions and symbols; count those as empty instead of throwing on
  // `.length`.
  const json = JSON.stringify(value) ?? "";
  return Math.ceil(json.length / 4);
}
|
|
5575
|
+
var MAX_STATE_TOKENS = 4e4;
|
|
5469
5576
|
function summarizeState(state) {
|
|
5470
5577
|
const flat = flattenTwinState(state);
|
|
5471
5578
|
const summary = {};
|
|
5472
5579
|
for (const [key, value] of Object.entries(flat)) {
|
|
5473
5580
|
if (Array.isArray(value)) {
|
|
5474
|
-
if (value.length <=
|
|
5581
|
+
if (value.length <= 50) {
|
|
5475
5582
|
summary[key] = value;
|
|
5476
5583
|
} else {
|
|
5477
5584
|
summary[key] = {
|
|
5478
5585
|
_count: value.length,
|
|
5479
|
-
|
|
5480
|
-
|
|
5586
|
+
_first10: value.slice(0, 10),
|
|
5587
|
+
_last10: value.slice(-10)
|
|
5481
5588
|
};
|
|
5482
5589
|
}
|
|
5483
5590
|
} else {
|
|
5484
5591
|
summary[key] = value;
|
|
5485
5592
|
}
|
|
5486
5593
|
}
|
|
5594
|
+
let totalTokens = estimateTokens(summary);
|
|
5595
|
+
if (totalTokens > MAX_STATE_TOKENS) {
|
|
5596
|
+
const collectionSizes = Object.entries(summary).map(([key, value]) => ({ key, tokens: estimateTokens(value) })).sort((a, b) => b.tokens - a.tokens);
|
|
5597
|
+
for (const { key } of collectionSizes) {
|
|
5598
|
+
if (totalTokens <= MAX_STATE_TOKENS) break;
|
|
5599
|
+
const value = summary[key];
|
|
5600
|
+
if (!Array.isArray(value)) continue;
|
|
5601
|
+
const before = estimateTokens(value);
|
|
5602
|
+
summary[key] = {
|
|
5603
|
+
_count: value.length,
|
|
5604
|
+
_first5: value.slice(0, 5),
|
|
5605
|
+
_last5: value.slice(-5),
|
|
5606
|
+
_truncated: "Collection too large for evaluation \u2014 showing subset"
|
|
5607
|
+
};
|
|
5608
|
+
totalTokens -= before - estimateTokens(summary[key]);
|
|
5609
|
+
}
|
|
5610
|
+
}
|
|
5487
5611
|
return summary;
|
|
5488
5612
|
}
|
|
5489
5613
|
function parseJudgeResponse(text) {
|
|
@@ -5583,6 +5707,15 @@ async function evaluateWithLlm(criterion, expectedBehavior, stateBefore, stateAf
|
|
|
5583
5707
|
};
|
|
5584
5708
|
}
|
|
5585
5709
|
const message = err instanceof Error ? err.message : String(err);
|
|
5710
|
+
if (err instanceof LlmApiError && err.status === 400 && message.includes("too long")) {
|
|
5711
|
+
warn(`LLM judge prompt too large for criterion "${criterion.id}" \u2014 twin state may be too large for evaluation`);
|
|
5712
|
+
return {
|
|
5713
|
+
criterionId: criterion.id,
|
|
5714
|
+
status: "fail",
|
|
5715
|
+
confidence: 0,
|
|
5716
|
+
explanation: "LLM evaluation skipped: prompt exceeded model context window. The scenario state is too large for probabilistic evaluation. Consider using deterministic [D] criteria for this scenario."
|
|
5717
|
+
};
|
|
5718
|
+
}
|
|
5586
5719
|
error(`LLM judge call failed: ${message}`);
|
|
5587
5720
|
return {
|
|
5588
5721
|
criterionId: criterion.id,
|
|
@@ -5809,6 +5942,17 @@ function buildFailureAnalysisPrompt(input) {
|
|
|
5809
5942
|
);
|
|
5810
5943
|
sections.push(`## Passed Criteria (${input.passedCriteria.length})`);
|
|
5811
5944
|
sections.push(input.passedCriteria.map((c) => `- ${sanitizeForPrompt(c.description, 300)}`).join("\n"));
|
|
5945
|
+
if (input.agentError || input.agentLog) {
|
|
5946
|
+
sections.push(`## Agent Execution Context`);
|
|
5947
|
+
if (input.agentError) {
|
|
5948
|
+
sections.push(`Error: ${sanitizeForPrompt(input.agentError, 300)}`);
|
|
5949
|
+
}
|
|
5950
|
+
if (input.agentLog) {
|
|
5951
|
+
const logTail = input.agentLog.length > 800 ? input.agentLog.slice(-800) : input.agentLog;
|
|
5952
|
+
sections.push(`Agent log (tail):
|
|
5953
|
+
${sanitizeForPrompt(logTail, 800)}`);
|
|
5954
|
+
}
|
|
5955
|
+
}
|
|
5812
5956
|
sections.push(`## Agent Trace (${input.trace.length} tool calls)`);
|
|
5813
5957
|
sections.push(
|
|
5814
5958
|
input.trace.length === 0 ? "(Agent made no tool calls - likely crashed or timed out)" : JSON.stringify(traceFormatted, null, 2)
|
|
@@ -6552,7 +6696,7 @@ function resolveTelemetryEndpointFromEnv() {
|
|
|
6552
6696
|
if (!fallbackBaseUrl) {
|
|
6553
6697
|
return null;
|
|
6554
6698
|
}
|
|
6555
|
-
return `${fallbackBaseUrl}/
|
|
6699
|
+
return `${fallbackBaseUrl}/v1/traces`;
|
|
6556
6700
|
}
|
|
6557
6701
|
function resolveIngestToken() {
|
|
6558
6702
|
return process.env["ARCHAL_TELEMETRY_TOKEN"]?.trim();
|
|
@@ -6701,8 +6845,26 @@ function isTelemetryEnabled() {
|
|
|
6701
6845
|
if (consent !== "pending") return consent === "granted";
|
|
6702
6846
|
return loadConfig().telemetry;
|
|
6703
6847
|
}
|
|
6704
|
-
function buildStructuredRunError(runIndex, error2) {
|
|
6848
|
+
function buildStructuredRunError(runIndex, error2, outcome) {
|
|
6705
6849
|
const message = error2.trim();
|
|
6850
|
+
if (outcome === "inconclusive_seed") {
|
|
6851
|
+
return {
|
|
6852
|
+
runIndex,
|
|
6853
|
+
message,
|
|
6854
|
+
category: "seed_setup",
|
|
6855
|
+
code: "SEED_SETUP_ERROR",
|
|
6856
|
+
retryable: true
|
|
6857
|
+
};
|
|
6858
|
+
}
|
|
6859
|
+
if (outcome === "inconclusive_infrastructure") {
|
|
6860
|
+
return {
|
|
6861
|
+
runIndex,
|
|
6862
|
+
message,
|
|
6863
|
+
category: "infrastructure",
|
|
6864
|
+
code: "INFRASTRUCTURE_ERROR",
|
|
6865
|
+
retryable: true
|
|
6866
|
+
};
|
|
6867
|
+
}
|
|
6706
6868
|
if (message.startsWith("Agent not found:")) {
|
|
6707
6869
|
return {
|
|
6708
6870
|
runIndex,
|
|
@@ -6944,7 +7106,7 @@ function buildMetadata(report, totalEntries) {
|
|
|
6944
7106
|
},
|
|
6945
7107
|
agentInternals: {
|
|
6946
7108
|
runDurationsMs: report.runs.map((run) => run.durationMs),
|
|
6947
|
-
runErrors: report.runs.filter((run) => typeof run.error === "string" && run.error.length > 0).map((run) => buildStructuredRunError(run.runIndex, run.error)),
|
|
7109
|
+
runErrors: report.runs.filter((run) => typeof run.error === "string" && run.error.length > 0).map((run) => buildStructuredRunError(run.runIndex, run.error, run.outcome)),
|
|
6948
7110
|
evaluationCounts: { pass: passCount, partial: partialCount, fail: failCount },
|
|
6949
7111
|
runSummaries: report.runs.map((run) => ({
|
|
6950
7112
|
runIndex: run.runIndex,
|
|
@@ -7119,6 +7281,7 @@ async function uploadIfEnabled(traceId, report) {
|
|
|
7119
7281
|
}
|
|
7120
7282
|
|
|
7121
7283
|
// src/runner/dynamic-seed-generator.ts
|
|
7284
|
+
import { createHash as createHash4 } from "crypto";
|
|
7122
7285
|
import { z as z4 } from "zod";
|
|
7123
7286
|
|
|
7124
7287
|
// src/runner/seed-schemas/seed-schema-inference.ts
|
|
@@ -8240,7 +8403,8 @@ var RELATIONSHIP_RULES = {
|
|
|
8240
8403
|
{ sourceCollection: "disputes", sourceField: "paymentIntentId", targetCollection: "paymentIntents", targetField: "paymentIntentId", optional: true }
|
|
8241
8404
|
],
|
|
8242
8405
|
jira: [
|
|
8243
|
-
{ sourceCollection: "issues", sourceField: "projectId", targetCollection: "projects", targetField: "id" }
|
|
8406
|
+
{ sourceCollection: "issues", sourceField: "projectId", targetCollection: "projects", targetField: "id" },
|
|
8407
|
+
{ sourceCollection: "projects", sourceField: "leadAccountId", targetCollection: "users", targetField: "accountId" }
|
|
8244
8408
|
],
|
|
8245
8409
|
linear: [
|
|
8246
8410
|
{ sourceCollection: "issues", sourceField: "teamId", targetCollection: "teams", targetField: "id" },
|
|
@@ -8484,15 +8648,20 @@ function autoFillMissingFKs(seed, twinName) {
|
|
|
8484
8648
|
const targetEntities = result[rule.targetCollection];
|
|
8485
8649
|
if (!sourceEntities || !targetEntities || targetEntities.length === 0) continue;
|
|
8486
8650
|
const targetValues = targetEntities.map((e) => e[rule.targetField]).filter((v) => v !== void 0 && v !== null);
|
|
8487
|
-
if (targetValues.length
|
|
8488
|
-
const
|
|
8651
|
+
if (targetValues.length === 0) continue;
|
|
8652
|
+
const validTargetSet = new Set(targetValues.map(String));
|
|
8653
|
+
let fillIndex = 0;
|
|
8489
8654
|
for (const entity of sourceEntities) {
|
|
8490
8655
|
const e = entity;
|
|
8491
|
-
|
|
8492
|
-
|
|
8493
|
-
|
|
8656
|
+
const currentValue = e[rule.sourceField];
|
|
8657
|
+
const needsFill = currentValue === void 0 || currentValue === null || !validTargetSet.has(String(currentValue));
|
|
8658
|
+
if (needsFill) {
|
|
8659
|
+
const fillValue = targetValues[fillIndex % targetValues.length];
|
|
8660
|
+
fillIndex++;
|
|
8661
|
+
debug(
|
|
8662
|
+
`Auto-filling ${rule.sourceCollection}.${rule.sourceField} = ${String(fillValue)} (from ${targetValues.length} ${rule.targetCollection})` + (currentValue != null ? ` (was ${String(currentValue)} \u2014 not in targets)` : "")
|
|
8494
8663
|
);
|
|
8495
|
-
e[rule.sourceField] =
|
|
8664
|
+
e[rule.sourceField] = fillValue;
|
|
8496
8665
|
}
|
|
8497
8666
|
}
|
|
8498
8667
|
}
|
|
@@ -8526,12 +8695,36 @@ function normalizeSeedData(seed, twinName) {
|
|
|
8526
8695
|
}
|
|
8527
8696
|
}
|
|
8528
8697
|
}
|
|
8698
|
+
const collectionSchema = schema[collection];
|
|
8699
|
+
if (collectionSchema) {
|
|
8700
|
+
for (const [field, fieldDef] of Object.entries(collectionSchema)) {
|
|
8701
|
+
if (!(field in e) || e[field] === null || e[field] === void 0) continue;
|
|
8702
|
+
const expectedType = fieldDef.type.split("|")[0].trim();
|
|
8703
|
+
if (expectedType === "string" && typeof e[field] === "object" && e[field] !== null && !Array.isArray(e[field])) {
|
|
8704
|
+
const obj = e[field];
|
|
8705
|
+
const extracted = obj["login"] ?? obj["name"] ?? obj["value"] ?? obj["key"] ?? obj["id"] ?? obj["displayName"];
|
|
8706
|
+
if (typeof extracted === "string") {
|
|
8707
|
+
debug(`Seed normalization: coerced ${collection}.${field} from object to string "${extracted}"`);
|
|
8708
|
+
e[field] = extracted;
|
|
8709
|
+
} else {
|
|
8710
|
+
const firstStr = Object.values(obj).find((v) => typeof v === "string");
|
|
8711
|
+
if (firstStr) {
|
|
8712
|
+
debug(`Seed normalization: coerced ${collection}.${field} from object to string "${firstStr}" (fallback)`);
|
|
8713
|
+
e[field] = firstStr;
|
|
8714
|
+
} else {
|
|
8715
|
+
debug(`Seed normalization: could not coerce ${collection}.${field} from object to string, removing`);
|
|
8716
|
+
delete e[field];
|
|
8717
|
+
}
|
|
8718
|
+
}
|
|
8719
|
+
}
|
|
8720
|
+
}
|
|
8721
|
+
}
|
|
8529
8722
|
if (collectionDefaults) {
|
|
8530
8723
|
for (const [field, defaultValue] of Object.entries(collectionDefaults)) {
|
|
8531
8724
|
if (!(field in e)) {
|
|
8532
8725
|
e[field] = structuredClone(defaultValue);
|
|
8533
8726
|
} else if (e[field] === null && defaultValue !== null) {
|
|
8534
|
-
const fieldDef =
|
|
8727
|
+
const fieldDef = collectionSchema?.[field];
|
|
8535
8728
|
if (fieldDef && !fieldDef.type.includes("null")) {
|
|
8536
8729
|
e[field] = structuredClone(defaultValue);
|
|
8537
8730
|
}
|
|
@@ -8540,6 +8733,15 @@ function normalizeSeedData(seed, twinName) {
|
|
|
8540
8733
|
}
|
|
8541
8734
|
}
|
|
8542
8735
|
}
|
|
8736
|
+
if (twinName === "github" && result["repos"]) {
|
|
8737
|
+
for (const entity of result["repos"]) {
|
|
8738
|
+
const e = entity;
|
|
8739
|
+
if ((!e["fullName"] || typeof e["fullName"] !== "string") && typeof e["owner"] === "string" && typeof e["name"] === "string") {
|
|
8740
|
+
e["fullName"] = `${e["owner"]}/${e["name"]}`;
|
|
8741
|
+
debug(`Seed normalization: derived repos.fullName = "${e["fullName"]}"`);
|
|
8742
|
+
}
|
|
8743
|
+
}
|
|
8744
|
+
}
|
|
8543
8745
|
return result;
|
|
8544
8746
|
}
|
|
8545
8747
|
|
|
@@ -8551,6 +8753,7 @@ var KIND_COLLECTION_HINTS = {
|
|
|
8551
8753
|
channel: ["channels"],
|
|
8552
8754
|
user: ["users"],
|
|
8553
8755
|
ticket: ["issues"],
|
|
8756
|
+
project: ["projects"],
|
|
8554
8757
|
table: ["tables"],
|
|
8555
8758
|
site: ["sites", "domains"],
|
|
8556
8759
|
file: ["files"],
|
|
@@ -8560,6 +8763,9 @@ var KIND_COLLECTION_HINTS = {
|
|
|
8560
8763
|
var ENTITY_KEY_ALIASES = {
|
|
8561
8764
|
"repo.owner": ["ownerLogin", "owner_login", "login", "owner.login", "owner.name"],
|
|
8562
8765
|
"issue.key": ["identifier"],
|
|
8766
|
+
"project.key": ["key", "projectKey"],
|
|
8767
|
+
"ticket.key": ["identifier", "key"],
|
|
8768
|
+
"stripe_entity.id": ["id", "charge", "chargeId", "paymentIntentId", "invoiceId", "customerId", "disputeId"],
|
|
8563
8769
|
"email.address": ["email", "from", "to", "cc", "bcc"],
|
|
8564
8770
|
"file.name": ["title", "fileName", "filename", "subject", "summary"]
|
|
8565
8771
|
};
|
|
@@ -8715,10 +8921,28 @@ function validateSeedCoverage(intent, mergedSeed) {
|
|
|
8715
8921
|
const entityIssues = [];
|
|
8716
8922
|
const quoteErrors = [];
|
|
8717
8923
|
const quoteWarnings = [];
|
|
8718
|
-
const CORE_ENTITY_KEYS = /* @__PURE__ */ new Set(["owner", "name", "fullName", "channel_name", "key", "identifier", "number"]);
|
|
8924
|
+
const CORE_ENTITY_KEYS = /* @__PURE__ */ new Set(["owner", "name", "fullName", "channel_name", "key", "identifier", "number", "id"]);
|
|
8925
|
+
const CONTRACT_REQUIRED_KINDS = /* @__PURE__ */ new Set([
|
|
8926
|
+
"repo",
|
|
8927
|
+
"pullRequest",
|
|
8928
|
+
"issue",
|
|
8929
|
+
"channel",
|
|
8930
|
+
"user",
|
|
8931
|
+
"ticket",
|
|
8932
|
+
"project",
|
|
8933
|
+
"table"
|
|
8934
|
+
]);
|
|
8719
8935
|
const entityWarnings = [];
|
|
8720
8936
|
for (const entity of intent.entities) {
|
|
8721
8937
|
if (typeof entity.value === "boolean") continue;
|
|
8938
|
+
const candidateCollections = toCollectionCandidates(mergedSeed, entity.kind, entity.value);
|
|
8939
|
+
if (CONTRACT_REQUIRED_KINDS.has(entity.kind) && candidateCollections.length === 0) {
|
|
8940
|
+
entityIssues.push({
|
|
8941
|
+
type: "missing_entity",
|
|
8942
|
+
message: `Scenario entity contract mismatch: no collections match ${entity.kind}.${entity.key}=${String(entity.value)}`
|
|
8943
|
+
});
|
|
8944
|
+
continue;
|
|
8945
|
+
}
|
|
8722
8946
|
if (!valueExistsInCollections(mergedSeed, entity.kind, entity.key, entity.value)) {
|
|
8723
8947
|
const issue = {
|
|
8724
8948
|
type: "missing_entity",
|
|
@@ -8816,7 +9040,25 @@ var NON_SUBJECT_STARTS = /* @__PURE__ */ new Set([
|
|
|
8816
9040
|
"could",
|
|
8817
9041
|
"would",
|
|
8818
9042
|
"may",
|
|
8819
|
-
"might"
|
|
9043
|
+
"might",
|
|
9044
|
+
"for",
|
|
9045
|
+
"with",
|
|
9046
|
+
"in",
|
|
9047
|
+
"at",
|
|
9048
|
+
"to",
|
|
9049
|
+
"from",
|
|
9050
|
+
"by",
|
|
9051
|
+
"on",
|
|
9052
|
+
"per",
|
|
9053
|
+
"via",
|
|
9054
|
+
"into",
|
|
9055
|
+
"onto",
|
|
9056
|
+
"over",
|
|
9057
|
+
"under",
|
|
9058
|
+
"after",
|
|
9059
|
+
"before",
|
|
9060
|
+
"during",
|
|
9061
|
+
"as"
|
|
8820
9062
|
]);
|
|
8821
9063
|
function isReasonableCountSubject(subject, expected) {
|
|
8822
9064
|
if (expected > MAX_REASONABLE_COUNT) return false;
|
|
@@ -8827,38 +9069,96 @@ function isReasonableCountSubject(subject, expected) {
|
|
|
8827
9069
|
if (/\b(?:have|has|had|were|was|are|is|been|being|do|does|did|can|could|should|will|would|may|might)\b/.test(subject.toLowerCase())) return false;
|
|
8828
9070
|
return true;
|
|
8829
9071
|
}
|
|
9072
|
+
/**
 * Detects whether the number starting at `numberStart` is the part after the
 * colon of a clock-style token (e.g. the "30" in "9:30"), i.e. whether it is
 * immediately preceded by a digit and a colon.
 *
 * The previous implementation anchored `^\d{1,2}:$` on a fixed 3-character
 * window, so a single-digit hour that was not at the very start of the text
 * ("at 9:30" gives the window " 9:") was missed.
 *
 * @param {string} text - Full text being scanned.
 * @param {number} numberStart - Index in `text` where the number begins.
 * @returns {boolean} `true` when `text` has "<digit>:" directly before
 *   `numberStart`.
 */
function appearsToBeClockSuffix(text, numberStart) {
  // Two preceding characters are needed: the hour's last digit and the colon.
  if (numberStart < 2) return false;
  return text[numberStart - 1] === ":" && /\d/.test(text[numberStart - 2]);
}
|
|
9076
|
+
/**
 * Detects whether a number match is the fractional part of a decimal, i.e.
 * whether it is immediately preceded by "<digit>." (the "5" in "1.5").
 *
 * @param {string} text - Full text being scanned.
 * @param {number} matchIndex - Index in `text` where the matched number begins.
 * @returns {boolean} `true` when the two characters before `matchIndex` are a
 *   digit followed by a dot.
 */
function isDecimalFragment(text, matchIndex) {
  if (matchIndex <= 0) return false;
  if (text[matchIndex - 1] !== ".") return false;
  // A leading ".5" (dot at index 0) has no integer part and is not a fragment.
  return matchIndex >= 2 && /\d/.test(text[matchIndex - 2]);
}
|
|
9084
|
+
/**
 * Resolves a free-text subject (e.g. "open issues") to an array-valued
 * collection of `flat`, remembering which key it came from.
 *
 * Each candidate spelling from `buildSubjectCandidates2` is compared, ignoring
 * whitespace and case, against every key of `flat`; a key matches when it
 * equals the candidate or the candidate plus "s". Candidates are tried in
 * order and, for each, keys in `Object.entries` order. If none matches, the
 * lookup falls back to `resolveSubjectInState`, in which case the key is
 * reported as "".
 *
 * @param {string} subject - Subject phrase.
 * @param {Record<string, unknown>} flat - Collection map; only array values
 *   can match by key.
 * @returns {{ items: unknown[], key: string } | null} The resolved items and
 *   their key ("" for the fallback), or `null` when nothing resolves.
 */
function resolveSubjectWithKey(subject, flat) {
  const squash = (s) => s.replace(/\s+/g, "").toLowerCase();
  const candidates = buildSubjectCandidates2(subject);

  // Normalise each array-valued key once instead of once per candidate.
  const arrayCollections = Object.entries(flat)
    .filter(([, value]) => Array.isArray(value))
    .map(([key, value]) => ({ key, items: value, normalizedKey: squash(key) }));

  for (const candidate of candidates) {
    const wanted = squash(candidate);
    const hit = arrayCollections.find(
      ({ normalizedKey }) => normalizedKey === wanted || normalizedKey === wanted + "s",
    );
    if (hit) return { items: hit.items, key: hit.key };
  }

  const items = resolveSubjectInState(subject, flat);
  return items ? { items, key: "" } : null;
}
|
|
9098
|
+
/**
 * Builds the list of alternative spellings tried when matching a subject
 * phrase against collection names.
 *
 * Order of the result:
 * 1. the subject itself;
 * 2. its singular form (final "s" dropped) when it ends in "s" and is longer
 *    than three characters, otherwise the subject with "s" appended;
 * 3. for multi-word subjects, the first word and then the last word.
 *
 * @param {string} subject - Subject phrase.
 * @returns {string[]} Candidate spellings, possibly containing duplicates.
 */
function buildSubjectCandidates2(subject) {
  const looksPlural = subject.endsWith("s") && subject.length > 3;
  const numberVariant = looksPlural ? subject.slice(0, -1) : subject + "s";

  const words = subject.split(/\s+/);
  const edgeWords = words.length > 1 ? [words[0], words[words.length - 1]] : [];

  return [subject, numberVariant, ...edgeWords];
}
|
|
8830
9112
|
function verifySeedCounts(setupText, seedState) {
|
|
8831
9113
|
const mismatches = [];
|
|
8832
9114
|
const flat = flattenTwinState(seedState);
|
|
8833
9115
|
const countPattern = /\b(\d+)\s+([\w\s]+?)(?:\s+(?:that|which|are|with|in|labeled|assigned)\b)/gi;
|
|
8834
9116
|
for (const match of setupText.matchAll(countPattern)) {
|
|
9117
|
+
if (isDecimalFragment(setupText, match.index)) continue;
|
|
8835
9118
|
const expected = parseInt(match[1], 10);
|
|
8836
9119
|
const subject = match[2].trim();
|
|
9120
|
+
if (match.index !== void 0 && appearsToBeClockSuffix(setupText, match.index)) continue;
|
|
8837
9121
|
if (!subject || expected <= 0) continue;
|
|
8838
9122
|
if (!isReasonableCountSubject(subject, expected)) continue;
|
|
8839
|
-
const resolved =
|
|
8840
|
-
if (resolved && resolved.length !== expected) {
|
|
8841
|
-
mismatches.push({ subject, expected, actual: resolved.length });
|
|
9123
|
+
const resolved = resolveSubjectWithKey(subject, flat);
|
|
9124
|
+
if (resolved && resolved.items.length !== expected) {
|
|
9125
|
+
mismatches.push({ subject, expected, actual: resolved.items.length, collectionKey: resolved.key || void 0 });
|
|
8842
9126
|
}
|
|
8843
9127
|
}
|
|
8844
9128
|
const simplePattern = /\b(\d+)\s+([\w\s]+?)(?:[.,;:)]|$)/gm;
|
|
8845
9129
|
const seenSubjects = new Set(mismatches.map((m) => m.subject.toLowerCase()));
|
|
8846
9130
|
for (const match of setupText.matchAll(simplePattern)) {
|
|
9131
|
+
if (isDecimalFragment(setupText, match.index)) continue;
|
|
8847
9132
|
const expected = parseInt(match[1], 10);
|
|
8848
9133
|
const subject = match[2].trim();
|
|
9134
|
+
if (match.index !== void 0 && appearsToBeClockSuffix(setupText, match.index)) continue;
|
|
8849
9135
|
if (!subject || expected <= 0 || seenSubjects.has(subject.toLowerCase())) continue;
|
|
8850
9136
|
if (!isReasonableCountSubject(subject, expected)) continue;
|
|
8851
|
-
const resolved =
|
|
8852
|
-
if (resolved && resolved.length !== expected) {
|
|
8853
|
-
mismatches.push({ subject, expected, actual: resolved.length });
|
|
9137
|
+
const resolved = resolveSubjectWithKey(subject, flat);
|
|
9138
|
+
if (resolved && resolved.items.length !== expected) {
|
|
9139
|
+
mismatches.push({ subject, expected, actual: resolved.items.length, collectionKey: resolved.key || void 0 });
|
|
8854
9140
|
seenSubjects.add(subject.toLowerCase());
|
|
8855
9141
|
}
|
|
8856
9142
|
}
|
|
8857
9143
|
return mismatches;
|
|
8858
9144
|
}
|
|
9145
|
+
/**
 * Cuts over-populated seed collections down to their expected size.
 *
 * A mismatch is acted on only when it reports more items than expected
 * (`actual > expected`), names a `collectionKey`, and `seed` has a truthy
 * value under that key which is longer than `expected`. The collection is
 * then replaced by its first `expected` entries. `seed` is modified in place.
 *
 * @param {Record<string, unknown[]>} seed - Seed data keyed by collection name.
 * @param {Array<{ expected: number, actual: number, collectionKey?: string }>} mismatches
 *   Count mismatches to act on.
 * @returns {number} Total number of entries removed across all collections.
 */
function trimSeedToExpectedCounts(seed, mismatches) {
  let removedCount = 0;
  for (const { expected, actual, collectionKey } of mismatches) {
    // Under-populated (or exactly matching) collections are never touched.
    if (actual <= expected) continue;
    const records = collectionKey ? seed[collectionKey] : undefined;
    if (!records) continue;
    // Re-check against the live collection: `actual` may be stale.
    if (records.length > expected) {
      seed[collectionKey] = records.slice(0, expected);
      removedCount += records.length - expected;
    }
  }
  return removedCount;
}
|
|
8859
9159
|
|
|
8860
9160
|
// src/runner/seed-cache.ts
|
|
8861
|
-
var CACHE_VERSION =
|
|
9161
|
+
var CACHE_VERSION = 4;
|
|
8862
9162
|
var NEGATIVE_CACHE_VERSION = 2;
|
|
8863
9163
|
var NEGATIVE_PREFIX = "neg-";
|
|
8864
9164
|
var CACHE_DIR = join7(homedir2(), ".archal", "seed-cache");
|
|
@@ -9110,7 +9410,7 @@ ${setupText}
|
|
|
9110
9410
|
Extract the seed blueprint as JSON.`;
|
|
9111
9411
|
try {
|
|
9112
9412
|
const provider = detectProvider(config.model);
|
|
9113
|
-
const apiKey = resolveProviderApiKey(config.apiKey, provider);
|
|
9413
|
+
const apiKey = config.providerMode === "archal" ? "" : resolveProviderApiKey(config.apiKey ?? "", provider);
|
|
9114
9414
|
const responseText = await callLlm({
|
|
9115
9415
|
provider,
|
|
9116
9416
|
model: config.model,
|
|
@@ -9129,12 +9429,26 @@ Extract the seed blueprint as JSON.`;
|
|
|
9129
9429
|
}
|
|
9130
9430
|
const parsed = parseBlueprint(responseText, twinName);
|
|
9131
9431
|
if (!parsed) return null;
|
|
9432
|
+
const validCollections = new Set(availableCollections);
|
|
9433
|
+
parsed.collections = parsed.collections.filter((col) => {
|
|
9434
|
+
if (validCollections.has(col.name)) return true;
|
|
9435
|
+
warn(`Blueprint references unknown collection "${col.name}" for ${twinName} \u2014 dropping`);
|
|
9436
|
+
return false;
|
|
9437
|
+
});
|
|
9132
9438
|
for (const col of parsed.collections) {
|
|
9133
9439
|
const groupSum = col.groups.reduce((sum, g) => sum + g.count, 0);
|
|
9134
9440
|
if (groupSum !== col.totalCount) {
|
|
9135
9441
|
debug(`Blueprint group count mismatch for ${col.name}: groups sum to ${groupSum}, totalCount is ${col.totalCount}. Adjusting.`);
|
|
9136
9442
|
col.totalCount = groupSum;
|
|
9137
9443
|
}
|
|
9444
|
+
if (col.totalCount === 0) {
|
|
9445
|
+
debug(`Blueprint collection ${col.name} has 0 entities \u2014 dropping`);
|
|
9446
|
+
}
|
|
9447
|
+
}
|
|
9448
|
+
parsed.collections = parsed.collections.filter((col) => col.totalCount > 0);
|
|
9449
|
+
if (parsed.collections.length === 0 && parsed.identities.length === 0) {
|
|
9450
|
+
warn("Blueprint extracted no valid collections or identities");
|
|
9451
|
+
return null;
|
|
9138
9452
|
}
|
|
9139
9453
|
return parsed;
|
|
9140
9454
|
} catch (err) {
|
|
@@ -9356,7 +9670,13 @@ function buildSeedFromBlueprint(blueprint, baseSeed) {
|
|
|
9356
9670
|
for (const identity of blueprint.identities) {
|
|
9357
9671
|
processIdentity(identity, seed, warnings);
|
|
9358
9672
|
}
|
|
9673
|
+
const baseCollections = new Set(Object.keys(baseSeed));
|
|
9359
9674
|
for (const spec of blueprint.collections) {
|
|
9675
|
+
if (!baseCollections.has(spec.name) && !seed[spec.name]) {
|
|
9676
|
+
warnings.push(`Blueprint references unknown collection "${spec.name}" \u2014 skipping`);
|
|
9677
|
+
warn(`Blueprint references unknown collection "${spec.name}" for ${blueprint.twin} twin \u2014 skipping`);
|
|
9678
|
+
continue;
|
|
9679
|
+
}
|
|
9360
9680
|
processCollection(spec, seed, blueprint.twin, existingLabels, warnings, now);
|
|
9361
9681
|
}
|
|
9362
9682
|
return { seed, warnings };
|
|
@@ -9612,9 +9932,16 @@ function buildSlackEntity(collection, id, props, seed, index, temporal, contentH
|
|
|
9612
9932
|
}
|
|
9613
9933
|
case "messages": {
|
|
9614
9934
|
const channels = seed["channels"] ?? [];
|
|
9615
|
-
const
|
|
9935
|
+
const targetChannel = channels.length > 0 ? channels[index % channels.length] : null;
|
|
9936
|
+
const channelId = targetChannel ? String(targetChannel["channel_id"] ?? "C0001AAAA") : "C0001AAAA";
|
|
9937
|
+
const channelMembers = targetChannel ? targetChannel["members"] ?? [] : [];
|
|
9616
9938
|
const users = seed["users"] ?? [];
|
|
9617
|
-
|
|
9939
|
+
let userId;
|
|
9940
|
+
if (channelMembers.length > 0) {
|
|
9941
|
+
userId = channelMembers[index % channelMembers.length];
|
|
9942
|
+
} else {
|
|
9943
|
+
userId = users.length > 0 ? String(users[index % users.length]["user_id"] ?? "U0001AAAA") : "U0001AAAA";
|
|
9944
|
+
}
|
|
9618
9945
|
const baseTs = Math.floor(new Date(temporal.createdAt).getTime() / 1e3);
|
|
9619
9946
|
const ts = generateSlackTs(baseTs, index);
|
|
9620
9947
|
return {
|
|
@@ -10303,9 +10630,19 @@ function extractHybridPatch(obj) {
|
|
|
10303
10630
|
}
|
|
10304
10631
|
return null;
|
|
10305
10632
|
}
|
|
10306
|
-
function
|
|
10633
|
+
/**
 * Produce a short fingerprint of `text`: the first 16 hex characters of its
 * SHA-256 digest.
 *
 * @param {string} text - Text to fingerprint.
 * @returns {string} 16-character lowercase hex prefix of the digest.
 */
function hashText(text) {
  const hasher = createHash4("sha256");
  hasher.update(text);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 16);
}
|
|
10636
|
+
function buildSeedCacheContext(twinName, config, intent, context) {
|
|
10307
10637
|
return {
|
|
10308
10638
|
twinName,
|
|
10639
|
+
generator: {
|
|
10640
|
+
model: config.model,
|
|
10641
|
+
providerMode: config.providerMode ?? "direct",
|
|
10642
|
+
baseUrl: config.baseUrl ?? null,
|
|
10643
|
+
systemPromptHash: hashText(SYSTEM_PROMPT2),
|
|
10644
|
+
promptTemplateVersion: 2
|
|
10645
|
+
},
|
|
10309
10646
|
intent: intent ?? null,
|
|
10310
10647
|
scenario: context ?? null
|
|
10311
10648
|
};
|
|
@@ -10660,10 +10997,13 @@ async function tryBlueprintPath(twinName, baseSeedData, setupDescription, availa
|
|
|
10660
10997
|
finalSeed = autoFillMissingFKs(finalSeed, twinName);
|
|
10661
10998
|
const relValidation = validateSeedRelationships(finalSeed, twinName);
|
|
10662
10999
|
if (!relValidation.valid) {
|
|
10663
|
-
|
|
10664
|
-
|
|
10665
|
-
|
|
10666
|
-
|
|
11000
|
+
finalSeed = autoFillMissingFKs(finalSeed, twinName);
|
|
11001
|
+
const secondValidation = validateSeedRelationships(finalSeed, twinName);
|
|
11002
|
+
if (!secondValidation.valid) {
|
|
11003
|
+
warn("Blueprint seed has unresolved FK references (continuing anyway)", {
|
|
11004
|
+
errors: secondValidation.errors.slice(0, 5).join("; ")
|
|
11005
|
+
});
|
|
11006
|
+
}
|
|
10667
11007
|
}
|
|
10668
11008
|
if (intent) {
|
|
10669
11009
|
const coverage = validateSeedCoverage(intent, finalSeed);
|
|
@@ -10678,9 +11018,16 @@ async function tryBlueprintPath(twinName, baseSeedData, setupDescription, availa
|
|
|
10678
11018
|
flatForVerify[twinName] = finalSeed;
|
|
10679
11019
|
const countMismatches = verifySeedCounts(setupDescription, flatForVerify);
|
|
10680
11020
|
if (countMismatches.length > 0) {
|
|
10681
|
-
|
|
10682
|
-
|
|
10683
|
-
|
|
11021
|
+
const trimmed = trimSeedToExpectedCounts(finalSeed, countMismatches);
|
|
11022
|
+
if (trimmed > 0) {
|
|
11023
|
+
debug(`Blueprint seed: trimmed ${trimmed} excess entities to match setup counts`);
|
|
11024
|
+
}
|
|
11025
|
+
const remaining = countMismatches.filter((m) => m.actual > m.expected && !m.collectionKey);
|
|
11026
|
+
if (remaining.length > 0) {
|
|
11027
|
+
debug("Blueprint seed has unresolvable count mismatches", {
|
|
11028
|
+
mismatches: remaining.map((m) => `${m.subject}: ${m.expected} vs ${m.actual}`).join("; ")
|
|
11029
|
+
});
|
|
11030
|
+
}
|
|
10684
11031
|
}
|
|
10685
11032
|
const syntheticPatch = {
|
|
10686
11033
|
add: {}
|
|
@@ -10710,7 +11057,7 @@ async function tryBlueprintPath(twinName, baseSeedData, setupDescription, availa
|
|
|
10710
11057
|
async function generateDynamicSeed(twinName, baseSeedName, baseSeedData, setupDescription, config, intent, context) {
|
|
10711
11058
|
const cacheScope = {
|
|
10712
11059
|
baseSeedData,
|
|
10713
|
-
cacheContext: buildSeedCacheContext(twinName, intent, context)
|
|
11060
|
+
cacheContext: buildSeedCacheContext(twinName, config, intent, context)
|
|
10714
11061
|
};
|
|
10715
11062
|
if (!config.noCache) {
|
|
10716
11063
|
const cached = getCachedSeed(twinName, baseSeedName, setupDescription, cacheScope);
|
|
@@ -10741,7 +11088,7 @@ async function generateDynamicSeed(twinName, baseSeedName, baseSeedData, setupDe
|
|
|
10741
11088
|
if (blueprintResult) {
|
|
10742
11089
|
info("Dynamic seed generated via blueprint", { twin: twinName });
|
|
10743
11090
|
if (!config.noCache) {
|
|
10744
|
-
const cacheContext = buildSeedCacheContext(twinName, intent, context);
|
|
11091
|
+
const cacheContext = buildSeedCacheContext(twinName, config, intent, context);
|
|
10745
11092
|
cacheSeed(twinName, baseSeedName, setupDescription, blueprintResult.seed, blueprintResult.patch, {
|
|
10746
11093
|
baseSeedData,
|
|
10747
11094
|
cacheContext
|
|
@@ -10787,7 +11134,7 @@ Fix these issues:
|
|
|
10787
11134
|
validationAttempt: String(validationAttempts + 1)
|
|
10788
11135
|
});
|
|
10789
11136
|
const provider = detectProvider(config.model);
|
|
10790
|
-
const apiKey = resolveProviderApiKey(config.apiKey, provider);
|
|
11137
|
+
const apiKey = effectiveMode === "archal" ? "" : resolveProviderApiKey(config.apiKey, provider);
|
|
10791
11138
|
const responseText = await callLlm({
|
|
10792
11139
|
provider,
|
|
10793
11140
|
model: config.model,
|
|
@@ -10796,7 +11143,7 @@ Fix these issues:
|
|
|
10796
11143
|
userPrompt: promptWithFeedback,
|
|
10797
11144
|
maxTokens: 16384,
|
|
10798
11145
|
baseUrl: config.baseUrl,
|
|
10799
|
-
providerMode:
|
|
11146
|
+
providerMode: effectiveMode,
|
|
10800
11147
|
intent: "seed-generate",
|
|
10801
11148
|
responseFormat: "json"
|
|
10802
11149
|
});
|
|
@@ -10872,14 +11219,19 @@ Fix these issues:
|
|
|
10872
11219
|
const relationshipValidation = validateSeedRelationships(mergedSeed, twinName);
|
|
10873
11220
|
if (!relationshipValidation.valid) {
|
|
10874
11221
|
const topErrors = relationshipValidation.errors.slice(0, 10);
|
|
10875
|
-
|
|
11222
|
+
if (validationAttempts < MAX_ATTEMPTS - 1) {
|
|
11223
|
+
warn(`Dynamic seed relationship validation failed (attempt ${attempt + 1})`, {
|
|
11224
|
+
errors: topErrors.join("; ")
|
|
11225
|
+
});
|
|
11226
|
+
lastErrors = topErrors;
|
|
11227
|
+
patch = null;
|
|
11228
|
+
mergedSeed = null;
|
|
11229
|
+
validationAttempts++;
|
|
11230
|
+
continue;
|
|
11231
|
+
}
|
|
11232
|
+
warn(`Dynamic seed has unresolved FK references (accepting on final attempt)`, {
|
|
10876
11233
|
errors: topErrors.join("; ")
|
|
10877
11234
|
});
|
|
10878
|
-
lastErrors = topErrors;
|
|
10879
|
-
patch = null;
|
|
10880
|
-
mergedSeed = null;
|
|
10881
|
-
validationAttempts++;
|
|
10882
|
-
continue;
|
|
10883
11235
|
}
|
|
10884
11236
|
if (intent) {
|
|
10885
11237
|
debug("Seed intent coverage summary", {
|
|
@@ -10938,6 +11290,15 @@ Fix these issues:
|
|
|
10938
11290
|
}
|
|
10939
11291
|
mergedSeed = autoFillMissingFKs(mergedSeed, twinName);
|
|
10940
11292
|
mergedSeed = ensureSlackScenarioChannelAccess(mergedSeed, intent);
|
|
11293
|
+
if (setupDescription) {
|
|
11294
|
+
const flatForTrim = {};
|
|
11295
|
+
flatForTrim[twinName] = mergedSeed;
|
|
11296
|
+
const finalMismatches = verifySeedCounts(setupDescription, flatForTrim);
|
|
11297
|
+
const trimmed = trimSeedToExpectedCounts(mergedSeed, finalMismatches);
|
|
11298
|
+
if (trimmed > 0) {
|
|
11299
|
+
debug(`Trimmed ${trimmed} excess seed entities to match setup counts`);
|
|
11300
|
+
}
|
|
11301
|
+
}
|
|
10941
11302
|
if (!config.noCache) {
|
|
10942
11303
|
cacheSeed(twinName, baseSeedName, setupDescription, mergedSeed, patch, cacheScope);
|
|
10943
11304
|
}
|
|
@@ -11085,10 +11446,23 @@ function githubIntent(setup) {
|
|
|
11085
11446
|
entities.push({ kind: "repo", key: "fullName", value: fullName });
|
|
11086
11447
|
}
|
|
11087
11448
|
if (!primaryRepoSet) {
|
|
11088
|
-
const orgMatch = setup.match(
|
|
11449
|
+
const orgMatch = setup.match(
|
|
11450
|
+
/\b(?:github\s+)?(?:organization|org)\s+(?:named\s+)?["']?([a-z][a-z0-9._-]*)["']?/i
|
|
11451
|
+
);
|
|
11089
11452
|
if (orgMatch?.[1]) {
|
|
11090
|
-
extractedSlots["repo.owner"] = orgMatch[1];
|
|
11091
|
-
entities.push({ kind: "repo", key: "owner", value: orgMatch[1] });
|
|
11453
|
+
extractedSlots["repo.owner"] = orgMatch[1].toLowerCase();
|
|
11454
|
+
entities.push({ kind: "repo", key: "owner", value: orgMatch[1].toLowerCase() });
|
|
11455
|
+
const repoName = setup.match(/\b(?:repository|repo)\s+(?:named\s+)?["']?([a-z][a-z0-9._-]{1,99})["']?/i)?.[1];
|
|
11456
|
+
if (repoName) {
|
|
11457
|
+
const normalizedName = repoName.toLowerCase();
|
|
11458
|
+
extractedSlots["repo.name"] = normalizedName;
|
|
11459
|
+
entities.push({ kind: "repo", key: "name", value: normalizedName });
|
|
11460
|
+
entities.push({
|
|
11461
|
+
kind: "repo",
|
|
11462
|
+
key: "fullName",
|
|
11463
|
+
value: `${String(extractedSlots["repo.owner"])}/${normalizedName}`
|
|
11464
|
+
});
|
|
11465
|
+
}
|
|
11092
11466
|
} else {
|
|
11093
11467
|
missingSlots.push({
|
|
11094
11468
|
slot: "repo.owner/repo.name",
|
|
@@ -11279,6 +11653,18 @@ function stripeIntent(setup) {
|
|
|
11279
11653
|
});
|
|
11280
11654
|
}
|
|
11281
11655
|
}
|
|
11656
|
+
const idRegex = /\b((?:acct|cus|prod|price|pi|ch|re|in|sub|dp|pm|payout|tr|tok|evt)_[a-zA-Z0-9]+)\b/g;
|
|
11657
|
+
const seenIds = /* @__PURE__ */ new Set();
|
|
11658
|
+
let idMatch;
|
|
11659
|
+
while ((idMatch = idRegex.exec(setup)) !== null) {
|
|
11660
|
+
const id = idMatch[1];
|
|
11661
|
+
if (seenIds.has(id)) continue;
|
|
11662
|
+
seenIds.add(id);
|
|
11663
|
+
entities.push({ kind: "stripe_entity", key: "id", value: id });
|
|
11664
|
+
if (!extractedSlots["stripe.primary_id"]) {
|
|
11665
|
+
extractedSlots["stripe.primary_id"] = id;
|
|
11666
|
+
}
|
|
11667
|
+
}
|
|
11282
11668
|
if (missingSlots.length > 0) {
|
|
11283
11669
|
return { intent: null, missingSlots };
|
|
11284
11670
|
}
|
|
@@ -11372,6 +11758,30 @@ function jiraIntent(setup) {
|
|
|
11372
11758
|
}
|
|
11373
11759
|
entities.push({ kind: "ticket", key: "key", value: key });
|
|
11374
11760
|
}
|
|
11761
|
+
const seenProjects = /* @__PURE__ */ new Set();
|
|
11762
|
+
const addProject = (projectKey) => {
|
|
11763
|
+
const normalized = projectKey.toUpperCase();
|
|
11764
|
+
if (!/^[A-Z][A-Z0-9]{1,9}$/.test(normalized)) return;
|
|
11765
|
+
if (seenProjects.has(normalized)) return;
|
|
11766
|
+
seenProjects.add(normalized);
|
|
11767
|
+
entities.push({ kind: "project", key: "key", value: normalized });
|
|
11768
|
+
if (!extractedSlots["project.key"]) {
|
|
11769
|
+
extractedSlots["project.key"] = normalized;
|
|
11770
|
+
}
|
|
11771
|
+
};
|
|
11772
|
+
for (const key of seenKeys) {
|
|
11773
|
+
addProject(key.split("-", 1)[0] ?? "");
|
|
11774
|
+
}
|
|
11775
|
+
const projectRegexes = [
|
|
11776
|
+
/\b(?:jira\s+)?project\s+(?:key\s*)?[:=]?\s*["']?([A-Z][A-Z0-9]{1,9})["']?/gi,
|
|
11777
|
+
/\bproject\s+["'][^"'\n]+["']\s*\(\s*([A-Z][A-Z0-9]{1,9})\s*\)/gi
|
|
11778
|
+
];
|
|
11779
|
+
for (const regex of projectRegexes) {
|
|
11780
|
+
let projectMatch;
|
|
11781
|
+
while ((projectMatch = regex.exec(setup)) !== null) {
|
|
11782
|
+
addProject(projectMatch[1] ?? "");
|
|
11783
|
+
}
|
|
11784
|
+
}
|
|
11375
11785
|
return {
|
|
11376
11786
|
intent: {
|
|
11377
11787
|
twinName: "jira",
|
|
@@ -11386,6 +11796,7 @@ function jiraIntent(setup) {
|
|
|
11386
11796
|
}
|
|
11387
11797
|
function supabaseIntent(setup) {
|
|
11388
11798
|
const extractedSlots = {};
|
|
11799
|
+
const entities = [];
|
|
11389
11800
|
const missingSlots = [];
|
|
11390
11801
|
const requiredSlots = ["database.target"];
|
|
11391
11802
|
const seenTables = /* @__PURE__ */ new Set();
|
|
@@ -11418,6 +11829,9 @@ function supabaseIntent(setup) {
|
|
|
11418
11829
|
const hasEnvVarTokens = /\b[A-Z][A-Z0-9_]{2,}\b/.test(setup);
|
|
11419
11830
|
if (seenTables.size > 0 || mentionsProject || mentionsLogsOrService || mentionsEnvVars && hasEnvVarTokens) {
|
|
11420
11831
|
extractedSlots["database.target"] = true;
|
|
11832
|
+
for (const table2 of seenTables) {
|
|
11833
|
+
entities.push({ kind: "table", key: "name", value: table2 });
|
|
11834
|
+
}
|
|
11421
11835
|
} else {
|
|
11422
11836
|
missingSlots.push({
|
|
11423
11837
|
slot: "database.target",
|
|
@@ -11434,10 +11848,7 @@ function supabaseIntent(setup) {
|
|
|
11434
11848
|
setupSummary: setupSummary(setup),
|
|
11435
11849
|
requiredSlots,
|
|
11436
11850
|
extractedSlots,
|
|
11437
|
-
|
|
11438
|
-
// that are not materialized in the base SQL schema. Keep intent broad
|
|
11439
|
-
// to avoid false-hard failures in seed generation.
|
|
11440
|
-
entities: [],
|
|
11851
|
+
entities,
|
|
11441
11852
|
quotedStrings: []
|
|
11442
11853
|
},
|
|
11443
11854
|
missingSlots: []
|
|
@@ -11897,11 +12308,21 @@ function parseSqlSeed(sql) {
|
|
|
11897
12308
|
/**
 * Load the seed named `seedName` from the directory `seedRoot`.
 *
 * Looks for `<seedName>.json` first and `<seedName>.sql` second; the first
 * file that exists is read as UTF-8 and parsed (JSON via `JSON.parse`, SQL via
 * `parseSqlSeed`).
 *
 * @param {string} seedRoot - Directory containing seed files.
 * @param {string} seedName - Seed file name without extension.
 * @returns {*} The parsed seed, or `null` when neither file exists.
 * @throws {Error} When an existing seed file cannot be read or parsed. The
 *   message names the file and the underlying failure is kept as `cause`.
 */
function loadSeedStateFromPath(seedRoot, seedName) {
  // Array order is lookup priority: JSON wins over SQL when both exist.
  const candidates = [
    { extension: "json", parse: (text) => JSON.parse(text) },
    { extension: "sql", parse: (text) => parseSqlSeed(text) }
  ];
  for (const { extension, parse } of candidates) {
    const filePath = resolve4(seedRoot, `${seedName}.${extension}`);
    if (!existsSync10(filePath)) continue;
    try {
      return parse(readFileSync12(filePath, "utf-8"));
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      // Keep the original exception attached so its stack and type survive
      // the re-wrap instead of being reduced to a message string.
      throw new Error(`Failed to parse seed file ${filePath}: ${detail}`, { cause: err });
    }
  }
  return null;
}
|
|
@@ -11951,12 +12372,24 @@ function loadBaseSeedFromDisk(twinName, seedName) {
|
|
|
11951
12372
|
}
|
|
11952
12373
|
/**
 * Classify a run failure message into a user-facing message and an outcome tag.
 *
 * Rules are evaluated in order and the first matching pattern wins:
 * spawn/ENOENT failures, then seed-generation failures, then network/HTTP/
 * timeout failures. Anything unmatched is returned unchanged as a
 * "failed_agent" outcome.
 *
 * @param {string} message - Raw error message from the failed run.
 * @returns {{message: string, outcome: string}} Decorated message and outcome.
 */
function categorizeRunError(message) {
  const rules = [
    {
      pattern: /Failed to spawn|ENOENT/,
      outcome: "failed_agent",
      describe: (raw) => `Agent not found: ${raw}. Check that your agent command is installed and in PATH.`
    },
    {
      pattern: /Dynamic seed generation failed|Missing dynamic seed state|seed generation|seed setup/i,
      outcome: "inconclusive_seed",
      describe: (raw) => `Seed generation error: ${raw}`
    },
    {
      pattern: /HTTP [45]\d\d|ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ECONNRESET|cloud session|fetch failed|AbortError|TimeoutError|operation was aborted|timed?\s*out/i,
      outcome: "inconclusive_infrastructure",
      describe: (raw) => `Infrastructure error: ${raw}. Check your network or try again.`
    }
  ];
  const matched = rules.find((rule) => rule.pattern.test(message));
  if (matched === undefined) {
    return { message, outcome: "failed_agent" };
  }
  return { message: matched.describe(message), outcome: matched.outcome };
}
|
|
11961
12394
|
async function executeSingleRun(runIndex, scenario, agentConfig, seedSelections, evaluatorConfig, timeoutSeconds, apiEngine, localEngine, remoteTwinUrlOverrides, apiRouting, cloudTwinUrls, hostedSessionId, apiBearerToken, adminAuth) {
|
|
11962
12395
|
const startTime = Date.now();
|
|
@@ -12094,7 +12527,8 @@ ${baseTaskMessage}` : baseTaskMessage;
|
|
|
12094
12527
|
stateDiff: diff,
|
|
12095
12528
|
agentLog: agentResult.stderr || void 0,
|
|
12096
12529
|
agentTrace: agentResult.agentTrace,
|
|
12097
|
-
tokenUsage
|
|
12530
|
+
tokenUsage,
|
|
12531
|
+
outcome: "failed_agent"
|
|
12098
12532
|
};
|
|
12099
12533
|
}
|
|
12100
12534
|
if (agentResult.exitCode !== 0 && agentResult.exitCode !== null) {
|
|
@@ -12133,11 +12567,14 @@ ${baseTaskMessage}` : baseTaskMessage;
|
|
|
12133
12567
|
stateDiff: diff,
|
|
12134
12568
|
agentLog: agentResult.stderr || void 0,
|
|
12135
12569
|
agentTrace: agentResult.agentTrace,
|
|
12136
|
-
tokenUsage
|
|
12570
|
+
tokenUsage,
|
|
12571
|
+
outcome: "failed_agent"
|
|
12137
12572
|
};
|
|
12138
12573
|
}
|
|
12139
12574
|
if (trace.length === 0) {
|
|
12140
|
-
warn(
|
|
12575
|
+
warn(
|
|
12576
|
+
`Agent made no tool calls on run ${runIndex + 1}. This usually means the model is too weak for this scenario. Try a more capable model (e.g. --engine-model claude-sonnet-4-6 or --engine-model gemini-2.5-pro). If using a custom agent, check that it correctly processes tool schemas and calls tools.`
|
|
12577
|
+
);
|
|
12141
12578
|
}
|
|
12142
12579
|
progress(`Evaluating run ${runIndex + 1}...`);
|
|
12143
12580
|
const evaluationResult = await evaluateRun(
|
|
@@ -12163,12 +12600,13 @@ ${baseTaskMessage}` : baseTaskMessage;
|
|
|
12163
12600
|
stateDiff: diff,
|
|
12164
12601
|
agentLog: agentResult.stderr || void 0,
|
|
12165
12602
|
agentTrace: agentResult.agentTrace,
|
|
12166
|
-
tokenUsage
|
|
12603
|
+
tokenUsage,
|
|
12604
|
+
outcome: "completed"
|
|
12167
12605
|
};
|
|
12168
12606
|
} catch (err) {
|
|
12169
12607
|
const message = err instanceof Error ? err.message : String(err);
|
|
12170
12608
|
const categorized = categorizeRunError(message);
|
|
12171
|
-
error(`Run ${runIndex + 1} failed: ${categorized}`);
|
|
12609
|
+
error(`Run ${runIndex + 1} failed: ${categorized.message}`);
|
|
12172
12610
|
const durationMs = Date.now() - startTime;
|
|
12173
12611
|
return {
|
|
12174
12612
|
runIndex,
|
|
@@ -12176,12 +12614,13 @@ ${baseTaskMessage}` : baseTaskMessage;
|
|
|
12176
12614
|
criterionId: c.id,
|
|
12177
12615
|
status: "fail",
|
|
12178
12616
|
confidence: 1,
|
|
12179
|
-
explanation: `Run failed: ${categorized}`
|
|
12617
|
+
explanation: `Run failed: ${categorized.message}`
|
|
12180
12618
|
})),
|
|
12181
12619
|
overallScore: 0,
|
|
12182
12620
|
trace: [],
|
|
12183
12621
|
durationMs,
|
|
12184
|
-
error: categorized,
|
|
12622
|
+
error: categorized.message,
|
|
12623
|
+
outcome: categorized.outcome,
|
|
12185
12624
|
stateBefore: beforeState,
|
|
12186
12625
|
stateAfter: beforeState,
|
|
12187
12626
|
stateDiff: { added: {}, modified: {}, removed: {} }
|
|
@@ -12258,9 +12697,20 @@ function preflightCheck(scenario, apiKey, model, baseUrl, evaluatorProvider, see
|
|
|
12258
12697
|
}
|
|
12259
12698
|
}
|
|
12260
12699
|
if (seedModel) {
|
|
12700
|
+
const mode = seedProviderMode ?? "auto";
|
|
12701
|
+
const provider = detectProvider(seedModel);
|
|
12702
|
+
const resolvedKey = resolveProviderApiKey(apiKey, provider);
|
|
12261
12703
|
const creds = getCredentials();
|
|
12262
12704
|
const hasArchalAuth = Boolean(creds?.token);
|
|
12263
|
-
if (!
|
|
12705
|
+
if (provider === "openai-compatible" && !baseUrl && mode === "direct") {
|
|
12706
|
+
errors.push({
|
|
12707
|
+
check: "seed.baseUrl",
|
|
12708
|
+
message: `Seed model "${seedModel}" requires a base URL for the OpenAI-compatible endpoint`,
|
|
12709
|
+
detail: "Set via: export ARCHAL_EVALUATOR_BASE_URL=<url> or archal config set evaluator.baseUrl <url>",
|
|
12710
|
+
warning: true
|
|
12711
|
+
});
|
|
12712
|
+
}
|
|
12713
|
+
if (mode === "archal" && !hasArchalAuth) {
|
|
12264
12714
|
errors.push({
|
|
12265
12715
|
check: "archal-auth-seed",
|
|
12266
12716
|
message: "Dynamic seed generation requires Archal authentication",
|
|
@@ -12268,6 +12718,32 @@ function preflightCheck(scenario, apiKey, model, baseUrl, evaluatorProvider, see
|
|
|
12268
12718
|
warning: true
|
|
12269
12719
|
});
|
|
12270
12720
|
}
|
|
12721
|
+
if (mode === "direct" && !resolvedKey) {
|
|
12722
|
+
errors.push({
|
|
12723
|
+
check: getProviderEnvVar(provider),
|
|
12724
|
+
message: `Seed provider is "direct" but no API key is configured for ${provider}`,
|
|
12725
|
+
detail: `Set via: export ${getProviderEnvVar(provider)}=<your-key> or archal config set evaluator.apiKey <key>`,
|
|
12726
|
+
warning: true
|
|
12727
|
+
});
|
|
12728
|
+
}
|
|
12729
|
+
if (mode === "auto" && !resolvedKey && !hasArchalAuth) {
|
|
12730
|
+
errors.push({
|
|
12731
|
+
check: getProviderEnvVar(provider),
|
|
12732
|
+
message: 'Dynamic seed generation has no available provider in "auto" mode',
|
|
12733
|
+
detail: `Set ${getProviderEnvVar(provider)} (or evaluator.apiKey) for direct mode, or run archal login for Archal backend mode`,
|
|
12734
|
+
warning: true
|
|
12735
|
+
});
|
|
12736
|
+
}
|
|
12737
|
+
if (resolvedKey && (mode === "direct" || mode === "auto")) {
|
|
12738
|
+
const mismatch = validateKeyForProvider(resolvedKey, provider);
|
|
12739
|
+
if (mismatch) {
|
|
12740
|
+
errors.push({
|
|
12741
|
+
check: "seed-key-provider-mismatch",
|
|
12742
|
+
message: mismatch,
|
|
12743
|
+
warning: true
|
|
12744
|
+
});
|
|
12745
|
+
}
|
|
12746
|
+
}
|
|
12271
12747
|
}
|
|
12272
12748
|
return errors;
|
|
12273
12749
|
}
|
|
@@ -12316,6 +12792,35 @@ async function runScenario(options) {
|
|
|
12316
12792
|
'cloudTwinUrls is required. Local twin execution has been removed; use "archal run" to provision a hosted session.'
|
|
12317
12793
|
);
|
|
12318
12794
|
}
|
|
12795
|
+
const criterionDescriptions = {};
|
|
12796
|
+
const criterionTypes = {};
|
|
12797
|
+
for (const c of scenario.successCriteria) {
|
|
12798
|
+
criterionDescriptions[c.id] = c.description;
|
|
12799
|
+
criterionTypes[c.id] = c.type;
|
|
12800
|
+
}
|
|
12801
|
+
const buildInconclusiveSeedReport = (message) => ({
|
|
12802
|
+
scenarioTitle: scenario.title,
|
|
12803
|
+
satisfactionScore: 0,
|
|
12804
|
+
criterionDescriptions,
|
|
12805
|
+
criterionTypes,
|
|
12806
|
+
twinNames: scenario.config.twins,
|
|
12807
|
+
runs: [{
|
|
12808
|
+
runIndex: 0,
|
|
12809
|
+
evaluations: scenario.successCriteria.map((criterion) => ({
|
|
12810
|
+
criterionId: criterion.id,
|
|
12811
|
+
status: "fail",
|
|
12812
|
+
confidence: 1,
|
|
12813
|
+
explanation: `Run not scored due to seed setup failure: ${message}`
|
|
12814
|
+
})),
|
|
12815
|
+
overallScore: 0,
|
|
12816
|
+
trace: [],
|
|
12817
|
+
durationMs: 0,
|
|
12818
|
+
error: message,
|
|
12819
|
+
outcome: "inconclusive_seed"
|
|
12820
|
+
}],
|
|
12821
|
+
summary: `Inconclusive (seed setup): ${message}`,
|
|
12822
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
12823
|
+
});
|
|
12319
12824
|
const preflightErrors = preflightCheck(
|
|
12320
12825
|
scenario,
|
|
12321
12826
|
config.apiKey,
|
|
@@ -12406,7 +12911,7 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
|
|
|
12406
12911
|
cacheContext: seedPromptContext
|
|
12407
12912
|
});
|
|
12408
12913
|
}
|
|
12409
|
-
|
|
12914
|
+
return buildInconclusiveSeedReport(message);
|
|
12410
12915
|
}
|
|
12411
12916
|
warn(message);
|
|
12412
12917
|
generationTargets.push(sel);
|
|
@@ -12415,12 +12920,11 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
|
|
|
12415
12920
|
if (generationTargets.length > 0) {
|
|
12416
12921
|
progress("Generating dynamic seeds from setup description...");
|
|
12417
12922
|
const dynamicConfig = {
|
|
12418
|
-
apiKey:
|
|
12419
|
-
// Seed gen always routes through Archal backend
|
|
12923
|
+
apiKey: config.apiKey,
|
|
12420
12924
|
model: config.seedModel,
|
|
12421
12925
|
baseUrl: config.baseUrl,
|
|
12422
12926
|
noCache: options.noSeedCache,
|
|
12423
|
-
providerMode:
|
|
12927
|
+
providerMode: config.seedProvider
|
|
12424
12928
|
};
|
|
12425
12929
|
let cloudSeedSnapshotByTwin = null;
|
|
12426
12930
|
const adminAuth = options.apiAdminToken ? { token: options.apiAdminToken, userId: options.apiAdminUserId } : void 0;
|
|
@@ -12438,20 +12942,28 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
|
|
|
12438
12942
|
baseSeedData = normalizeSeedState(cloudSeedSnapshotByTwin[sel.twinName]);
|
|
12439
12943
|
}
|
|
12440
12944
|
if (!baseSeedData || Object.keys(baseSeedData).length === 0) {
|
|
12441
|
-
|
|
12945
|
+
return buildInconclusiveSeedReport(
|
|
12442
12946
|
`Could not load base seed "${sel.seedName}" for twin "${sel.twinName}" from disk. Ensure the seed file exists at twins/${sel.twinName}/seeds/${sel.seedName}.json or .sql, or that the hosted twin /state endpoint is reachable.`
|
|
12443
12947
|
);
|
|
12444
12948
|
}
|
|
12445
12949
|
progress(`Generating dynamic seed for ${sel.twinName}...`);
|
|
12446
|
-
|
|
12447
|
-
|
|
12448
|
-
|
|
12449
|
-
|
|
12450
|
-
|
|
12451
|
-
|
|
12452
|
-
|
|
12453
|
-
|
|
12454
|
-
|
|
12950
|
+
let result;
|
|
12951
|
+
try {
|
|
12952
|
+
result = await generateDynamicSeed(
|
|
12953
|
+
sel.twinName,
|
|
12954
|
+
sel.seedName,
|
|
12955
|
+
baseSeedData,
|
|
12956
|
+
scenario.setup,
|
|
12957
|
+
dynamicConfig,
|
|
12958
|
+
extractedIntentByTwin.get(sel.twinName),
|
|
12959
|
+
seedPromptContext
|
|
12960
|
+
);
|
|
12961
|
+
} catch (error2) {
|
|
12962
|
+
const detail = error2 instanceof Error ? error2.message : String(error2);
|
|
12963
|
+
return buildInconclusiveSeedReport(
|
|
12964
|
+
`Dynamic seed generation failed for twin "${sel.twinName}": ${detail}`
|
|
12965
|
+
);
|
|
12966
|
+
}
|
|
12455
12967
|
sel.seedData = result.seed;
|
|
12456
12968
|
if (result.fromCache) {
|
|
12457
12969
|
cachedSeedTwins.push(sel.twinName);
|
|
@@ -12467,15 +12979,21 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
|
|
|
12467
12979
|
}
|
|
12468
12980
|
const missingDynamicSeeds = seedSelections.filter((sel) => !sel.seedData);
|
|
12469
12981
|
if (missingDynamicSeeds.length > 0) {
|
|
12470
|
-
|
|
12982
|
+
return buildInconclusiveSeedReport(
|
|
12471
12983
|
`Missing dynamic seed state for twin(s): ${missingDynamicSeeds.map((sel) => sel.twinName).join(", ")}`
|
|
12472
12984
|
);
|
|
12473
12985
|
}
|
|
12474
12986
|
for (const sel of seedSelections) {
|
|
12475
12987
|
const mismatches = verifySeedCounts(scenario.setup, sel.seedData);
|
|
12476
12988
|
if (mismatches.length === 0) continue;
|
|
12989
|
+
const significantMismatches = mismatches.filter((m) => {
|
|
12990
|
+
const delta = Math.abs(m.expected - m.actual);
|
|
12991
|
+
const ratio = m.expected > 0 ? delta / m.expected : delta;
|
|
12992
|
+
return delta > 5 || ratio > 0.5;
|
|
12993
|
+
});
|
|
12994
|
+
if (significantMismatches.length === 0) continue;
|
|
12477
12995
|
warn(
|
|
12478
|
-
`Seed count mismatch for ${sel.twinName}: ${
|
|
12996
|
+
`Seed count mismatch for ${sel.twinName}: ${significantMismatches.map((m) => `${m.subject}: expected ${m.expected}, got ${m.actual}`).join("; ")}`
|
|
12479
12997
|
);
|
|
12480
12998
|
}
|
|
12481
12999
|
const scenarioDir = dirname2(resolve4(options.scenarioPath));
|
|
@@ -12656,8 +13174,8 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
|
|
|
12656
13174
|
return {
|
|
12657
13175
|
scenarioTitle: scenario.title,
|
|
12658
13176
|
satisfactionScore: 100,
|
|
12659
|
-
criterionDescriptions
|
|
12660
|
-
criterionTypes
|
|
13177
|
+
criterionDescriptions,
|
|
13178
|
+
criterionTypes,
|
|
12661
13179
|
twinNames: scenario.config.twins,
|
|
12662
13180
|
runs: [],
|
|
12663
13181
|
summary: "Preflight checks passed",
|
|
@@ -12667,7 +13185,7 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
|
|
|
12667
13185
|
printHeader(scenario.title, seedSelections);
|
|
12668
13186
|
const evaluatorProvider = detectProvider(model);
|
|
12669
13187
|
const configProvider = detectProvider(config.model);
|
|
12670
|
-
const evaluatorApiKey = options.model && evaluatorProvider !== configProvider ? resolveProviderApiKey("", evaluatorProvider) : resolveProviderApiKey(config.apiKey, evaluatorProvider);
|
|
13188
|
+
const evaluatorApiKey = config.evaluatorProvider === "archal" ? "" : options.model && evaluatorProvider !== configProvider ? resolveProviderApiKey("", evaluatorProvider) : resolveProviderApiKey(config.apiKey, evaluatorProvider);
|
|
12671
13189
|
const evaluatorConfig = {
|
|
12672
13190
|
apiKey: evaluatorApiKey,
|
|
12673
13191
|
model,
|
|
@@ -12696,8 +13214,8 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
|
|
|
12696
13214
|
adminAuth
|
|
12697
13215
|
);
|
|
12698
13216
|
runs.push(result);
|
|
12699
|
-
printRunProgress(i, numRuns, result.overallScore, result.error);
|
|
12700
|
-
if (result.
|
|
13217
|
+
printRunProgress(i, numRuns, result.overallScore, result.error, result.outcome);
|
|
13218
|
+
if (result.outcome === "inconclusive_infrastructure" || result.outcome === "inconclusive_seed") {
|
|
12701
13219
|
consecutiveInfraErrors++;
|
|
12702
13220
|
if (consecutiveInfraErrors >= EARLY_ABORT_THRESHOLD && i < numRuns - 1) {
|
|
12703
13221
|
warn(`${consecutiveInfraErrors} consecutive run errors \u2014 aborting remaining ${numRuns - i - 1} run(s) to avoid wasting quota.`);
|
|
@@ -12707,19 +13225,17 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
|
|
|
12707
13225
|
consecutiveInfraErrors = 0;
|
|
12708
13226
|
}
|
|
12709
13227
|
}
|
|
12710
|
-
const
|
|
13228
|
+
const scoredRuns = runs.filter(
|
|
13229
|
+
(run) => run.outcome !== "inconclusive_infrastructure" && run.outcome !== "inconclusive_seed"
|
|
13230
|
+
);
|
|
13231
|
+
const runScores = scoredRuns.map((r) => r.overallScore);
|
|
12711
13232
|
const satisfactionScore = aggregateSatisfaction(runScores);
|
|
12712
|
-
const allEvaluations =
|
|
12713
|
-
const
|
|
12714
|
-
const
|
|
12715
|
-
const criterionTypes = {};
|
|
12716
|
-
for (const c of scenario.successCriteria) {
|
|
12717
|
-
criterionDescriptions[c.id] = c.description;
|
|
12718
|
-
criterionTypes[c.id] = c.type;
|
|
12719
|
-
}
|
|
13233
|
+
const allEvaluations = scoredRuns.map((r) => r.evaluations);
|
|
13234
|
+
const inconclusiveRuns = runs.length - scoredRuns.length;
|
|
13235
|
+
const summary = scoredRuns.length > 0 ? generateSummary(allEvaluations, satisfactionScore) : `Inconclusive: no scored runs (${inconclusiveRuns} infrastructure/seed setup run failure${inconclusiveRuns === 1 ? "" : "s"}).`;
|
|
12720
13236
|
let failureAnalysis;
|
|
12721
|
-
if (satisfactionScore < 100 &&
|
|
12722
|
-
const representativeRun =
|
|
13237
|
+
if (satisfactionScore < 100 && scoredRuns.length > 0 && !options.noFailureAnalysis) {
|
|
13238
|
+
const representativeRun = scoredRuns.reduce(
|
|
12723
13239
|
(worst, r) => r.overallScore < worst.overallScore ? r : worst
|
|
12724
13240
|
);
|
|
12725
13241
|
const failedCriteria = representativeRun.evaluations.filter((e) => e.status !== "pass").map((e) => ({
|
|
@@ -12742,7 +13258,9 @@ Pass --allow-ambiguous-seed to opt into best-effort generation.`;
|
|
|
12742
13258
|
stateDiff: representativeRun.stateDiff ?? { added: {}, modified: {}, removed: {} },
|
|
12743
13259
|
stateBefore: representativeRun.stateBefore ?? {},
|
|
12744
13260
|
stateAfter: representativeRun.stateAfter ?? {},
|
|
12745
|
-
satisfactionScore
|
|
13261
|
+
satisfactionScore,
|
|
13262
|
+
agentLog: representativeRun.agentLog,
|
|
13263
|
+
agentError: representativeRun.error
|
|
12746
13264
|
},
|
|
12747
13265
|
evaluatorConfig
|
|
12748
13266
|
);
|
|
@@ -13521,7 +14039,21 @@ function createRunCommand() {
|
|
|
13521
14039
|
}
|
|
13522
14040
|
}
|
|
13523
14041
|
if (!process.env["ARCHAL_ENGINE_API_KEY"] && userConfig.engineApiKey) {
|
|
13524
|
-
|
|
14042
|
+
const configKey = userConfig.engineApiKey;
|
|
14043
|
+
const requestedModel = firstNonEmpty(
|
|
14044
|
+
opts.engineModel,
|
|
14045
|
+
process.env["ARCHAL_ENGINE_MODEL"],
|
|
14046
|
+
opts.model
|
|
14047
|
+
// -m also defaults the engine model for local harnesses
|
|
14048
|
+
);
|
|
14049
|
+
if (requestedModel) {
|
|
14050
|
+
const modelProvider = detectProvider(requestedModel);
|
|
14051
|
+
if (!validateKeyForProvider(configKey, modelProvider)) {
|
|
14052
|
+
process.env["ARCHAL_ENGINE_API_KEY"] = configKey;
|
|
14053
|
+
}
|
|
14054
|
+
} else {
|
|
14055
|
+
process.env["ARCHAL_ENGINE_API_KEY"] = configKey;
|
|
14056
|
+
}
|
|
13525
14057
|
}
|
|
13526
14058
|
}
|
|
13527
14059
|
inferEngineModelFromEvaluatorModel(opts);
|
|
@@ -13572,8 +14104,17 @@ function createRunCommand() {
|
|
|
13572
14104
|
}
|
|
13573
14105
|
}
|
|
13574
14106
|
if (engine.mode === "local" && !process.env["ARCHAL_ENGINE_API_KEY"]) {
|
|
14107
|
+
const requestedModel = firstNonEmpty(
|
|
14108
|
+
opts.engineModel,
|
|
14109
|
+
process.env["ARCHAL_ENGINE_MODEL"]
|
|
14110
|
+
);
|
|
14111
|
+
const provider = requestedModel ? detectProvider(requestedModel) : null;
|
|
14112
|
+
const providerHint = provider ? `
|
|
14113
|
+
Hint: You requested model "${requestedModel}" (${provider}) but no ${provider} API key is available.
|
|
14114
|
+
Set ${getProviderEnvVar(provider)} or pass --engine-key <${provider}-key>
|
|
14115
|
+
` : "";
|
|
13575
14116
|
process.stderr.write(
|
|
13576
|
-
"Error: No API key found. The agent harness needs an API key to call the model.\nSet one of:\n GEMINI_API_KEY, OPENAI_API_KEY, or ANTHROPIC_API_KEY env var\n archal config set engine.apiKey <key>\n ARCHAL_ENGINE_API_KEY env var\n"
|
|
14117
|
+
"Error: No API key found. The agent harness needs an API key to call the model.\nSet one of:\n GEMINI_API_KEY, OPENAI_API_KEY, or ANTHROPIC_API_KEY env var\n archal config set engine.apiKey <key>\n ARCHAL_ENGINE_API_KEY env var\n" + providerHint
|
|
13577
14118
|
);
|
|
13578
14119
|
process.exit(2);
|
|
13579
14120
|
}
|
|
@@ -13643,12 +14184,14 @@ function createRunCommand() {
|
|
|
13643
14184
|
})();
|
|
13644
14185
|
const SESSION_READY_TIMEOUT_MS = Math.max(12e4, configuredReadyTimeoutMs);
|
|
13645
14186
|
const SESSION_POLL_INTERVAL_MS = 2e3;
|
|
13646
|
-
const STATUS_READY_GRACE_MS = 5e3;
|
|
13647
14187
|
const readyDeadline = Date.now() + SESSION_READY_TIMEOUT_MS;
|
|
13648
14188
|
let sessionReady = false;
|
|
13649
14189
|
let lastPollIssue;
|
|
13650
|
-
let statusReadySinceMs = null;
|
|
13651
14190
|
const isRetryablePollFailure = (result) => result.offline || typeof result.status === "number" && result.status >= 500;
|
|
14191
|
+
const workersAllReady = (workers) => {
|
|
14192
|
+
if (!workers || Object.keys(workers).length === 0) return true;
|
|
14193
|
+
return Object.values(workers).every((value) => value === "ready");
|
|
14194
|
+
};
|
|
13652
14195
|
const sleepForPollInterval = async () => new Promise((resolve12) => setTimeout(resolve12, SESSION_POLL_INTERVAL_MS));
|
|
13653
14196
|
if (!opts.quiet) process.stderr.write("Starting cloud session...\n");
|
|
13654
14197
|
let pollCount = 0;
|
|
@@ -13703,26 +14246,19 @@ function createRunCommand() {
|
|
|
13703
14246
|
}
|
|
13704
14247
|
const healthAlive = healthResult.ok && healthResult.data.alive;
|
|
13705
14248
|
const statusAlive = statusResult.data.alive || status === "ready";
|
|
13706
|
-
|
|
14249
|
+
const statusWorkersReady = workersAllReady(
|
|
14250
|
+
statusResult.data.twins ?? statusResult.data.workers
|
|
14251
|
+
);
|
|
14252
|
+
const healthWorkersReady = workersAllReady(healthResult.data.twins);
|
|
14253
|
+
if (statusAlive && healthAlive && statusWorkersReady && healthWorkersReady) {
|
|
13707
14254
|
sessionReady = true;
|
|
13708
14255
|
break;
|
|
13709
14256
|
}
|
|
13710
|
-
|
|
13711
|
-
|
|
13712
|
-
|
|
13713
|
-
|
|
13714
|
-
|
|
13715
|
-
if (readyForMs >= STATUS_READY_GRACE_MS) {
|
|
13716
|
-
debug(
|
|
13717
|
-
`Session ${backendSessionId} proceeded after health endpoint warmup (${readyForMs}ms).`
|
|
13718
|
-
);
|
|
13719
|
-
sessionReady = true;
|
|
13720
|
-
break;
|
|
13721
|
-
}
|
|
13722
|
-
} else {
|
|
13723
|
-
statusReadySinceMs = null;
|
|
13724
|
-
}
|
|
13725
|
-
lastPollIssue = `session still starting (status=${status}, health=${healthAlive ? "alive" : "starting"})`;
|
|
14257
|
+
const statusTwinStates = Object.entries(
|
|
14258
|
+
statusResult.data.twins ?? statusResult.data.workers ?? {}
|
|
14259
|
+
).map(([twin, twinStatus]) => `${twin}:${twinStatus}`).join(", ");
|
|
14260
|
+
const healthTwinStates = Object.entries(healthResult.data.twins ?? {}).map(([twin, twinStatus]) => `${twin}:${twinStatus}`).join(", ");
|
|
14261
|
+
lastPollIssue = `session still starting (status=${status}, health=${healthAlive ? "alive" : "starting"}, statusTwins=[${statusTwinStates || "n/a"}], healthTwins=[${healthTwinStates || "n/a"}])`;
|
|
13726
14262
|
await sleepForPollInterval();
|
|
13727
14263
|
}
|
|
13728
14264
|
if (sessionReady) {
|
|
@@ -14123,6 +14659,7 @@ function buildEvidenceArtifacts(report) {
|
|
|
14123
14659
|
overallScore: run.overallScore,
|
|
14124
14660
|
durationMs: run.durationMs,
|
|
14125
14661
|
error: run.error ?? null,
|
|
14662
|
+
outcome: run.outcome ?? null,
|
|
14126
14663
|
evaluations: (run.evaluations ?? []).map((ev) => ({
|
|
14127
14664
|
criterionId: ev.criterionId,
|
|
14128
14665
|
status: ev.status,
|
|
@@ -14442,7 +14979,7 @@ import { createInterface as createInterface2 } from "readline";
|
|
|
14442
14979
|
import { Command as Command5 } from "commander";
|
|
14443
14980
|
|
|
14444
14981
|
// src/telemetry/anonymizer.ts
|
|
14445
|
-
import { createHash as
|
|
14982
|
+
import { createHash as createHash5 } from "crypto";
|
|
14446
14983
|
var API_KEY_PATTERNS = [
|
|
14447
14984
|
/(?:api[_-]?key|token|secret|password|authorization|bearer|credential)\s*[:=]\s*["']?([a-zA-Z0-9_\-/.+=]{16,})["']?/gi,
|
|
14448
14985
|
/sk-[a-zA-Z0-9]{20,}/g,
|
|
@@ -14492,7 +15029,7 @@ var USERNAME_FIELDS = /* @__PURE__ */ new Set([
|
|
|
14492
15029
|
"maintainer"
|
|
14493
15030
|
]);
|
|
14494
15031
|
function hashValue2(value, salt = "archal") {
|
|
14495
|
-
return `anon_${
|
|
15032
|
+
return `anon_${createHash5("sha256").update(`${salt}:${value}`).digest("hex").slice(0, 12)}`;
|
|
14496
15033
|
}
|
|
14497
15034
|
function anonymizeForEnterprise(entries) {
|
|
14498
15035
|
debug("Enterprise anonymization", { entryCount: String(entries.length) });
|
|
@@ -15468,7 +16005,7 @@ function createDoctorCommand() {
|
|
|
15468
16005
|
// src/commands/login.ts
|
|
15469
16006
|
import { Command as Command8 } from "commander";
|
|
15470
16007
|
import { exec } from "child_process";
|
|
15471
|
-
import { createHash as
|
|
16008
|
+
import { createHash as createHash6, randomBytes as randomBytes2 } from "crypto";
|
|
15472
16009
|
import { createServer } from "http";
|
|
15473
16010
|
var START_PORT = 51423;
|
|
15474
16011
|
var LOGIN_TIMEOUT_MS = 5 * 60 * 1e3;
|
|
@@ -15489,7 +16026,7 @@ function openBrowser(url) {
|
|
|
15489
16026
|
}
|
|
15490
16027
|
function createPkcePair() {
|
|
15491
16028
|
const codeVerifier = randomBytes2(32).toString("base64url");
|
|
15492
|
-
const codeChallenge =
|
|
16029
|
+
const codeChallenge = createHash6("sha256").update(codeVerifier).digest("base64url");
|
|
15493
16030
|
return { codeVerifier, codeChallenge };
|
|
15494
16031
|
}
|
|
15495
16032
|
function isPlan2(value) {
|
|
@@ -16219,11 +16756,25 @@ function detectProviderName(model) {
|
|
|
16219
16756
|
if (normalized.startsWith("gpt-") || normalized.startsWith("o1-") || normalized.startsWith("o3-") || normalized.startsWith("o4-")) return "OpenAI";
|
|
16220
16757
|
return "OpenAI-compatible";
|
|
16221
16758
|
}
|
|
16222
|
-
function resolveEngineApiKey(explicitKey) {
|
|
16759
|
+
function resolveEngineApiKey(explicitKey, model) {
|
|
16223
16760
|
if (explicitKey?.trim()) return explicitKey.trim();
|
|
16224
16761
|
if (process.env["ARCHAL_ENGINE_API_KEY"]?.trim()) return process.env["ARCHAL_ENGINE_API_KEY"].trim();
|
|
16762
|
+
const modelProvider = model ? detectProvider(model) : null;
|
|
16225
16763
|
const config = loadConfig();
|
|
16226
|
-
if (config.engineApiKey)
|
|
16764
|
+
if (config.engineApiKey) {
|
|
16765
|
+
if (!modelProvider || !validateKeyForProvider(config.engineApiKey, modelProvider)) {
|
|
16766
|
+
return config.engineApiKey;
|
|
16767
|
+
}
|
|
16768
|
+
}
|
|
16769
|
+
const providerEnvVars = {
|
|
16770
|
+
gemini: "GEMINI_API_KEY",
|
|
16771
|
+
openai: "OPENAI_API_KEY",
|
|
16772
|
+
anthropic: "ANTHROPIC_API_KEY"
|
|
16773
|
+
};
|
|
16774
|
+
if (modelProvider && providerEnvVars[modelProvider]) {
|
|
16775
|
+
const val = process.env[providerEnvVars[modelProvider]]?.trim();
|
|
16776
|
+
if (val) return val;
|
|
16777
|
+
}
|
|
16227
16778
|
for (const envVar of ["GEMINI_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY"]) {
|
|
16228
16779
|
const val = process.env[envVar]?.trim();
|
|
16229
16780
|
if (val) return val;
|
|
@@ -16272,7 +16823,7 @@ function createDemoCommand() {
|
|
|
16272
16823
|
process.exit(1);
|
|
16273
16824
|
}
|
|
16274
16825
|
const providerName = detectProviderName(opts.model);
|
|
16275
|
-
const engineApiKey = resolveEngineApiKey(opts.apiKey);
|
|
16826
|
+
const engineApiKey = resolveEngineApiKey(opts.apiKey, opts.model);
|
|
16276
16827
|
if (!engineApiKey) {
|
|
16277
16828
|
process.stderr.write(
|
|
16278
16829
|
`Error: No API key found for model "${opts.model}" (${providerName}).
|