@warmdrift/kgauto-compiler 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -269,33 +269,69 @@ function passCompressHistory(ir, opts = {}) {
269
269
  const summarizeAboveTokens = opts.summarizeAboveTokens;
270
270
  const historyTokensTotal = totalHistoryTokens(history);
271
271
  const countThresholdHit = history.length > summarizeOlderThan;
272
- const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
272
+ const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens;
273
273
  if (!countThresholdHit && !tokenThresholdHit) {
274
274
  return { value: ir, mutations: [], historyTokensTotal };
275
275
  }
276
- const cutIndex = history.length - keepRecent;
277
- const old = history.slice(0, cutIndex);
278
- const recent = history.slice(cutIndex);
279
- const userTurns = old.filter((m) => m.role === "user");
280
- const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
281
- const oldTokens = totalHistoryTokens(old);
282
- const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
283
- const summary = {
284
- role: "system",
285
- content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
286
- };
287
- return {
288
- value: { ...ir, history: [summary, ...recent] },
289
- mutations: [
290
- {
291
- id: `compress-history-${old.length}`,
292
- source: "static_pass",
293
- passName: "compress_history",
294
- description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
276
+ if (history.length > keepRecent) {
277
+ const cutIndex = history.length - keepRecent;
278
+ const old = history.slice(0, cutIndex);
279
+ const recent = history.slice(cutIndex);
280
+ const userTurns = old.filter((m) => m.role === "user");
281
+ const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
282
+ const oldTokens = totalHistoryTokens(old);
283
+ const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
284
+ const summary = {
285
+ role: "system",
286
+ content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
287
+ };
288
+ return {
289
+ value: { ...ir, history: [summary, ...recent] },
290
+ mutations: [
291
+ {
292
+ id: `compress-history-${old.length}`,
293
+ source: "static_pass",
294
+ passName: "compress_history",
295
+ description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
296
+ }
297
+ ],
298
+ historyTokensTotal
299
+ };
300
+ }
301
+ if (tokenThresholdHit) {
302
+ let fattestIdx = -1;
303
+ let fattestTokens = 0;
304
+ for (let i = 0; i < history.length; i++) {
305
+ const m = history[i];
306
+ if (!m || typeof m.content !== "string") continue;
307
+ const t = countTokens(m.content);
308
+ if (t > fattestTokens) {
309
+ fattestTokens = t;
310
+ fattestIdx = i;
295
311
  }
296
- ],
297
- historyTokensTotal
298
- };
312
+ }
313
+ const FAT_DOMINANCE_FLOOR = 0.3;
314
+ const fattest = fattestIdx >= 0 ? history[fattestIdx] : void 0;
315
+ if (fattest && historyTokensTotal > 0 && fattestTokens / historyTokensTotal >= FAT_DOMINANCE_FLOOR) {
316
+ const firstLine = fattest.content.split("\n")[0]?.slice(0, 200) ?? "";
317
+ const newContent = `[Earlier ${fattest.role} message content omitted: ~${fattestTokens} tokens. Preview: "${firstLine}"]`;
318
+ const newHistory = history.slice();
319
+ newHistory[fattestIdx] = { ...fattest, content: newContent };
320
+ return {
321
+ value: { ...ir, history: newHistory },
322
+ mutations: [
323
+ {
324
+ id: `compress-fat-message-${fattestIdx}`,
325
+ source: "static_pass",
326
+ passName: "compress_history",
327
+ description: `Replaced fat ${fattest.role} message #${fattestIdx} content (~${fattestTokens} of ${historyTokensTotal} tokens, ${Math.round(fattestTokens / historyTokensTotal * 100)}% of history) with summary stub \u2014 token threshold ${summarizeAboveTokens} exceeded (history.length ${history.length} <= keepRecent ${keepRecent}, slice not possible)`
328
+ }
329
+ ],
330
+ historyTokensTotal
331
+ };
332
+ }
333
+ }
334
+ return { value: ir, mutations: [], historyTokensTotal };
299
335
  }
300
336
  function passApplyCliffs(ir, profile, estimatedInputTokens) {
301
337
  const mutations = [];
@@ -1438,6 +1474,211 @@ var PROFILES_RAW = [
1438
1474
  hunt: 4
1439
1475
  // sequential tools — same as V4-Flash
1440
1476
  }
1477
+ },
1478
+ // ── Auto-onboarded (UNVERIFIED) ──
1479
+ // Cloned by scripts/auto-onboard-models.mjs from a same-family template.
1480
+ // Each entry's pricing/context/cliffs/lowering reflects the template, NOT
1481
+ // provider docs. Verify before promoting status to 'current' (L-049/L-081).
1482
+ {
1483
+ id: "gemini-3-flash-preview",
1484
+ verifiedAgainstDocs: "UNVERIFIED-AUTO-ONBOARD",
1485
+ provider: "google",
1486
+ status: "preview",
1487
+ maxContextTokens: 1048576,
1488
+ maxOutputTokens: 65535,
1489
+ maxTools: 128,
1490
+ parallelToolCalls: true,
1491
+ structuredOutput: "native",
1492
+ systemPromptMode: "separate",
1493
+ streaming: true,
1494
+ cliffs: [
1495
+ {
1496
+ metric: "input_tokens",
1497
+ threshold: 8e3,
1498
+ action: "downgrade_quality_warning",
1499
+ reason: "Quality degrades significantly above ~8K context tokens"
1500
+ },
1501
+ {
1502
+ metric: "tool_count",
1503
+ threshold: 20,
1504
+ action: "drop_to_top_relevant",
1505
+ reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
1506
+ },
1507
+ {
1508
+ metric: "thinking_with_short_output",
1509
+ threshold: 1,
1510
+ action: "force_thinking_budget_zero",
1511
+ reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
1512
+ },
1513
+ {
1514
+ // s11 trust artifact (2026-05-02): brain showed 5/5 empty rate on
1515
+ // tt-intelligence/summarize/gemini-2.5-flash with tools offered.
1516
+ // v1's disable_thinking_for_short_output already fired and didn't
1517
+ // help — disabling thinking is necessary but not sufficient. Tools
1518
+ // present + summarize intent confuses Flash into a no-output state
1519
+ // (likely tool-decision purgatory). Strip tools entirely for this
1520
+ // archetype on this model.
1521
+ metric: "tool_count",
1522
+ threshold: 1,
1523
+ whenIntent: "summarize",
1524
+ action: "strip_tools",
1525
+ reason: "Gemini Flash returns empty when summarize intent has tools offered (5/5 empty rate observed in v1 prod 2026-04-19, replayed into v2 brain 2026-04-29)"
1526
+ }
1527
+ ],
1528
+ costInputPer1m: 0.3,
1529
+ costOutputPer1m: 2.5,
1530
+ lowering: {
1531
+ ...GOOGLE_LOWERING_BASE,
1532
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
1533
+ },
1534
+ recovery: [
1535
+ {
1536
+ signal: "empty_response_after_tool",
1537
+ action: "retry_with_params",
1538
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1539
+ maxRetries: 1,
1540
+ reason: "Known: empty after tool result \u2014 retry with thinking off"
1541
+ },
1542
+ {
1543
+ signal: "empty_response",
1544
+ action: "retry_with_params",
1545
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1546
+ maxRetries: 1,
1547
+ reason: "Empty response \u2014 try with thinking off"
1548
+ },
1549
+ {
1550
+ signal: "malformed_function_call",
1551
+ action: "escalate",
1552
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
1553
+ }
1554
+ ],
1555
+ strengths: ["speed", "volume", "classification", "1m_context", "cost"],
1556
+ weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
1557
+ notes: "AUTO-ONBOARDED 2026-05-16 from `gemini-2.5-flash`. Pricing, context, cliffs are template-cloned and UNVERIFIED \u2014 confirm against provider docs before promoting status to 'current'.",
1558
+ // Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
1559
+ // 15-75 calls/step beats Sonnet — L-040), summarize, classify.
1560
+ archetypePerf: {
1561
+ hunt: 9,
1562
+ // L-040: parallel tool throughput 15-75/step
1563
+ classify: 7,
1564
+ // brain-validated, 218 rows
1565
+ summarize: 7,
1566
+ // brain-validated; cliff strips tools when present
1567
+ transform: 7,
1568
+ ask: 7,
1569
+ generate: 6,
1570
+ plan: 5,
1571
+ extract: 6,
1572
+ // alpha.8 MAX_TOKENS history on structured output
1573
+ critique: 4
1574
+ // reasoning shallower than Sonnet/Opus
1575
+ }
1576
+ },
1577
+ {
1578
+ // ── Gemini 3.1 Flash-Lite (auto-onboarded; the commentary below is inherited from the Gemini 2.5 Flash-Lite template) ──
1579
+ // Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
1580
+ // it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
1581
+ // stable. Positioned BELOW Flash on the cost/perf frontier:
1582
+ // input $0.10/M (Flash $0.30/M) — 3× cheaper
1583
+ // output $0.40/M (Flash $2.50/M) — 6× cheaper
1584
+ // cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
1585
+ // Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
1586
+ // is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
1587
+ // thresholds. The brain will validate/relax these as evidence accumulates
1588
+ // per (archetype, model) tuple. Currently ZERO brain rows for this model.
1589
+ id: "gemini-3.1-flash-lite",
1590
+ verifiedAgainstDocs: "UNVERIFIED-AUTO-ONBOARD",
1591
+ provider: "google",
1592
+ status: "preview",
1593
+ maxContextTokens: 1048576,
1594
+ maxOutputTokens: 65536,
1595
+ maxTools: 128,
1596
+ parallelToolCalls: true,
1597
+ structuredOutput: "native",
1598
+ systemPromptMode: "separate",
1599
+ streaming: true,
1600
+ cliffs: [
1601
+ {
1602
+ metric: "input_tokens",
1603
+ threshold: 8e3,
1604
+ action: "downgrade_quality_warning",
1605
+ reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
1606
+ },
1607
+ {
1608
+ metric: "tool_count",
1609
+ threshold: 10,
1610
+ action: "drop_to_top_relevant",
1611
+ reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
1612
+ },
1613
+ {
1614
+ metric: "thinking_with_short_output",
1615
+ threshold: 1,
1616
+ action: "force_thinking_budget_zero",
1617
+ reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
1618
+ },
1619
+ {
1620
+ // Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
1621
+ // trust artifact, kgauto commit 3872832). Flash-Lite shares the
1622
+ // same architectural family — almost certainly inherits this cliff.
1623
+ // Ship the guard preemptively; brain telemetry confirms or relaxes.
1624
+ metric: "tool_count",
1625
+ threshold: 1,
1626
+ whenIntent: "summarize",
1627
+ action: "strip_tools",
1628
+ reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
1629
+ }
1630
+ ],
1631
+ costInputPer1m: 0.1,
1632
+ costOutputPer1m: 0.4,
1633
+ lowering: {
1634
+ ...GOOGLE_LOWERING_BASE,
1635
+ // Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
1636
+ // $0.10/M input. Material for repeat-prompt workloads (classify shape).
1637
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
1638
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
1639
+ },
1640
+ recovery: [
1641
+ {
1642
+ signal: "empty_response_after_tool",
1643
+ action: "retry_with_params",
1644
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1645
+ maxRetries: 1,
1646
+ reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
1647
+ },
1648
+ {
1649
+ signal: "empty_response",
1650
+ action: "retry_with_params",
1651
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1652
+ maxRetries: 1,
1653
+ reason: "Empty response \u2014 try with thinking off."
1654
+ },
1655
+ {
1656
+ signal: "malformed_function_call",
1657
+ action: "escalate",
1658
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
1659
+ }
1660
+ ],
1661
+ strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
1662
+ weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
1663
+ notes: "AUTO-ONBOARDED 2026-05-16 from `gemini-2.5-flash-lite`. Pricing, context, cliffs are template-cloned and UNVERIFIED \u2014 confirm against provider docs before promoting status to 'current'.",
1664
+ // Tier 3 emergency floor for summarize/classify chains. ZERO brain
1665
+ // rows — all values are starter hypotheses anchored to "smaller
1666
+ // sibling of Flash, at-or-below Flash perf on every archetype." The
1667
+ // first 50 brain rows per archetype will validate or relax these.
1668
+ archetypePerf: {
1669
+ classify: 6,
1670
+ // starter hypothesis — verify (Flash is 7, lite likely ≤)
1671
+ summarize: 6,
1672
+ // starter hypothesis — verify; cliff strips tools
1673
+ transform: 6,
1674
+ // starter hypothesis — verify
1675
+ ask: 5,
1676
+ hunt: 5,
1677
+ generate: 4,
1678
+ extract: 4,
1679
+ plan: 3,
1680
+ critique: 3
1681
+ }
1441
1682
  }
1442
1683
  ];
1443
1684
  var ALIASES = {
@@ -2288,56 +2529,257 @@ function tryParseJson(s) {
2288
2529
  }
2289
2530
  }
2290
2531
 
2291
- // src/call.ts
2292
- async function call(ir, opts = {}) {
2293
- const initial = compileAndRegister(ir, opts);
2294
- const start = Date.now();
2295
- const attempts = [];
2296
- const rawTargets = [initial.target, ...initial.fallbackChain];
2297
- let unreachableFiltered;
2298
- let targetsToTry;
2299
- if (opts.noAutoFilter) {
2300
- targetsToTry = rawTargets;
2301
- } else {
2302
- const dropped = [];
2303
- targetsToTry = [];
2304
- for (const t of rawTargets) {
2305
- if (isModelReachable(t, { apiKeys: opts.apiKeys })) {
2306
- targetsToTry.push(t);
2307
- } else {
2308
- dropped.push(t);
2309
- }
2310
- }
2311
- unreachableFiltered = dropped;
2312
- if (targetsToTry.length === 0) {
2313
- const latencyMs2 = Date.now() - start;
2314
- await record({
2315
- handle: initial.handle,
2316
- tokensIn: 0,
2317
- tokensOut: 0,
2318
- latencyMs: latencyMs2,
2319
- success: false,
2320
- errorType: "no_reachable_models",
2321
- promptPreview: extractPromptPreview(ir)
2322
- });
2323
- const noReachableAttempts = dropped.map((m) => ({
2324
- model: m,
2325
- status: "terminal",
2326
- errorCode: "unreachable_provider",
2327
- message: `No API key for ${m}'s provider \u2014 set one of PROVIDER_ENV_KEYS or pass apiKeys`
2328
- }));
2329
- throw new CallError(
2330
- `call(): no reachable models in chain. Filtered: [${dropped.join(", ")}]. Add a key for one provider, or pass apiKeys.`,
2331
- noReachableAttempts,
2332
- void 0,
2333
- "no_reachable_models"
2532
+ // src/chains-brain.ts
2533
+ function isChainsRow(x) {
2534
+ if (!x || typeof x !== "object") return false;
2535
+ const r = x;
2536
+ return typeof r.archetype === "string" && typeof r.tier === "number" && typeof r.model_id === "string";
2537
+ }
2538
+ function mapRowsToChains(rows) {
2539
+ const grouped = /* @__PURE__ */ new Map();
2540
+ for (const row of rows) {
2541
+ if (!isChainsRow(row)) continue;
2542
+ const list = grouped.get(row.archetype) ?? [];
2543
+ list.push(row);
2544
+ grouped.set(row.archetype, list);
2545
+ }
2546
+ const out = {};
2547
+ for (const [archetype, group] of grouped.entries()) {
2548
+ group.sort((a, b) => a.tier - b.tier);
2549
+ out[archetype] = group.map((r) => r.model_id);
2550
+ }
2551
+ const bundled = getAllStarterChains();
2552
+ for (const archetype of Object.keys(bundled)) {
2553
+ if (!out[archetype]) out[archetype] = bundled[archetype];
2554
+ }
2555
+ return out;
2556
+ }
2557
+ var loadChainsFromBrain = createBrainQueryCache({
2558
+ table: "kgauto_chains",
2559
+ mapRows: mapRowsToChains,
2560
+ bundledFallback: getAllStarterChains
2561
+ });
2562
+
2563
+ // src/fallback.ts
2564
+ var STARTER_CHAINS = {
2565
+ // Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
2566
+ critique: [
2567
+ "claude-opus-4-7",
2568
+ "claude-sonnet-4-6",
2569
+ "gemini-2.5-pro"
2570
+ ],
2571
+ // Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
2572
+ // to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
2573
+ plan: [
2574
+ "claude-sonnet-4-6",
2575
+ "claude-opus-4-7",
2576
+ "gemini-2.5-pro",
2577
+ "deepseek-v4-pro"
2578
+ ],
2579
+ // Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
2580
+ // Flash floor for the open-posture chain.
2581
+ generate: [
2582
+ "claude-sonnet-4-6",
2583
+ "claude-haiku-4-5",
2584
+ "gemini-2.5-pro",
2585
+ "gemini-2.5-flash"
2586
+ ],
2587
+ ask: [
2588
+ "claude-sonnet-4-6",
2589
+ "claude-haiku-4-5",
2590
+ "gemini-2.5-pro",
2591
+ "gemini-2.5-flash"
2592
+ ],
2593
+ // Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
2594
+ // DeepSeek skipped (no brain evidence). Floor at Haiku.
2595
+ extract: [
2596
+ "claude-sonnet-4-6",
2597
+ "claude-haiku-4-5",
2598
+ "gemini-2.5-pro"
2599
+ ],
2600
+ // Forgiving archetype — Sonnet primary but Flash safely floors it.
2601
+ transform: [
2602
+ "claude-sonnet-4-6",
2603
+ "claude-haiku-4-5",
2604
+ "gemini-2.5-pro",
2605
+ "gemini-2.5-flash"
2606
+ ],
2607
+ // Parallel-tool throughput champion (Flash, L-040). Tier 1 cross-provider
2608
+ // Pro; tier 2 Sonnet (quality safety net for blocked-Flash case); tier 3
2609
+ // Haiku (reduced tool budget — cliff at 16 fires).
2610
+ hunt: [
2611
+ "gemini-2.5-flash",
2612
+ "gemini-2.5-pro",
2613
+ "claude-sonnet-4-6",
2614
+ "claude-haiku-4-5"
2615
+ ],
2616
+ // Cost-sensitive + tolerant. DeepSeek brain-evidence tier 1; Haiku tier 2
2617
+ // for quality safety; Flash-Lite emergency floor (onboarded s22).
2618
+ summarize: [
2619
+ "gemini-2.5-flash",
2620
+ "deepseek-v4-flash",
2621
+ "claude-haiku-4-5",
2622
+ "gemini-2.5-flash-lite"
2623
+ ],
2624
+ // Brain-validated DeepSeek tier 1 (169 rows, 0% empty); Haiku tier 2;
2625
+ // Flash-Lite floor for repeat-prompt workloads (cache-discount 10×).
2626
+ classify: [
2627
+ "gemini-2.5-flash",
2628
+ "deepseek-v4-flash",
2629
+ "claude-haiku-4-5",
2630
+ "gemini-2.5-flash-lite"
2631
+ ]
2632
+ };
2633
+ function getDefaultFallbackChain(opts) {
2634
+ const { archetype, primary, maxDepth = 3, policy, reachability } = opts;
2635
+ if (maxDepth < 1) {
2636
+ throw new Error(
2637
+ `getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
2638
+ );
2639
+ }
2640
+ const allChains = loadChainsFromBrain();
2641
+ const starter = allChains[archetype];
2642
+ if (!starter) {
2643
+ throw new Error(
2644
+ `getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(allChains).join(", ")}`
2645
+ );
2646
+ }
2647
+ let chain;
2648
+ if (primary) {
2649
+ chain = [primary, ...starter.filter((id) => id !== primary)];
2650
+ } else {
2651
+ chain = [...starter];
2652
+ }
2653
+ if (policy?.blockedModels && policy.blockedModels.length > 0) {
2654
+ const blocked = new Set(policy.blockedModels);
2655
+ chain = chain.filter((id) => !blocked.has(id));
2656
+ }
2657
+ const seen = /* @__PURE__ */ new Set();
2658
+ const deduped = [];
2659
+ for (const id of chain) {
2660
+ if (!seen.has(id)) {
2661
+ seen.add(id);
2662
+ deduped.push(id);
2663
+ }
2664
+ }
2665
+ let filtered = deduped;
2666
+ if (reachability) {
2667
+ filtered = deduped.filter((id) => isModelReachable(id, reachability));
2668
+ }
2669
+ return filtered.slice(0, maxDepth);
2670
+ }
2671
+ function getStarterChain(archetype) {
2672
+ const chain = STARTER_CHAINS[archetype];
2673
+ if (!chain) {
2674
+ throw new Error(
2675
+ `getStarterChain: unknown archetype "${archetype}"`
2676
+ );
2677
+ }
2678
+ return [...chain];
2679
+ }
2680
+ function getAllStarterChains() {
2681
+ const out = {};
2682
+ for (const [archetype, chain] of Object.entries(STARTER_CHAINS)) {
2683
+ out[archetype] = [...chain];
2684
+ }
2685
+ return out;
2686
+ }
2687
+ function ensureCrossProviderTail(opts) {
2688
+ const { chain, archetype, apiKeys, envSource } = opts;
2689
+ if (chain.length < 1) return { chain };
2690
+ const providers = /* @__PURE__ */ new Set();
2691
+ for (const t of chain) {
2692
+ const p = tryGetProfile(t);
2693
+ if (p) providers.add(p.provider);
2694
+ }
2695
+ if (providers.size >= 2) return { chain };
2696
+ const existingProvider = providers.values().next().value;
2697
+ if (!existingProvider) return { chain };
2698
+ const allChains = loadChainsFromBrain();
2699
+ const fullChain = allChains[archetype];
2700
+ if (!fullChain) return { chain };
2701
+ for (const candidate of fullChain) {
2702
+ if (chain.includes(candidate)) continue;
2703
+ const cp = tryGetProfile(candidate);
2704
+ if (!cp || cp.provider === existingProvider) continue;
2705
+ if (!isModelReachable(candidate, { apiKeys, envSource })) continue;
2706
+ return { chain: [...chain, candidate], appended: candidate };
2707
+ }
2708
+ return { chain };
2709
+ }
2710
+
2711
+ // src/call.ts
2712
+ async function call(ir, opts = {}) {
2713
+ const initial = compileAndRegister(ir, opts);
2714
+ const start = Date.now();
2715
+ const attempts = [];
2716
+ const rawTargets = [initial.target, ...initial.fallbackChain];
2717
+ let unreachableFiltered;
2718
+ let targetsToTry;
2719
+ if (opts.noAutoFilter) {
2720
+ targetsToTry = rawTargets;
2721
+ } else {
2722
+ const dropped = [];
2723
+ targetsToTry = [];
2724
+ for (const t of rawTargets) {
2725
+ if (isModelReachable(t, { apiKeys: opts.apiKeys })) {
2726
+ targetsToTry.push(t);
2727
+ } else {
2728
+ dropped.push(t);
2729
+ }
2730
+ }
2731
+ unreachableFiltered = dropped;
2732
+ if (targetsToTry.length === 0) {
2733
+ const latencyMs2 = Date.now() - start;
2734
+ await record({
2735
+ handle: initial.handle,
2736
+ tokensIn: 0,
2737
+ tokensOut: 0,
2738
+ latencyMs: latencyMs2,
2739
+ success: false,
2740
+ errorType: "no_reachable_models",
2741
+ promptPreview: extractPromptPreview(ir)
2742
+ });
2743
+ const noReachableAttempts = dropped.map((m) => ({
2744
+ model: m,
2745
+ status: "terminal",
2746
+ errorCode: "unreachable_provider",
2747
+ message: `No API key for ${m}'s provider \u2014 set one of PROVIDER_ENV_KEYS or pass apiKeys`
2748
+ }));
2749
+ throw new CallError(
2750
+ `call(): no reachable models in chain. Filtered: [${dropped.join(", ")}]. Add a key for one provider, or pass apiKeys.`,
2751
+ noReachableAttempts,
2752
+ void 0,
2753
+ "no_reachable_models"
2334
2754
  );
2335
2755
  }
2756
+ const archetypeName = ir.intent?.archetype;
2757
+ if (archetypeName) {
2758
+ const ensured = ensureCrossProviderTail({
2759
+ chain: targetsToTry,
2760
+ archetype: archetypeName,
2761
+ apiKeys: opts.apiKeys
2762
+ });
2763
+ if (ensured.appended) {
2764
+ targetsToTry = ensured.chain;
2765
+ }
2766
+ }
2336
2767
  }
2337
2768
  let activeCompile = initial;
2338
2769
  let lastErr;
2770
+ const failedProviders = /* @__PURE__ */ new Set();
2339
2771
  for (let i = 0; i < targetsToTry.length; i++) {
2340
2772
  const targetModel = targetsToTry[i];
2773
+ const targetProfile = tryGetProfile(targetModel);
2774
+ if (targetProfile && failedProviders.has(targetProfile.provider) && !opts.noFallback) {
2775
+ attempts.push({
2776
+ model: targetModel,
2777
+ status: "terminal",
2778
+ errorCode: "auth_inferred",
2779
+ message: `Skipped \u2014 provider ${targetProfile.provider} returned 401/403 earlier in this call; same key inferred to fail`
2780
+ });
2781
+ continue;
2782
+ }
2341
2783
  if (targetModel !== initial.target) {
2342
2784
  try {
2343
2785
  activeCompile = compileAndRegister(
@@ -2406,6 +2848,10 @@ async function call(ir, opts = {}) {
2406
2848
  });
2407
2849
  lastErr = validated;
2408
2850
  if (validated.errorType === "terminal" || opts.noFallback) {
2851
+ if (validated.errorCode === "auth" && !opts.noFallback && activeCompile.provider) {
2852
+ failedProviders.add(activeCompile.provider);
2853
+ continue;
2854
+ }
2409
2855
  break;
2410
2856
  }
2411
2857
  }
@@ -2484,6 +2930,7 @@ function normalizeFallbackReason(attempts) {
2484
2930
  return "cliff";
2485
2931
  }
2486
2932
  if (code === "cost_cap_exceeded") return "cost_cap";
2933
+ if (code === "auth" || code === "auth_inferred") return "provider_auth_failed";
2487
2934
  return "provider_error";
2488
2935
  }
2489
2936
 
@@ -2575,162 +3022,6 @@ function clamp(n) {
2575
3022
  return Math.max(0, Math.min(1, n));
2576
3023
  }
2577
3024
 
2578
- // src/chains-brain.ts
2579
- function isChainsRow(x) {
2580
- if (!x || typeof x !== "object") return false;
2581
- const r = x;
2582
- return typeof r.archetype === "string" && typeof r.tier === "number" && typeof r.model_id === "string";
2583
- }
2584
- function mapRowsToChains(rows) {
2585
- const grouped = /* @__PURE__ */ new Map();
2586
- for (const row of rows) {
2587
- if (!isChainsRow(row)) continue;
2588
- const list = grouped.get(row.archetype) ?? [];
2589
- list.push(row);
2590
- grouped.set(row.archetype, list);
2591
- }
2592
- const out = {};
2593
- for (const [archetype, group] of grouped.entries()) {
2594
- group.sort((a, b) => a.tier - b.tier);
2595
- out[archetype] = group.map((r) => r.model_id);
2596
- }
2597
- const bundled = getAllStarterChains();
2598
- for (const archetype of Object.keys(bundled)) {
2599
- if (!out[archetype]) out[archetype] = bundled[archetype];
2600
- }
2601
- return out;
2602
- }
2603
- var loadChainsFromBrain = createBrainQueryCache({
2604
- table: "kgauto_chains",
2605
- mapRows: mapRowsToChains,
2606
- bundledFallback: getAllStarterChains
2607
- });
2608
-
2609
- // src/fallback.ts
2610
- var STARTER_CHAINS = {
2611
- // Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
2612
- critique: [
2613
- "claude-opus-4-7",
2614
- "claude-sonnet-4-6",
2615
- "gemini-2.5-pro"
2616
- ],
2617
- // Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
2618
- // to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
2619
- plan: [
2620
- "claude-sonnet-4-6",
2621
- "claude-opus-4-7",
2622
- "gemini-2.5-pro",
2623
- "deepseek-v4-pro"
2624
- ],
2625
- // Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
2626
- // Flash floor for the open-posture chain.
2627
- generate: [
2628
- "claude-sonnet-4-6",
2629
- "claude-haiku-4-5",
2630
- "gemini-2.5-pro",
2631
- "gemini-2.5-flash"
2632
- ],
2633
- ask: [
2634
- "claude-sonnet-4-6",
2635
- "claude-haiku-4-5",
2636
- "gemini-2.5-pro",
2637
- "gemini-2.5-flash"
2638
- ],
2639
- // Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
2640
- // DeepSeek skipped (no brain evidence). Floor at Haiku.
2641
- extract: [
2642
- "claude-sonnet-4-6",
2643
- "claude-haiku-4-5",
2644
- "gemini-2.5-pro"
2645
- ],
2646
- // Forgiving archetype — Sonnet primary but Flash safely floors it.
2647
- transform: [
2648
- "claude-sonnet-4-6",
2649
- "claude-haiku-4-5",
2650
- "gemini-2.5-pro",
2651
- "gemini-2.5-flash"
2652
- ],
2653
- // Parallel-tool throughput champion (Flash, L-040). Tier 1 cross-provider
2654
- // Pro; tier 2 Sonnet (quality safety net for blocked-Flash case); tier 3
2655
- // Haiku (reduced tool budget — cliff at 16 fires).
2656
- hunt: [
2657
- "gemini-2.5-flash",
2658
- "gemini-2.5-pro",
2659
- "claude-sonnet-4-6",
2660
- "claude-haiku-4-5"
2661
- ],
2662
- // Cost-sensitive + tolerant. DeepSeek brain-evidence tier 1; Haiku tier 2
2663
- // for quality safety; Flash-Lite emergency floor (onboarded s22).
2664
- summarize: [
2665
- "gemini-2.5-flash",
2666
- "deepseek-v4-flash",
2667
- "claude-haiku-4-5",
2668
- "gemini-2.5-flash-lite"
2669
- ],
2670
- // Brain-validated DeepSeek tier 1 (169 rows, 0% empty); Haiku tier 2;
2671
- // Flash-Lite floor for repeat-prompt workloads (cache-discount 10×).
2672
- classify: [
2673
- "gemini-2.5-flash",
2674
- "deepseek-v4-flash",
2675
- "claude-haiku-4-5",
2676
- "gemini-2.5-flash-lite"
2677
- ]
2678
- };
2679
- function getDefaultFallbackChain(opts) {
2680
- const { archetype, primary, maxDepth = 3, policy, reachability } = opts;
2681
- if (maxDepth < 1) {
2682
- throw new Error(
2683
- `getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
2684
- );
2685
- }
2686
- const allChains = loadChainsFromBrain();
2687
- const starter = allChains[archetype];
2688
- if (!starter) {
2689
- throw new Error(
2690
- `getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(allChains).join(", ")}`
2691
- );
2692
- }
2693
- let chain;
2694
- if (primary) {
2695
- chain = [primary, ...starter.filter((id) => id !== primary)];
2696
- } else {
2697
- chain = [...starter];
2698
- }
2699
- if (policy?.blockedModels && policy.blockedModels.length > 0) {
2700
- const blocked = new Set(policy.blockedModels);
2701
- chain = chain.filter((id) => !blocked.has(id));
2702
- }
2703
- const seen = /* @__PURE__ */ new Set();
2704
- const deduped = [];
2705
- for (const id of chain) {
2706
- if (!seen.has(id)) {
2707
- seen.add(id);
2708
- deduped.push(id);
2709
- }
2710
- }
2711
- let filtered = deduped;
2712
- if (reachability) {
2713
- filtered = deduped.filter((id) => isModelReachable(id, reachability));
2714
- }
2715
- return filtered.slice(0, maxDepth);
2716
- }
2717
- function getStarterChain(archetype) {
2718
- const chain = STARTER_CHAINS[archetype];
2719
- if (!chain) {
2720
- throw new Error(
2721
- `getStarterChain: unknown archetype "${archetype}"`
2722
- );
2723
- }
2724
- return [...chain];
2725
- }
2726
- function getAllStarterChains() {
2727
- const out = {};
2728
- for (const [archetype, chain] of Object.entries(STARTER_CHAINS)) {
2729
- out[archetype] = [...chain];
2730
- }
2731
- return out;
2732
- }
2733
-
2734
3025
  // src/archetype-perf-brain.ts
2735
3026
  function isPerfRow(x) {
2736
3027
  if (!x || typeof x !== "object") return false;