@warmdrift/kgauto-compiler 2.0.0-alpha.7 → 2.0.0-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -15,7 +15,7 @@ import {
15
15
  getProfile,
16
16
  profilesByProvider,
17
17
  tryGetProfile
18
- } from "./chunk-MBEI5UOM.mjs";
18
+ } from "./chunk-3KVKELZN.mjs";
19
19
 
20
20
  // src/tokenizer.ts
21
21
  var tokenizerImpl = defaultCharBasedCounter;
@@ -408,7 +408,11 @@ function lowerAnthropic(ir, profile, hints) {
408
408
  system: systemBlocks,
409
409
  messages,
410
410
  tools,
411
- max_tokens: hints.forceTerseOutput ? 200 : Math.min(profile.maxOutputTokens, 4096)
411
+ // alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
412
+ // cap surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
413
+ // Profile is the single source of truth; consumers wanting a tighter
414
+ // budget can pass providerOverrides.anthropic.max_tokens explicitly.
415
+ max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
412
416
  },
413
417
  diagnostics: {
414
418
  cacheableTokens,
@@ -687,12 +691,13 @@ function setNestedField(obj, path, value) {
687
691
  }
688
692
 
689
693
  // src/advisor.ts
690
- function runAdvisor(ir, result, profile) {
694
+ function runAdvisor(ir, result, profile, policy) {
691
695
  const out = [];
692
696
  out.push(...detectCachingOff(ir, profile));
693
697
  out.push(...detectSingleChunkSystem(ir, profile));
694
698
  out.push(...detectToolBloat(ir, result));
695
699
  out.push(...detectHistoryUncached(ir, profile));
700
+ out.push(...detectSingleModelArray(ir, policy));
696
701
  return out;
697
702
  }
698
703
  function detectCachingOff(ir, profile) {
@@ -764,6 +769,20 @@ function detectHistoryUncached(ir, profile) {
764
769
  }
765
770
  ];
766
771
  }
772
+ function detectSingleModelArray(ir, policy) {
773
+ if (ir.models.length !== 1) return [];
774
+ if (policy?.posture === "locked") return [];
775
+ const only = ir.models[0];
776
+ return [
777
+ {
778
+ level: "warn",
779
+ code: "single-model-array",
780
+ message: `\`ir.models\` has length 1 (only "${only}") and posture is not 'locked'. A single-model chain has no safety net \u2014 the first 429 / 5xx / cliff hits the user as a failure. Master plan \xA71.2 closes the reliability gap with a 2-step minimum.`,
781
+ suggestion: "Use `getDefaultFallbackChain({ archetype: ir.intent.archetype, primary: '" + only + "', posture: 'preferred' })` for a user-anchored chain, or `getDefaultFallbackChain({ archetype, posture: 'open' })` for library-picked. If single-model is intentional (compliance/brand promise), set `policy.posture = 'locked'` to silence this rule.",
782
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#single-model-array"
783
+ }
784
+ ];
785
+ }
767
786
 
768
787
  // src/compile.ts
769
788
  var counter = 0;
@@ -836,7 +855,8 @@ function compile(ir, opts = {}) {
836
855
  tokensIn: inputTokens,
837
856
  diagnostics
838
857
  },
839
- profile
858
+ profile,
859
+ opts.policy
840
860
  );
841
861
  return {
842
862
  handle,
@@ -1288,44 +1308,48 @@ async function call(ir, opts = {}) {
1288
1308
  fetchImpl: opts.fetchImpl,
1289
1309
  providerOverrides: opts.providerOverrides
1290
1310
  });
1291
- if (exec.ok) {
1311
+ const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
1312
+ if (validated.ok) {
1292
1313
  attempts.push({ model: targetModel, status: "success" });
1293
1314
  const latencyMs2 = Date.now() - start;
1294
- const responseWithStructured = withStructuredOutput(exec.response, ir);
1295
1315
  await record({
1296
1316
  handle: initial.handle,
1297
- tokensIn: responseWithStructured.tokens.input,
1298
- tokensOut: responseWithStructured.tokens.output,
1317
+ tokensIn: validated.response.tokens.input,
1318
+ tokensOut: validated.response.tokens.output,
1299
1319
  latencyMs: latencyMs2,
1300
1320
  success: true,
1301
- emptyResponse: responseWithStructured.tokens.output === 0,
1302
- toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
1321
+ emptyResponse: validated.response.tokens.output === 0,
1322
+ toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
1303
1323
  actualModel: targetModel !== initial.target ? targetModel : void 0,
1304
1324
  mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
1305
1325
  promptPreview: extractPromptPreview(ir),
1306
- responsePreview: responseWithStructured.text.slice(0, 200),
1307
- cacheReadInputTokens: responseWithStructured.tokens.cached,
1308
- cacheCreationInputTokens: responseWithStructured.tokens.cacheCreated
1326
+ responsePreview: validated.response.text.slice(0, 200),
1327
+ cacheReadInputTokens: validated.response.tokens.cached,
1328
+ cacheCreationInputTokens: validated.response.tokens.cacheCreated
1309
1329
  });
1330
+ const fellOver = targetModel !== initial.target;
1310
1331
  return {
1311
1332
  handle: initial.handle,
1312
1333
  actualModel: targetModel,
1313
1334
  requestedModel: initial.target,
1314
1335
  provider: activeCompile.provider,
1315
- response: responseWithStructured,
1336
+ response: validated.response,
1316
1337
  latencyMs: latencyMs2,
1317
1338
  mutationsApplied: activeCompile.mutationsApplied,
1318
- attempts
1339
+ attempts,
1340
+ servedBy: targetModel,
1341
+ fellOverFrom: fellOver ? initial.target : void 0,
1342
+ fallbackReason: fellOver ? normalizeFallbackReason(attempts) : void 0
1319
1343
  };
1320
1344
  }
1321
1345
  attempts.push({
1322
1346
  model: targetModel,
1323
- status: exec.errorType,
1324
- errorCode: exec.errorCode,
1325
- message: exec.message
1347
+ status: validated.errorType,
1348
+ errorCode: validated.errorCode,
1349
+ message: validated.message
1326
1350
  });
1327
- lastErr = exec;
1328
- if (exec.errorType === "terminal" || opts.noFallback) {
1351
+ lastErr = validated;
1352
+ if (validated.errorType === "terminal" || opts.noFallback) {
1329
1353
  break;
1330
1354
  }
1331
1355
  }
@@ -1362,20 +1386,49 @@ function extractPromptPreview(ir) {
1362
1386
  if (lastHist) return lastHist.slice(0, 200);
1363
1387
  return void 0;
1364
1388
  }
1365
- function withStructuredOutput(response, ir) {
1366
- if (!ir.constraints?.structuredOutput) return response;
1367
- if (!response.text) return response;
1389
+ function validateStructuredContract(exec, ir) {
1390
+ if (!ir.constraints?.structuredOutput) {
1391
+ return { ok: true, response: exec.response };
1392
+ }
1393
+ const finish = (exec.response.finishReason ?? "").toLowerCase();
1394
+ if (finish === "max_tokens" || finish === "length") {
1395
+ return {
1396
+ ok: false,
1397
+ status: exec.status,
1398
+ errorType: "retryable",
1399
+ errorCode: "max_tokens_on_structured_output",
1400
+ message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
1401
+ raw: exec.response.raw
1402
+ };
1403
+ }
1404
+ if (!exec.response.text) {
1405
+ return { ok: true, response: exec.response };
1406
+ }
1368
1407
  try {
1369
- const parsed = JSON.parse(response.text);
1370
- return { ...response, structuredOutput: parsed };
1408
+ const parsed = JSON.parse(exec.response.text);
1409
+ return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
1371
1410
  } catch (err) {
1372
1411
  return {
1373
- ...response,
1374
- structuredOutput: null,
1375
- parseError: err instanceof Error ? err.message : String(err)
1412
+ ok: false,
1413
+ status: exec.status,
1414
+ errorType: "retryable",
1415
+ errorCode: "structured_output_parse_failed",
1416
+ message: err instanceof Error ? err.message : String(err),
1417
+ raw: exec.response.raw
1376
1418
  };
1377
1419
  }
1378
1420
  }
1421
+ function normalizeFallbackReason(attempts) {
1422
+ const first = attempts.find((a) => a.status !== "success");
1423
+ if (!first) return void 0;
1424
+ const code = first.errorCode ?? "";
1425
+ if (code === "rate_limit_429" || code === "rate_limit") return "rate_limit";
1426
+ if (code === "max_tokens_on_structured_output" || code === "structured_output_parse_failed") {
1427
+ return "cliff";
1428
+ }
1429
+ if (code === "cost_cap_exceeded") return "cost_cap";
1430
+ return "provider_error";
1431
+ }
1379
1432
 
1380
1433
  // src/oracle.ts
1381
1434
  var DEFAULT_DIMENSIONS = ["correctness", "completeness", "conciseness", "format"];
@@ -1465,6 +1518,126 @@ function clamp(n) {
1465
1518
  return Math.max(0, Math.min(1, n));
1466
1519
  }
1467
1520
 
1521
+ // src/fallback.ts
1522
+ var STARTER_CHAINS = {
1523
+ // Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
1524
+ critique: [
1525
+ "claude-opus-4-7",
1526
+ "claude-sonnet-4-6",
1527
+ "gemini-2.5-pro"
1528
+ ],
1529
+ // Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
1530
+ // to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
1531
+ plan: [
1532
+ "claude-sonnet-4-6",
1533
+ "claude-opus-4-7",
1534
+ "gemini-2.5-pro",
1535
+ "deepseek-v4-pro"
1536
+ ],
1537
+ // Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
1538
+ // Flash floor for the open-posture chain.
1539
+ generate: [
1540
+ "claude-sonnet-4-6",
1541
+ "claude-haiku-4-5",
1542
+ "gemini-2.5-pro",
1543
+ "gemini-2.5-flash"
1544
+ ],
1545
+ ask: [
1546
+ "claude-sonnet-4-6",
1547
+ "claude-haiku-4-5",
1548
+ "gemini-2.5-pro",
1549
+ "gemini-2.5-flash"
1550
+ ],
1551
+ // Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
1552
+ // DeepSeek skipped (no brain evidence). Floor at Haiku.
1553
+ extract: [
1554
+ "claude-sonnet-4-6",
1555
+ "claude-haiku-4-5",
1556
+ "gemini-2.5-pro"
1557
+ ],
1558
+ // Forgiving archetype — Sonnet primary but Flash safely floors it.
1559
+ transform: [
1560
+ "claude-sonnet-4-6",
1561
+ "claude-haiku-4-5",
1562
+ "gemini-2.5-pro",
1563
+ "gemini-2.5-flash"
1564
+ ],
1565
+ // Parallel-tool throughput champion (Flash, L-040). Tier 1 cross-provider
1566
+ // Pro; tier 2 Sonnet (quality safety net for blocked-Flash case); tier 3
1567
+ // Haiku (reduced tool budget — cliff at 16 fires).
1568
+ hunt: [
1569
+ "gemini-2.5-flash",
1570
+ "gemini-2.5-pro",
1571
+ "claude-sonnet-4-6",
1572
+ "claude-haiku-4-5"
1573
+ ],
1574
+ // Cost-sensitive + tolerant. DeepSeek brain-evidence tier 1; Haiku tier 2
1575
+ // for quality safety; Flash-Lite emergency floor (onboarded s22).
1576
+ summarize: [
1577
+ "gemini-2.5-flash",
1578
+ "deepseek-v4-flash",
1579
+ "claude-haiku-4-5",
1580
+ "gemini-2.5-flash-lite"
1581
+ ],
1582
+ // Brain-validated DeepSeek tier 1 (169 rows, 0% empty); Haiku tier 2;
1583
+ // Flash-Lite floor for repeat-prompt workloads (cache-discount 10×).
1584
+ classify: [
1585
+ "gemini-2.5-flash",
1586
+ "deepseek-v4-flash",
1587
+ "claude-haiku-4-5",
1588
+ "gemini-2.5-flash-lite"
1589
+ ]
1590
+ };
1591
+ function getDefaultFallbackChain(opts) {
1592
+ const { archetype, primary, maxDepth = 3, policy } = opts;
1593
+ if (maxDepth < 1) {
1594
+ throw new Error(
1595
+ `getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
1596
+ );
1597
+ }
1598
+ const starter = STARTER_CHAINS[archetype];
1599
+ if (!starter) {
1600
+ throw new Error(
1601
+ `getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(STARTER_CHAINS).join(", ")}`
1602
+ );
1603
+ }
1604
+ let chain;
1605
+ if (primary) {
1606
+ chain = [primary, ...starter.filter((id) => id !== primary)];
1607
+ } else {
1608
+ chain = [...starter];
1609
+ }
1610
+ if (policy?.blockedModels && policy.blockedModels.length > 0) {
1611
+ const blocked = new Set(policy.blockedModels);
1612
+ chain = chain.filter((id) => !blocked.has(id));
1613
+ }
1614
+ const seen = /* @__PURE__ */ new Set();
1615
+ const deduped = [];
1616
+ for (const id of chain) {
1617
+ if (!seen.has(id)) {
1618
+ seen.add(id);
1619
+ deduped.push(id);
1620
+ }
1621
+ }
1622
+ return deduped.slice(0, maxDepth);
1623
+ }
1624
+ function getStarterChain(archetype) {
1625
+ const chain = STARTER_CHAINS[archetype];
1626
+ if (!chain) {
1627
+ throw new Error(
1628
+ `getStarterChain: unknown archetype "${archetype}"`
1629
+ );
1630
+ }
1631
+ return [...chain];
1632
+ }
1633
+ function getAllStarterChains() {
1634
+ const out = {};
1635
+ for (const [archetype, chain] of Object.entries(STARTER_CHAINS)) {
1636
+ out[archetype] = [...chain];
1637
+ }
1638
+ return out;
1639
+ }
1640
+
1468
1641
  // src/index.ts
1469
1642
  function compile2(ir, opts) {
1470
1643
  const result = compile(ir, opts);
@@ -1488,7 +1661,10 @@ export {
1488
1661
  configureBrain,
1489
1662
  countTokens,
1490
1663
  execute,
1664
+ getAllStarterChains,
1665
+ getDefaultFallbackChain,
1491
1666
  getProfile,
1667
+ getStarterChain,
1492
1668
  hashShape,
1493
1669
  isArchetype,
1494
1670
  learningKey,
@@ -159,6 +159,27 @@ interface CompilePolicy {
159
159
  * differences but not large enough to override hard rejects).
160
160
  */
161
161
  preferredModels?: string[];
162
+ /**
163
+ * Customer-posture tag (master plan §1.2, alpha.9).
164
+ *
165
+ * - `'locked'` — compliance/contract/brand-promise. Caller passes
166
+ * exactly one model; no fallback is desired. kgauto
167
+ * never walks the chain.
168
+ * - `'preferred'` — user-selected primary, fallback chain as safety
169
+ * net. On 429/5xx, walk the chain and surface
170
+ * `fellOverFrom` so the consumer can show "Claude
171
+ * was busy; we used Pro for this answer."
172
+ * - `'open'` — library picks the chain. Model identity is
173
+ * irrelevant; output is the contract.
174
+ *
175
+ * The field is **informational** — kgauto's execution path is already
176
+ * determined by the shape of `ir.models`. Posture surfaces in
177
+ * telemetry so the cost-watcher can distinguish "locked failed, no
178
+ * fallback was tried" from "open chain exhausted." Default: unspecified.
179
+ * When `ir.models.length === 1` and posture is not `'locked'`, the
180
+ * advisor emits a `single-model-array` warning.
181
+ */
182
+ posture?: 'locked' | 'preferred' | 'open';
162
183
  }
163
184
  /**
164
185
  * The IR — the input to compile().
@@ -418,6 +439,18 @@ interface CallAttempt {
418
439
  errorCode?: string;
419
440
  message?: string;
420
441
  }
442
+ /**
443
+ * Why fallback fired. Normalized for `CallResult.fallbackReason` (alpha.9).
444
+ *
445
+ * - `rate_limit` provider returned 429
446
+ * - `provider_error` 5xx, network, or other retryable upstream issue
447
+ * - `cost_cap` preflight policy.maxCostPerCallUsd rejected target
448
+ * - `cliff` alpha.8 contract violation (MAX_TOKENS on
449
+ * structured output, parse-failed JSON)
450
+ * - `contract_violation` other compile-time-contract failures (reserved
451
+ * for alpha.10+ — e.g. mid-stream policy rejects)
452
+ */
453
+ type FallbackReason = 'rate_limit' | 'provider_error' | 'cost_cap' | 'cliff' | 'contract_violation';
421
454
  interface CallResult {
422
455
  /** Compile handle (still valid for record() if consumer wants to add oracle scores later). */
423
456
  handle: string;
@@ -432,6 +465,28 @@ interface CallResult {
432
465
  mutationsApplied: MutationApplied[];
433
466
  /** One entry per provider attempt — observability for retry/fallback walks. */
434
467
  attempts: CallAttempt[];
468
+ /**
469
+ * Alpha.9 normalization of fallback-walk telemetry. When the chain
470
+ * succeeded on the first attempt, these collapse to:
471
+ * - `servedBy === requestedModel`
472
+ * - `fellOverFrom` undefined
473
+ * - `fallbackReason` undefined
474
+ *
475
+ * When fallback fired:
476
+ * - `servedBy` = `actualModel` (the model that produced the response)
477
+ * - `fellOverFrom` = `requestedModel` (what the caller / compile() asked for)
478
+ * - `fallbackReason` = normalized cause derived from the first
479
+ * non-success attempt's `errorCode`
480
+ *
481
+ * Consumer UX use: show "Claude was busy; we used Pro for this answer"
482
+ * when `fellOverFrom` is set (master plan §3.6).
483
+ */
484
+ /** Model that actually answered. Equal to `actualModel`; kept distinct for clarity. */
485
+ servedBy: string;
486
+ /** Set only when fallback fired. Equal to `requestedModel` in that case. */
487
+ fellOverFrom?: string;
488
+ /** Set only when fallback fired. Normalized cause. */
489
+ fallbackReason?: FallbackReason;
435
490
  }
436
491
  /**
437
492
  * Thrown when call() exhausts the fallback chain without success.
@@ -609,6 +664,32 @@ interface ModelProfile {
609
664
  weaknesses: string[];
610
665
  notes?: string;
611
666
  verifiedAgainstDocs?: string;
667
+ /**
668
+ * Hand-curated per-archetype performance score on a 0-10 scale.
669
+ *
670
+ * 10 = frontier on this archetype (e.g. Opus 4.7 on critique)
671
+ * 8 = strong second tier (Sonnet on plan, Pro on extract)
672
+ * 7 = competent (Haiku on classify, Flash on hunt)
673
+ * 5 = acceptable for tolerant archetypes (Flash-Lite on classify)
674
+ * 3 = degraded (Flash on critique, DeepSeek on hunt)
675
+ *
676
+ * Missing archetypes default to `5` (no data, neutral). Each non-default
677
+ * value should carry a one-line rationale in the profile's note or inline
678
+ * comment citing brain evidence, family prior, or "starter hypothesis —
679
+ * verify with telemetry."
680
+ *
681
+ * Source today: hand-curated from master plan §3.3 + §6.2 starter tables.
682
+ * Source tomorrow (alpha.10+): brain `archetype_model_evidence` view.
683
+ *
684
+ * Anti-hallucination guardrail (master plan §2.5): when the watcher's
685
+ * `--audit-fields` flag flags a profile stale (>90 days since
686
+ * verifiedAgainstDocs), the archetypePerf values get re-audited
687
+ * alongside capability fields. AI-trained intuition is NOT a valid
688
+ * source — only docs or brain evidence.
689
+ *
690
+ * alpha.9.
691
+ */
692
+ archetypePerf?: Partial<Record<IntentArchetypeName, number>>;
612
693
  }
613
694
  declare const ALIASES: Record<string, string>;
614
695
  declare function getProfile(id: string): ModelProfile;
@@ -616,4 +697,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
616
697
  declare function allProfiles(): readonly ModelProfile[];
617
698
  declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
618
699
 
619
- export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
700
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
@@ -159,6 +159,27 @@ interface CompilePolicy {
159
159
  * differences but not large enough to override hard rejects).
160
160
  */
161
161
  preferredModels?: string[];
162
+ /**
163
+ * Customer-posture tag (master plan §1.2, alpha.9).
164
+ *
165
+ * - `'locked'` — compliance/contract/brand-promise. Caller passes
166
+ * exactly one model; no fallback is desired. kgauto
167
+ * never walks the chain.
168
+ * - `'preferred'` — user-selected primary, fallback chain as safety
169
+ * net. On 429/5xx, walk the chain and surface
170
+ * `fellOverFrom` so the consumer can show "Claude
171
+ * was busy; we used Pro for this answer."
172
+ * - `'open'` — library picks the chain. Model identity is
173
+ * irrelevant; output is the contract.
174
+ *
175
+ * The field is **informational** — kgauto's execution path is already
176
+ * determined by the shape of `ir.models`. Posture surfaces in
177
+ * telemetry so the cost-watcher can distinguish "locked failed, no
178
+ * fallback was tried" from "open chain exhausted." Default: unspecified.
179
+ * When `ir.models.length === 1` and posture is not `'locked'`, the
180
+ * advisor emits a `single-model-array` warning.
181
+ */
182
+ posture?: 'locked' | 'preferred' | 'open';
162
183
  }
163
184
  /**
164
185
  * The IR — the input to compile().
@@ -418,6 +439,18 @@ interface CallAttempt {
418
439
  errorCode?: string;
419
440
  message?: string;
420
441
  }
442
+ /**
443
+ * Why fallback fired. Normalized for `CallResult.fallbackReason` (alpha.9).
444
+ *
445
+ * - `rate_limit` provider returned 429
446
+ * - `provider_error` 5xx, network, or other retryable upstream issue
447
+ * - `cost_cap` preflight policy.maxCostPerCallUsd rejected target
448
+ * - `cliff` alpha.8 contract violation (MAX_TOKENS on
449
+ * structured output, parse-failed JSON)
450
+ * - `contract_violation` other compile-time-contract failures (reserved
451
+ * for alpha.10+ — e.g. mid-stream policy rejects)
452
+ */
453
+ type FallbackReason = 'rate_limit' | 'provider_error' | 'cost_cap' | 'cliff' | 'contract_violation';
421
454
  interface CallResult {
422
455
  /** Compile handle (still valid for record() if consumer wants to add oracle scores later). */
423
456
  handle: string;
@@ -432,6 +465,28 @@ interface CallResult {
432
465
  mutationsApplied: MutationApplied[];
433
466
  /** One entry per provider attempt — observability for retry/fallback walks. */
434
467
  attempts: CallAttempt[];
468
+ /**
469
+ * Alpha.9 normalization of fallback-walk telemetry. When the chain
470
+ * succeeded on the first attempt, these collapse to:
471
+ * - `servedBy === requestedModel`
472
+ * - `fellOverFrom` undefined
473
+ * - `fallbackReason` undefined
474
+ *
475
+ * When fallback fired:
476
+ * - `servedBy` = `actualModel` (the model that produced the response)
477
+ * - `fellOverFrom` = `requestedModel` (what the caller / compile() asked for)
478
+ * - `fallbackReason` = normalized cause derived from the first
479
+ * non-success attempt's `errorCode`
480
+ *
481
+ * Consumer UX use: show "Claude was busy; we used Pro for this answer"
482
+ * when `fellOverFrom` is set (master plan §3.6).
483
+ */
484
+ /** Model that actually answered. Equal to `actualModel`; kept distinct for clarity. */
485
+ servedBy: string;
486
+ /** Set only when fallback fired. Equal to `requestedModel` in that case. */
487
+ fellOverFrom?: string;
488
+ /** Set only when fallback fired. Normalized cause. */
489
+ fallbackReason?: FallbackReason;
435
490
  }
436
491
  /**
437
492
  * Thrown when call() exhausts the fallback chain without success.
@@ -609,6 +664,32 @@ interface ModelProfile {
609
664
  weaknesses: string[];
610
665
  notes?: string;
611
666
  verifiedAgainstDocs?: string;
667
+ /**
668
+ * Hand-curated per-archetype performance score on a 0-10 scale.
669
+ *
670
+ * 10 = frontier on this archetype (e.g. Opus 4.7 on critique)
671
+ * 8 = strong second tier (Sonnet on plan, Pro on extract)
672
+ * 7 = competent (Haiku on classify, Flash on hunt)
673
+ * 5 = acceptable for tolerant archetypes (Flash-Lite on classify)
674
+ * 3 = degraded (Flash on critique, DeepSeek on hunt)
675
+ *
676
+ * Missing archetypes default to `5` (no data, neutral). Each non-default
677
+ * value should carry a one-line rationale in the profile's note or inline
678
+ * comment citing brain evidence, family prior, or "starter hypothesis —
679
+ * verify with telemetry."
680
+ *
681
+ * Source today: hand-curated from master plan §3.3 + §6.2 starter tables.
682
+ * Source tomorrow (alpha.10+): brain `archetype_model_evidence` view.
683
+ *
684
+ * Anti-hallucination guardrail (master plan §2.5): when the watcher's
685
+ * `--audit-fields` flag flags a profile stale (>90 days since
686
+ * verifiedAgainstDocs), the archetypePerf values get re-audited
687
+ * alongside capability fields. AI-trained intuition is NOT a valid
688
+ * source — only docs or brain evidence.
689
+ *
690
+ * alpha.9.
691
+ */
692
+ archetypePerf?: Partial<Record<IntentArchetypeName, number>>;
612
693
  }
613
694
  declare const ALIASES: Record<string, string>;
614
695
  declare function getProfile(id: string): ModelProfile;
@@ -616,4 +697,4 @@ declare function tryGetProfile(id: string): ModelProfile | undefined;
616
697
  declare function allProfiles(): readonly ModelProfile[];
617
698
  declare function profilesByProvider(provider: Provider): readonly ModelProfile[];
618
699
 
619
- export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
700
+ export { type ApiKeys as A, type BestPracticeAdvisory as B, type CompilePolicy as C, type FallbackReason as F, type HistoryCachePolicy as H, type IntentDeclaration as I, type LoweringSpec as L, type ModelProfile as M, type NormalizedResponse as N, type OracleScore as O, type ProviderOverrides as P, type RecordInput as R, type StructuredOutputCapability as S, type ToolCall as T, type CompiledRequest as a, type PromptIR as b, type CallOptions as c, type CallResult as d, type CompileResult as e, ALIASES as f, type CacheStrategy as g, type CallAttempt as h, CallError as i, type CliffRule as j, type Constraints as k, type Message as l, type MutationApplied as m, type NormalizedTokens as n, type PromptSection as o, type Provider as p, type RecoveryRule as q, type SystemPromptMode as r, type ToolDefinition as s, allProfiles as t, getProfile as u, profilesByProvider as v, tryGetProfile as w };
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-NUZOIzGr.mjs';
2
2
  import './dialect.mjs';
@@ -1,2 +1,2 @@
1
- export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
1
+ export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-BYVOc1eW.js';
2
2
  import './dialect.js';