@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -9,13 +9,47 @@ import {
9
9
  isArchetype,
10
10
  learningKey
11
11
  } from "./chunk-5TI6PNSK.mjs";
12
+ import {
13
+ ABSOLUTE_FLOOR,
14
+ ARCHETYPE_FLOOR_DEFAULT,
15
+ PROVIDER_ENV_KEYS,
16
+ configureBrainQuery,
17
+ createBrainQueryCache,
18
+ ensureCrossProviderTail,
19
+ getAllStarterChains,
20
+ getAllStarterChainsWithGrounding,
21
+ getDefaultFallbackChain,
22
+ getDefaultFallbackChainWithGrounding,
23
+ getModelCompatibility,
24
+ getPerAxisMetrics,
25
+ getReachabilityDiagnostic,
26
+ getSequentialStarterChain,
27
+ getSequentialStarterChainWithGrounding,
28
+ getStarterChain,
29
+ getStarterChainWithGrounding,
30
+ isBrainQueryActiveFor,
31
+ isModelReachable,
32
+ isProviderReachable,
33
+ loadChainsFromBrain,
34
+ resolveProviderKey
35
+ } from "./chunk-WXCFWUCN.mjs";
12
36
  import {
13
37
  ALIASES,
38
+ _setProfileBrainHook,
14
39
  allProfiles,
40
+ allProfilesRaw,
15
41
  getProfile,
16
42
  profilesByProvider,
17
43
  tryGetProfile
18
- } from "./chunk-MBEI5UOM.mjs";
44
+ } from "./chunk-JQGRWJZO.mjs";
45
+ import {
46
+ emitAdvisoryFired,
47
+ emitCompileDone,
48
+ emitCompileStart,
49
+ emitExecuteAttempt,
50
+ emitExecuteSuccess,
51
+ emitFallbackWalked
52
+ } from "./chunk-NBO4R5PC.mjs";
19
53
 
20
54
  // src/tokenizer.ts
21
55
  var tokenizerImpl = defaultCharBasedCounter;
@@ -120,38 +154,96 @@ function passToolRelevance(ir, opts = {}) {
120
154
  ]
121
155
  };
122
156
  }
157
/**
 * Adds up the token counts of every history message whose `content` is a
 * string. Messages with any other kind of `content` contribute nothing.
 *
 * @param {Iterable<{content?: unknown}>} history Messages to measure.
 * @returns {number} Sum of `countTokens(content)` over string-content messages.
 */
function totalHistoryTokens(history) {
  let sum = 0;
  for (const { content } of history) {
    if (typeof content !== "string") continue;
    sum += countTokens(content);
  }
  return sum;
}
123
164
  function passCompressHistory(ir, opts = {}) {
124
165
  const history = ir.history;
125
- if (!history || history.length === 0) return { value: ir, mutations: [] };
166
+ if (!history || history.length === 0) {
167
+ return { value: ir, mutations: [], historyTokensTotal: 0 };
168
+ }
126
169
  const keepRecent = opts.keepRecent ?? 4;
127
170
  const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
128
- if (history.length <= summarizeOlderThan) return { value: ir, mutations: [] };
129
- const cutIndex = history.length - keepRecent;
130
- const old = history.slice(0, cutIndex);
131
- const recent = history.slice(cutIndex);
132
- const userTurns = old.filter((m) => m.role === "user");
133
- const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
134
- const summary = {
135
- role: "system",
136
- content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
137
- };
138
- return {
139
- value: { ...ir, history: [summary, ...recent] },
140
- mutations: [
141
- {
142
- id: `compress-history-${old.length}`,
143
- source: "static_pass",
144
- passName: "compress_history",
145
- description: `Compressed ${old.length} old turns into 1 summary line (kept ${keepRecent} recent)`
171
+ const summarizeAboveTokens = opts.summarizeAboveTokens;
172
+ const historyTokensTotal = totalHistoryTokens(history);
173
+ const countThresholdHit = history.length > summarizeOlderThan;
174
+ const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens;
175
+ if (!countThresholdHit && !tokenThresholdHit) {
176
+ return { value: ir, mutations: [], historyTokensTotal };
177
+ }
178
+ if (history.length > keepRecent) {
179
+ const cutIndex = history.length - keepRecent;
180
+ const old = history.slice(0, cutIndex);
181
+ const recent = history.slice(cutIndex);
182
+ const userTurns = old.filter((m) => m.role === "user");
183
+ const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
184
+ const oldTokens = totalHistoryTokens(old);
185
+ const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
186
+ const summary = {
187
+ role: "system",
188
+ content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
189
+ };
190
+ return {
191
+ value: { ...ir, history: [summary, ...recent] },
192
+ mutations: [
193
+ {
194
+ id: `compress-history-${old.length}`,
195
+ source: "static_pass",
196
+ passName: "compress_history",
197
+ description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
198
+ }
199
+ ],
200
+ historyTokensTotal
201
+ };
202
+ }
203
+ if (tokenThresholdHit) {
204
+ let fattestIdx = -1;
205
+ let fattestTokens = 0;
206
+ for (let i = 0; i < history.length; i++) {
207
+ const m = history[i];
208
+ if (!m || typeof m.content !== "string") continue;
209
+ const t = countTokens(m.content);
210
+ if (t > fattestTokens) {
211
+ fattestTokens = t;
212
+ fattestIdx = i;
146
213
  }
147
- ]
148
- };
214
+ }
215
+ const FAT_DOMINANCE_FLOOR = 0.3;
216
+ const fattest = fattestIdx >= 0 ? history[fattestIdx] : void 0;
217
+ if (fattest && historyTokensTotal > 0 && fattestTokens / historyTokensTotal >= FAT_DOMINANCE_FLOOR) {
218
+ const firstLine = fattest.content.split("\n")[0]?.slice(0, 200) ?? "";
219
+ const newContent = `[Earlier ${fattest.role} message content omitted: ~${fattestTokens} tokens. Preview: "${firstLine}"]`;
220
+ const newHistory = history.slice();
221
+ newHistory[fattestIdx] = { ...fattest, content: newContent };
222
+ return {
223
+ value: { ...ir, history: newHistory },
224
+ mutations: [
225
+ {
226
+ id: `compress-fat-message-${fattestIdx}`,
227
+ source: "static_pass",
228
+ passName: "compress_history",
229
+ description: `Replaced fat ${fattest.role} message #${fattestIdx} content (~${fattestTokens} of ${historyTokensTotal} tokens, ${Math.round(fattestTokens / historyTokensTotal * 100)}% of history) with summary stub \u2014 token threshold ${summarizeAboveTokens} exceeded (history.length ${history.length} <= keepRecent ${keepRecent}, slice not possible)`
230
+ }
231
+ ],
232
+ historyTokensTotal
233
+ };
234
+ }
235
+ }
236
+ return { value: ir, mutations: [], historyTokensTotal };
149
237
  }
150
238
  function passApplyCliffs(ir, profile, estimatedInputTokens) {
151
239
  const mutations = [];
152
240
  const hints = { qualityWarning: [] };
153
241
  let nextIR = ir;
242
+ const sequentialMode = nextIR.constraints?.toolOrchestration === "sequential";
154
243
  for (const cliff of profile.cliffs) {
244
+ if (sequentialMode && cliff.reason.includes("L-040")) {
245
+ continue;
246
+ }
155
247
  let triggered = false;
156
248
  switch (cliff.metric) {
157
249
  case "input_tokens":
@@ -374,10 +466,16 @@ function lower(ir, profile, hints = {}) {
374
466
  }
375
467
  function lowerAnthropic(ir, profile, hints) {
376
468
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
377
- const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
469
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
470
+ const policy = ir.historyCachePolicy;
471
+ const markIndex = resolveHistoryMarkIndex(history.length, policy);
472
+ const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
378
473
  const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
379
474
  const cacheableTokens = computeCacheableTokens(systemBlocks);
380
- const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
475
+ const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
476
+ const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
477
+ const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
478
+ const toolChoice = hints.wireOverrides?.parallelToolCalls === false && tools && tools.length > 0 ? { type: "auto", disable_parallel_tool_use: true } : void 0;
381
479
  return {
382
480
  request: {
383
481
  provider: "anthropic",
@@ -385,10 +483,16 @@ function lowerAnthropic(ir, profile, hints) {
385
483
  system: systemBlocks,
386
484
  messages,
387
485
  tools,
388
- max_tokens: hints.forceTerseOutput ? 200 : Math.min(profile.maxOutputTokens, 4096)
486
+ // alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
487
+ // floor surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
488
+ // Profile is the single source of truth; consumers wanting a tighter
489
+ // budget can pass providerOverrides.anthropic.max_tokens explicitly.
490
+ max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens,
491
+ tool_choice: toolChoice
389
492
  },
390
493
  diagnostics: {
391
494
  cacheableTokens,
495
+ historyCacheableTokens,
392
496
  estimatedCacheSavingsUsd: cacheSavings
393
497
  }
394
498
  };
@@ -421,17 +525,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
421
525
  }
422
526
  return blocks;
423
527
  }
424
/**
 * Converts IR history plus the current turn into the Anthropic `messages` array.
 *
 * System-role entries are dropped. The history entry at position `markIndex`
 * has its content routed through `attachAnthropicCacheControl`; every other
 * entry sends `parts` when present, otherwise `content`. The current turn is
 * appended last and is never cache-marked.
 *
 * @param {Array<object>} history Prior messages.
 * @param {object} [currentTurn] The turn being sent now.
 * @param {number} [markIndex] Index into `history` of the entry to cache-mark.
 * @returns {Array<{role: string, content: unknown}>}
 */
function buildAnthropicMessages(history, currentTurn, markIndex) {
  const wireContent = (message) => message.parts ?? message.content;
  const messages = [];
  for (const [position, message] of history.entries()) {
    if (message.role === "system") continue;
    const content = position === markIndex ? attachAnthropicCacheControl(message) : wireContent(message);
    messages.push({ role: message.role, content });
  }
  const hasSendableTurn = Boolean(currentTurn) && currentTurn.role !== "system";
  if (hasSendableTurn) {
    messages.push({ role: currentTurn.role, content: wireContent(currentTurn) });
  }
  return messages;
}
544
/**
 * Builds the content-block array for a message with an ephemeral
 * `cache_control` marker on its final block.
 *
 * With a non-empty `parts` array, a copy is returned whose last block gains the
 * marker (the input blocks are not mutated). Otherwise `content` is wrapped in
 * a single marked text block.
 *
 * @param {{parts?: Array<object>, content?: unknown}} m Message to mark.
 * @returns {Array<object>} Content blocks, the last one carrying `cache_control`.
 */
function attachAnthropicCacheControl(m) {
  const hasParts = Array.isArray(m.parts) && m.parts.length > 0;
  if (!hasParts) {
    return [{ type: "text", text: m.content, cache_control: { type: "ephemeral" } }];
  }
  const leading = m.parts.slice(0, -1);
  const tail = m.parts[m.parts.length - 1];
  return [...leading, { ...tail, cache_control: { type: "ephemeral" } }];
}
562
/**
 * Resolves which history index should carry the cache marker.
 *
 * - No policy, strategy `"none"`, or empty history: `-1` (nothing marked).
 * - Strategy `"all-but-latest"`: the last history index.
 * - Any other strategy: the index `policy.suffix` entries before the last one.
 *
 * The suffix must be a non-negative integer. Anything else (missing, negative,
 * fractional, non-numeric) resolves to `-1`. Without this check a negative or
 * fractional suffix produced an index that matches no message, while callers
 * that sum tokens "through" the index would still report cacheable history.
 *
 * @param {number} historyLen Number of history messages.
 * @param {{strategy?: string, suffix?: number}} [policy] History cache policy.
 * @returns {number} Index in `[0, historyLen - 1]`, or `-1` for "no mark".
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  if (!policy || policy.strategy === "none") return -1;
  if (historyLen === 0) return -1;
  if (policy.strategy === "all-but-latest") {
    return historyLen - 1;
  }
  // Number() keeps the numeric coercion that the plain subtraction used to apply.
  const suffix = Number(policy.suffix);
  if (!Number.isInteger(suffix) || suffix < 0) return -1;
  const idx = historyLen - 1 - suffix;
  return idx >= 0 ? idx : -1;
}
571
/**
 * Counts tokens in `history[0..throughIndex]` (inclusive, clamped to the array).
 *
 * System-role messages are skipped. A message with a `parts` array contributes
 * the tokens of each part's string `text`; otherwise its string `content` is
 * counted.
 *
 * @param {Array<object>} history Messages to measure.
 * @param {number} throughIndex Last index to include.
 * @returns {number} Token total for the covered prefix.
 */
function sumHistoryTokens(history, throughIndex) {
  const lastIndex = Math.min(throughIndex, history.length - 1);
  let tokens = 0;
  for (let position = 0; position <= lastIndex; position++) {
    const message = history[position];
    if (message.role === "system") continue;
    if (Array.isArray(message.parts)) {
      for (const part of message.parts) {
        if (typeof part.text === "string") tokens += countTokens(part.text);
      }
      continue;
    }
    if (typeof message.content === "string") {
      tokens += countTokens(message.content);
    }
  }
  return tokens;
}
435
586
  function toAnthropicTools(tools) {
436
587
  return tools.map((t) => ({
437
588
  name: t.name,
@@ -466,6 +617,9 @@ function lowerGoogle(ir, profile, hints) {
466
617
  const minTokens = profile.lowering.cache.minTokens ?? 4096;
467
618
  const meetsMin = cacheableTokens >= minTokens;
468
619
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
620
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
621
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
622
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
469
623
  return {
470
624
  request: {
471
625
  provider: "google",
@@ -477,6 +631,7 @@ function lowerGoogle(ir, profile, hints) {
477
631
  },
478
632
  diagnostics: {
479
633
  cacheableTokens: meetsMin ? cacheableTokens : 0,
634
+ historyCacheableTokens,
480
635
  estimatedCacheSavingsUsd: cacheSavings
481
636
  }
482
637
  };
@@ -524,6 +679,10 @@ function lowerOpenAI(ir, profile, hints) {
524
679
  content: ir.currentTurn.parts ?? ir.currentTurn.content
525
680
  });
526
681
  }
682
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
683
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
684
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
685
+ const openaiParallelToolCalls = hints.wireOverrides?.parallelToolCalls === false && ir.tools && ir.tools.length > 0 ? false : void 0;
527
686
  return {
528
687
  request: {
529
688
  provider: "openai",
@@ -531,9 +690,14 @@ function lowerOpenAI(ir, profile, hints) {
531
690
  messages,
532
691
  tools: ir.tools && ir.tools.length > 0 ? toOpenAITools(ir.tools) : void 0,
533
692
  response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
534
- reasoning_effort: hints.forceTerseOutput ? "low" : void 0
693
+ reasoning_effort: hints.forceTerseOutput ? "low" : void 0,
694
+ parallel_tool_calls: openaiParallelToolCalls
535
695
  },
536
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
696
+ diagnostics: {
697
+ cacheableTokens: 0,
698
+ historyCacheableTokens,
699
+ estimatedCacheSavingsUsd: 0
700
+ }
537
701
  };
538
702
  }
539
703
  function toOpenAITools(tools) {
@@ -560,6 +724,9 @@ function lowerDeepSeek(ir, profile) {
560
724
  content: ir.currentTurn.parts ?? ir.currentTurn.content
561
725
  });
562
726
  }
727
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
728
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
729
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
563
730
  return {
564
731
  request: {
565
732
  provider: "deepseek",
@@ -574,7 +741,11 @@ function lowerDeepSeek(ir, profile) {
574
741
  }
575
742
  })) : void 0
576
743
  },
577
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
744
+ diagnostics: {
745
+ cacheableTokens: 0,
746
+ historyCacheableTokens,
747
+ estimatedCacheSavingsUsd: 0
748
+ }
578
749
  };
579
750
  }
580
751
  function sortSections(sections) {
@@ -597,6 +768,360 @@ function setNestedField(obj, path, value) {
597
768
  cursor[parts[parts.length - 1]] = value;
598
769
  }
599
770
 
771
+ // src/archetype-perf-brain.ts
772
/**
 * Checks that a value has the minimum shape of an archetype-perf row:
 * string `model_id`, string `archetype`, and numeric `perf_score`.
 *
 * @param {unknown} x Candidate row.
 * @returns {boolean} True when all three fields have the expected types.
 */
function isPerfRow(x) {
  if (typeof x !== "object" || x === null) return false;
  const hasStringKeys = ["model_id", "archetype"].every((key) => typeof x[key] === "string");
  return hasStringKeys && typeof x.perf_score === "number";
}
777
/**
 * Groups perf rows into `Map<model_id, { [archetype]: perf_score }>`.
 * Rows that fail `isPerfRow` are ignored; a later row for the same
 * (model, archetype) pair overwrites an earlier one.
 *
 * @param {Iterable<unknown>} rows Raw rows.
 * @returns {Map<string, Record<string, number>>}
 */
function mapRowsToPerfMap(rows) {
  const scoresByModel = new Map();
  for (const row of rows) {
    if (!isPerfRow(row)) continue;
    const { model_id: modelId, archetype, perf_score: score } = row;
    if (!scoresByModel.has(modelId)) scoresByModel.set(modelId, {});
    scoresByModel.get(modelId)[archetype] = score;
  }
  return scoresByModel;
}
787
/**
 * Groups perf rows into `Map<model_id, { [archetype]: n }>`.
 * Rows that fail `isPerfRow`, or whose `n` is not a number, are ignored.
 *
 * @param {Iterable<unknown>} rows Raw rows.
 * @returns {Map<string, Record<string, number>>}
 */
function mapRowsToNMap(rows) {
  const samplesByModel = new Map();
  for (const row of rows) {
    const usable = isPerfRow(row) && typeof row.n === "number";
    if (!usable) continue;
    if (!samplesByModel.has(row.model_id)) samplesByModel.set(row.model_id, {});
    samplesByModel.get(row.model_id)[row.archetype] = row.n;
  }
  return samplesByModel;
}
798
/**
 * Builds `Map<profile.id, profile.archetypePerf>` from the profiles returned
 * by `allProfiles()`, skipping profiles that have no `archetypePerf`.
 *
 * @returns {Map<string, object>}
 */
function bundledArchetypePerf() {
  const perfByModel = new Map();
  for (const { id, archetypePerf } of allProfiles()) {
    if (!archetypePerf) continue;
    perfByModel.set(id, archetypePerf);
  }
  return perfByModel;
}
805
/**
 * Bundled fallback for sample counts: always a fresh, empty Map.
 *
 * @returns {Map<string, Record<string, number>>}
 */
function bundledArchetypePerfN() {
  const noSamples = /* @__PURE__ */ new Map();
  return noSamples;
}
808
// Loader for per-(model, archetype) perf scores. Built by createBrainQueryCache
// over the "kgauto_archetype_perf" table; rows are shaped by mapRowsToPerfMap and
// bundledArchetypePerf is supplied as the bundled fallback.
+ var loadArchetypePerfFromBrain = createBrainQueryCache({
809
+ table: "kgauto_archetype_perf",
810
+ mapRows: mapRowsToPerfMap,
811
+ bundledFallback: bundledArchetypePerf
812
+ });
813
// Loader for per-(model, archetype) sample counts (`n`). Reads the same table as
// the score loader above but maps rows with mapRowsToNMap; its bundled fallback
// (bundledArchetypePerfN) is an empty Map.
+ var loadArchetypePerfNFromBrain = createBrainQueryCache(
814
+ {
815
+ table: "kgauto_archetype_perf",
816
+ mapRows: mapRowsToNMap,
817
+ bundledFallback: bundledArchetypePerfN
818
+ }
819
+ );
820
// Minimum sample count `n` at which getArchetypePerfScore labels a score
// "measured"; below it the grounding is "judgment".
+ var MEASURED_GROUNDING_MIN_N = 10;
821
/**
 * Looks up the perf score and sample count for a (model, archetype) pair.
 *
 * A missing score defaults to 5 and a missing sample count to 0. Grounding is
 * "measured" once `n` reaches `MEASURED_GROUNDING_MIN_N`, otherwise "judgment".
 *
 * @param {string} modelId Model identifier.
 * @param {string} archetype Archetype name.
 * @returns {{score: number, n: number, grounding: "measured" | "judgment"}}
 */
function getArchetypePerfScore(modelId, archetype) {
  const scoresByModel = loadArchetypePerfFromBrain();
  const score = scoresByModel.get(modelId)?.[archetype] ?? 5;
  const samplesByModel = loadArchetypePerfNFromBrain();
  const n = samplesByModel.get(modelId)?.[archetype] ?? 0;
  const isMeasured = n >= MEASURED_GROUNDING_MIN_N;
  return { score, n, grounding: isMeasured ? "measured" : "judgment" };
}
827
+
828
+ // src/advisor.ts
829
// Alternatives scoring below this are never recommended by the
// cost-mismatched-archetype and tier-down detectors.
+ var QUALITY_FLOOR_FOR_RECOMMENDATION = 6;
830
// Tier-down requires the alternative's input cost to be at most this fraction
// of the chosen model's input cost.
+ var TIER_DOWN_COST_RATIO = 0.5;
831
// Cost-mismatch is only considered when the chosen model is judgment-grounded
// or scores below this value.
+ var COST_MISMATCHED_CHOSEN_SCORE_CEILING = 7;
832
/**
 * Runs every best-practice detector and returns their advisories in a fixed
 * order.
 *
 * The cost-mismatch, stale-evidence and tier-down detectors are skipped when
 * `policy.posture` is "locked". The archetype floor-breach detector is skipped
 * when `translatorClearedToolCallCliff(phase2)` reports true.
 *
 * @param {object} ir Prompt IR.
 * @param {object} result Compile result (passed to the tool-bloat detector).
 * @param {object} profile Chosen model profile.
 * @param {object} [policy] Routing policy.
 * @param {object} [phase2] Phase-2 context passed through to the detectors.
 * @returns {Array<object>} Advisories, possibly empty.
 */
function runAdvisor(ir, result, profile, policy, phase2) {
  const advisories = [
    ...detectCachingOff(ir, profile),
    ...detectSingleChunkSystem(ir, profile),
    ...detectToolBloat(ir, result),
    ...detectHistoryUncached(ir, profile),
    ...detectSingleModelArray(ir, policy)
  ];
  const isLocked = policy?.posture === "locked";
  if (!isLocked) {
    advisories.push(
      ...detectCostMismatchedArchetype(ir, profile, phase2),
      ...detectModelStaleEvidence(ir, profile),
      ...detectTierDown(ir, profile, phase2)
    );
  }
  if (!translatorClearedToolCallCliff(phase2)) {
    advisories.push(...detectArchetypePerfFloorBreach(ir, profile));
  }
  return advisories;
}
849
/**
 * Reports whether any applied section rewrite has kind "tool_call_contract".
 *
 * @param {{sectionRewritesApplied?: Array<{kind?: string}>}} [phase2]
 * @returns {boolean} False when there are no rewrites or none of that kind.
 */
function translatorClearedToolCallCliff(phase2) {
  const applied = phase2?.sectionRewritesApplied;
  if (!applied || applied.length === 0) return false;
  return applied.some((rewrite) => rewrite.kind === "tool_call_contract");
}
857
/**
 * Warns when an Anthropic call has a sizeable system prompt (2000+ characters
 * across all sections) but no section is flagged `cacheable: true`.
 *
 * @param {{sections: Array<{text: string, cacheable?: boolean}>}} ir Prompt IR.
 * @param {{provider: string}} profile Chosen model profile.
 * @returns {Array<object>} Zero or one advisory.
 */
function detectCachingOff(ir, profile) {
  if (profile.provider !== "anthropic") return [];
  const totalChars = ir.sections
    .map((section) => section.text.length)
    .reduce((sum, length) => sum + length, 0);
  if (totalChars < 2e3 || ir.sections.some((section) => section.cacheable === true)) {
    return [];
  }
  const advisory = {
    level: "warn",
    code: "caching-off-on-claude",
    message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
    suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
873
/**
 * Emits an informational advisory when an Anthropic call has exactly one
 * system section and that section is longer than 1000 characters.
 *
 * @param {{sections: Array<{text: string}>}} ir Prompt IR.
 * @param {{provider: string}} profile Chosen model profile.
 * @returns {Array<object>} Zero or one advisory.
 */
function detectSingleChunkSystem(ir, profile) {
  const isAnthropic = profile.provider === "anthropic";
  if (!isAnthropic || ir.sections.length !== 1) return [];
  const [only] = ir.sections;
  if (!only) return [];
  const chunkLength = only.text.length;
  if (chunkLength <= 1e3) return [];
  return [
    {
      level: "info",
      code: "single-chunk-system",
      message: `System prompt is a single ${chunkLength}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
      suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
888
/**
 * Warns when more than 10 tools survive the relevance pass for an archetype
 * in the short-output set (classify, extract, summarize, transform, critique).
 *
 * @param {{tools?: Array<object>, intent: {archetype: string}}} ir Prompt IR.
 * @param {{diagnostics: {toolsKept: number}}} result Compile result.
 * @returns {Array<object>} Zero or one advisory.
 */
function detectToolBloat(ir, result) {
  const shortOutputArchetypes = ["classify", "extract", "summarize", "transform", "critique"];
  const declared = ir.tools;
  if (!declared || declared.length === 0) return [];
  const { toolsKept } = result.diagnostics;
  if (toolsKept <= 10) return [];
  const archetype = ir.intent.archetype;
  if (!shortOutputArchetypes.includes(archetype)) return [];
  return [
    {
      level: "warn",
      code: "tool-bloat",
      message: `${toolsKept} tools kept after the relevance pass for archetype="${archetype}" (consumer declared ${declared.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
      suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
      docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
    }
  ];
}
910
/**
 * Warns when an Anthropic call carries two or more history messages but has no
 * active history cache policy (none set, or strategy "none").
 *
 * @param {{history?: Array<object>, historyCachePolicy?: {strategy: string}}} ir Prompt IR.
 * @param {{provider: string}} profile Chosen model profile.
 * @returns {Array<object>} Zero or one advisory.
 */
function detectHistoryUncached(ir, profile) {
  if (profile.provider !== "anthropic") return [];
  const { history, historyCachePolicy: policy } = ir;
  if (!history || history.length < 2) return [];
  const cachingActive = Boolean(policy) && policy.strategy !== "none";
  if (cachingActive) return [];
  return [
    {
      level: "warn",
      code: "history-uncached-on-claude",
      message: `${history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
      suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
926
/**
 * Warns when `ir.models` lists exactly one model and the posture is not
 * "locked".
 *
 * @param {{models: Array<string>}} ir Prompt IR.
 * @param {{posture?: string}} [policy] Routing policy.
 * @returns {Array<object>} Zero or one advisory.
 */
function detectSingleModelArray(ir, policy) {
  const isSingle = ir.models.length === 1;
  if (!isSingle || policy?.posture === "locked") return [];
  const [only] = ir.models;
  const message = `\`ir.models\` has length 1 (only "${only}") and posture is not 'locked'. A single-model chain has no safety net \u2014 the first 429 / 5xx / cliff hits the user as a failure. Master plan \xA71.2 closes the reliability gap with a 2-step minimum.`;
  const suggestion = `Use \`getDefaultFallbackChain({ archetype: ir.intent.archetype, primary: '${only}', posture: 'preferred' })\` for a user-anchored chain, or \`getDefaultFallbackChain({ archetype, posture: 'open' })\` for library-picked. If single-model is intentional (compliance/brand promise), set \`policy.posture = 'locked'\` to silence this rule.`;
  return [
    {
      level: "warn",
      code: "single-model-array",
      message,
      suggestion,
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#single-model-array"
    }
  ];
}
940
/**
 * Warns when a fallback-chain model is cheaper (input cost) than the chosen
 * model while scoring at least as well for the archetype.
 *
 * Only considered when the chosen model is judgment-grounded or scores below
 * `COST_MISMATCHED_CHOSEN_SCORE_CEILING`. Among eligible alternatives the
 * highest score wins, with lower input cost breaking ties. The advisory is
 * suppressed when the winner also meets the tier-down criteria (measured
 * grounding and input cost within `TIER_DOWN_COST_RATIO` of the chosen cost).
 *
 * @param {{intent: {archetype: string}}} ir Prompt IR.
 * @param {object} profile Chosen model profile.
 * @param {{fallbackChain: Array<string>, profileResolver?: Function}} [phase2]
 * @returns {Array<object>} Zero or one advisory.
 */
function detectCostMismatchedArchetype(ir, profile, phase2) {
  if (!phase2) return [];
  if (phase2.fallbackChain.length === 0 || !phase2.profileResolver) return [];
  const archetype = ir.intent.archetype;
  const chosen = getArchetypePerfScore(profile.id, archetype);
  const chosenIsJudgment = chosen.grounding === "judgment";
  if (!chosenIsJudgment && !(chosen.score < COST_MISMATCHED_CHOSEN_SCORE_CEILING)) return [];

  let winner = null;
  for (const candidateId of phase2.fallbackChain) {
    const candidate = phase2.profileResolver(candidateId);
    if (!candidate || candidate.id === profile.id) continue;
    const candidateScore = getArchetypePerfScore(candidate.id, archetype);
    const tooWeak = candidateScore.score < QUALITY_FLOOR_FOR_RECOMMENDATION || candidateScore.score < chosen.score;
    const notCheaper = candidate.costInputPer1m >= profile.costInputPer1m;
    if (tooWeak || notCheaper) continue;
    const beatsWinner = winner === null
      || candidateScore.score > winner.score.score
      || (candidateScore.score === winner.score.score && candidate.costInputPer1m < winner.profile.costInputPer1m);
    if (beatsWinner) {
      winner = { id: candidateId, profile: candidate, score: candidateScore };
    }
  }
  if (winner === null) return [];

  // Leave the case to the tier-down rule when that rule would fire for the winner.
  const winnerIsMeasured = winner.score.grounding === "measured";
  if (winnerIsMeasured && winner.profile.costInputPer1m <= profile.costInputPer1m * TIER_DOWN_COST_RATIO) {
    return [];
  }

  const chosenGrounding = chosenIsJudgment
    ? `archetypePerf.${archetype}=judgment`
    : `archetypePerf.${archetype}=${chosen.score}`;
  const altGrounding = winnerIsMeasured
    ? `archetypePerf.${archetype}=${winner.score.score}, measured, n=${winner.score.n}`
    : `archetypePerf.${archetype}=${winner.score.score}, judgment`;
  const sameProvider = profile.provider === winner.profile.provider;
  return [
    {
      level: "warn",
      code: "cost-mismatched-archetype",
      message: `Cost-mismatched-archetype: target=${profile.id} (${chosenGrounding}) selected for ${archetype}. Alternative ${winner.id} (${altGrounding}) is cheaper ($${winner.profile.costInputPer1m}/$${winner.profile.costOutputPer1m} vs $${profile.costInputPer1m}/$${profile.costOutputPer1m} per 1M) at equal-or-better quality.`,
      suggestion: `Consider declaring \`${winner.id}\` as the primary model for this archetype, or relax to posture='open' to let kgauto select among the chain. If the chosen model is required for compliance/brand reasons, set \`policy.posture = 'locked'\` to silence this rule.`,
      recommendationType: sameProvider ? "tier-down" : "model-swap",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
976
/**
 * Emits an informational advisory when brain-query mode is active for the
 * archetype-perf table but the chosen (model, archetype) pair is still
 * judgment-grounded.
 *
 * The sample-count threshold quoted in the advisory text is taken from
 * `MEASURED_GROUNDING_MIN_N`, the constant `getArchetypePerfScore` uses to pick
 * the grounding, so the wording cannot drift from the actual rule.
 *
 * @param {{intent: {archetype: string}}} ir Prompt IR.
 * @param {{id: string}} profile Chosen model profile.
 * @returns {Array<object>} Zero or one advisory.
 */
function detectModelStaleEvidence(ir, profile) {
  if (!isBrainQueryActiveFor("kgauto_archetype_perf")) return [];
  const archetype = ir.intent.archetype;
  const chosen = getArchetypePerfScore(profile.id, archetype);
  if (chosen.grounding !== "judgment") return [];
  const minN = MEASURED_GROUNDING_MIN_N;
  return [
    {
      level: "info",
      code: "model-stale-evidence",
      message: `Model-stale-evidence: target=${profile.id} archetype=${archetype} is judgment-grounded (n=${chosen.n}) despite brain-query mode being active. Measurement substrate is wired but the brain hasn't accumulated >=${minN} outcomes for this (model, archetype) tuple yet \u2014 routing decisions remain pre-measured for this slot.`,
      suggestion: `Verify that \`record()\` is being called on every call() outcome with the appropriate \`actualModel\` and \`mutationsApplied\` fields. Once the brain accumulates n>=${minN} rows on this tuple, the score promotes from judgment to measured automatically (5-min SWR cache). No code change required from your side \u2014 this is the substrate signaling the gap.`,
      recommendationType: "prompt-fix",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
992
/**
 * Warns when a fallback-chain model with measured grounding scores at least as
 * well as the chosen model for the archetype and its input cost is at most
 * `TIER_DOWN_COST_RATIO` times the chosen model's input cost.
 *
 * Among eligible alternatives the cheapest wins, with higher score breaking
 * cost ties.
 *
 * @param {{intent: {archetype: string}}} ir Prompt IR.
 * @param {object} profile Chosen model profile.
 * @param {{fallbackChain: Array<string>, profileResolver?: Function}} [phase2]
 * @returns {Array<object>} Zero or one advisory.
 */
function detectTierDown(ir, profile, phase2) {
  if (!phase2) return [];
  if (phase2.fallbackChain.length === 0 || !phase2.profileResolver) return [];
  const archetype = ir.intent.archetype;
  const chosen = getArchetypePerfScore(profile.id, archetype);
  const costCeiling = profile.costInputPer1m * TIER_DOWN_COST_RATIO;

  let winner = null;
  for (const candidateId of phase2.fallbackChain) {
    const candidate = phase2.profileResolver(candidateId);
    if (!candidate || candidate.id === profile.id) continue;
    const candidateScore = getArchetypePerfScore(candidate.id, archetype);
    if (candidateScore.grounding !== "measured") continue;
    const tooWeak = candidateScore.score < QUALITY_FLOOR_FOR_RECOMMENDATION || candidateScore.score < chosen.score;
    if (tooWeak || candidate.costInputPer1m > costCeiling) continue;
    const beatsWinner = winner === null
      || candidate.costInputPer1m < winner.profile.costInputPer1m
      || (candidate.costInputPer1m === winner.profile.costInputPer1m && candidateScore.score > winner.score.score);
    if (beatsWinner) {
      winner = { id: candidateId, profile: candidate, score: candidateScore };
    }
  }
  if (winner === null) return [];

  const groundingNote = chosen.grounding === "measured" ? `measured, n=${chosen.n}` : `${chosen.grounding}`;
  const chosenDesc = `archetypePerf.${archetype}=${chosen.score} (${groundingNote})`;
  return [
    {
      level: "warn",
      code: "tier-down",
      message: `Tier-down: target=${profile.id} (${chosenDesc}) selected for ${archetype}. Brain shows ${winner.id} delivers equal-or-better quality (archetypePerf.${archetype}=${winner.score.score}, measured, n=${winner.score.n}) at $${winner.profile.costInputPer1m}/$${winner.profile.costOutputPer1m} per 1M vs $${profile.costInputPer1m}/$${profile.costOutputPer1m} \u2014 a measured tier-down opportunity.`,
      suggestion: `Move \`${winner.id}\` to primary for this archetype. The brain has n=${winner.score.n} measured outcomes backing the recommendation; this is data, not opinion. If posture='locked' is required (compliance/brand promise), set it explicitly to silence this rule.`,
      recommendationType: "tier-down",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
1025
/**
 * Reports when the chosen model is not "compatible" with the archetype
 * according to `getModelCompatibility`.
 *
 * - status "compatible": no advisory.
 * - status "requires-adapter": a warn-level advisory that carries the adapter.
 * - any other status: a critical advisory recommending a model swap.
 *
 * @param {{intent: {archetype: string}, constraints?: {toolOrchestration?: string}}} ir Prompt IR.
 * @param {{id: string}} profile Chosen model profile.
 * @returns {Array<object>} Zero or one advisory.
 */
function detectArchetypePerfFloorBreach(ir, profile) {
  const compat = getModelCompatibility(profile.id, {
    archetype: ir.intent.archetype,
    toolOrchestration: ir.constraints?.toolOrchestration
  });
  const docsUrl = "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories";
  const code = "archetype-perf-floor-breach";
  switch (compat.status) {
    case "compatible":
      return [];
    case "requires-adapter":
      return [
        {
          level: "warn",
          code,
          message: `${profile.id} sits below the archetype floor for ${ir.intent.archetype} (score ${compat.archetypePerf}/10, floor 6). A known adapter would lift it: ${compat.adapter.parameter}=${compat.adapter.value}. ${compat.adapter.consequence}`,
          suggestion: `Pass \`ir.constraints.${compat.adapter.parameter} = '${compat.adapter.value}'\` for this call, OR pick a model whose archetypePerf for ${ir.intent.archetype} already clears the floor (call \`getModelCompatibility(modelId, { archetype: '${ir.intent.archetype}' })\` to check). Estimated post-adapter score: ${compat.archetypePerfWithAdapter}/10.`,
          recommendationType: "prompt-fix",
          suggestedAdaptation: compat.adapter,
          docsUrl
        }
      ];
    default:
      return [
        {
          level: "critical",
          code,
          message: `${profile.id} sits below the archetype floor for ${ir.intent.archetype} (score ${compat.archetypePerf}/10, floor 6) and no known adapter would lift it. ${compat.reason}`,
          suggestion: `Swap to a model whose archetypePerf for ${ir.intent.archetype} clears the floor. Use \`getModelCompatibility(candidateId, { archetype: '${ir.intent.archetype}' })\` to vet candidates, or \`getDefaultFallbackChain({ archetype: '${ir.intent.archetype}', posture: 'open' })\` for a library-picked chain that respects the floor by construction.`,
          recommendationType: "model-swap",
          docsUrl
        }
      ];
  }
}
1055
+
1056
+ // src/translator.ts
1057
// Archetype score below which matchRule applies the tool-call-contract rewrite.
+ var TRANSLATOR_FLOOR = ARCHETYPE_FLOOR_DEFAULT;
1058
// Rule identifiers returned by matchRule and recorded on each applied rewrite.
+ var RULE_SEQUENTIAL_TOOL_CLIFF = "sequential-tool-cliff-below-floor";
1059
+ var RULE_NARRATION_DRIFT_ANTHROPIC = "narration-drift-anthropic";
1060
+ var RULE_NARRATION_THINKING_LEAK_DEEPSEEK = "narration-thinking-leak-deepseek";
1061
// Preamble text that applySectionRewrites prepends to a matched section's text,
// separated from the original text by a blank line.
+ var SEQUENTIAL_TOOL_PREAMBLE = "IMPORTANT: Use one tool call per response. Wait for the tool result before deciding the next tool. Do NOT batch tool calls in parallel.";
1062
+ var NARRATION_DRIFT_ANTHROPIC_PREAMBLE = "Output ONLY the requested content. Do not narrate your thought process. Each line \u2264 12 words.";
1063
+ var NARRATION_THINKING_LEAK_DEEPSEEK_PREAMBLE = "Reasoning is internal. Output ONLY the requested content; do not emit <thinking> blocks or internal monologue as user-facing text.";
1064
/**
 * Picks the translator rule, if any, for a section kind on a given profile.
 *
 * - "tool_call_contract": matches only when the profile has a numeric
 *   `archetypePerf[archetype]` that is not at or above `TRANSLATOR_FLOOR`;
 *   the rule also disables parallel tool calls on the wire.
 * - "narration_contract": matches for the "anthropic" and "deepseek"
 *   providers, each with its own preamble.
 * - Any other kind: no rule.
 *
 * @param {string} kind Section kind.
 * @param {{provider: string, archetypePerf?: Record<string, number>}} profile
 * @param {string} archetype Archetype name.
 * @returns {{id: string, preamble: string, wireOverrides?: object} | null}
 */
function matchRule(kind, profile, archetype) {
  switch (kind) {
    case "tool_call_contract": {
      const score = profile.archetypePerf ? profile.archetypePerf[archetype] : void 0;
      const belowFloor = typeof score === "number" && !(score >= TRANSLATOR_FLOOR);
      if (!belowFloor) return null;
      return {
        id: RULE_SEQUENTIAL_TOOL_CLIFF,
        preamble: SEQUENTIAL_TOOL_PREAMBLE,
        wireOverrides: { parallelToolCalls: false }
      };
    }
    case "narration_contract":
      switch (profile.provider) {
        case "anthropic":
          return { id: RULE_NARRATION_DRIFT_ANTHROPIC, preamble: NARRATION_DRIFT_ANTHROPIC_PREAMBLE };
        case "deepseek":
          return { id: RULE_NARRATION_THINKING_LEAK_DEEPSEEK, preamble: NARRATION_THINKING_LEAK_DEEPSEEK_PREAMBLE };
        default:
          return null;
      }
    default:
      return null;
  }
}
1094
/**
 * Applies translator rules to the IR's sections.
 *
 * Each section with a `kind` other than "arbitrary" is offered to `matchRule`.
 * When a rule matches, the rule's preamble plus a blank line is prepended to
 * the section text and a rewrite record is collected. The input IR is returned
 * untouched when there are no sections or nothing matched.
 *
 * @param {{ir: object, profile: object, archetype: string}} args
 * @returns {{rewrittenIR: object, rewrites: Array<object>}}
 */
function applySectionRewrites(args) {
  const { ir, profile, archetype } = args;
  const untouched = () => ({ rewrittenIR: ir, rewrites: [] });
  if (!Array.isArray(ir.sections) || ir.sections.length === 0) return untouched();

  const outcomes = ir.sections.map((section) => {
    const eligible = Boolean(section.kind) && section.kind !== "arbitrary";
    const rule = eligible ? matchRule(section.kind, profile, archetype) : null;
    if (!rule) return { section, rewrite: null };
    const originalText = section.text;
    const transformedText = `${rule.preamble}\n\n${originalText}`;
    const rewrite = {
      sectionId: section.id,
      kind: section.kind,
      rule: rule.id,
      originalText,
      transformedText
    };
    if (rule.wireOverrides) rewrite.wireOverrides = rule.wireOverrides;
    return { section: { ...section, text: transformedText }, rewrite };
  });

  const rewrites = outcomes.flatMap((outcome) => outcome.rewrite ? [outcome.rewrite] : []);
  if (rewrites.length === 0) return untouched();
  return {
    rewrittenIR: { ...ir, sections: outcomes.map((outcome) => outcome.section) },
    rewrites
  };
}
1124
+
600
1125
  // src/compile.ts
601
1126
  var counter = 0;
602
1127
  function makeHandle() {
@@ -612,7 +1137,8 @@ function compile(ir, opts = {}) {
612
1137
  threshold: opts.toolRelevanceThreshold
613
1138
  });
614
1139
  const compressed = passCompressHistory(toolFiltered.value, {
615
- summarizeOlderThan: opts.compressHistoryAfter
1140
+ summarizeOlderThan: opts.compressHistoryAfter,
1141
+ summarizeAboveTokens: opts.compressHistoryAboveTokens
616
1142
  });
617
1143
  let workingIR = compressed.value;
618
1144
  const accumulatedMutations = [
@@ -639,14 +1165,89 @@ function compile(ir, opts = {}) {
639
1165
  const cliffs = passApplyCliffs(workingIR, profile, inputTokens);
640
1166
  workingIR = cliffs.value.ir;
641
1167
  accumulatedMutations.push(...cliffs.mutations);
1168
+ const translated = applySectionRewrites({
1169
+ ir: workingIR,
1170
+ profile,
1171
+ archetype: ir.intent.archetype
1172
+ });
1173
+ workingIR = translated.rewrittenIR;
1174
+ const sectionRewritesApplied = translated.rewrites;
1175
+ let wireOverrides;
1176
+ for (const rw of sectionRewritesApplied) {
1177
+ if (!rw.wireOverrides) continue;
1178
+ if (!wireOverrides) wireOverrides = {};
1179
+ if (rw.wireOverrides.parallelToolCalls !== void 0) {
1180
+ wireOverrides.parallelToolCalls = rw.wireOverrides.parallelToolCalls;
1181
+ }
1182
+ }
1183
+ for (const rw of sectionRewritesApplied) {
1184
+ accumulatedMutations.push({
1185
+ id: `translator:${rw.rule}:${rw.sectionId}`,
1186
+ source: "translator",
1187
+ passName: "translator",
1188
+ description: `Rewrote section "${rw.sectionId}" (kind=${rw.kind}) via rule "${rw.rule}".`
1189
+ });
1190
+ }
642
1191
  const lowered = lower(workingIR, profile, {
643
1192
  forceThinkingZero: cliffs.value.loweringHints.forceThinkingZero,
644
- forceTerseOutput: cliffs.value.loweringHints.forceTerseOutput
1193
+ forceTerseOutput: cliffs.value.loweringHints.forceTerseOutput,
1194
+ wireOverrides
645
1195
  });
646
1196
  validateFinalFit(workingIR, profile, inputTokens);
647
1197
  const handle = makeHandle();
648
1198
  const finalShape = computeShape(workingIR, inputTokens);
649
1199
  const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
1200
+ const diagnostics = {
1201
+ sectionsKept: workingIR.sections.length,
1202
+ sectionsDropped: ir.sections.length - workingIR.sections.length,
1203
+ toolsKept: workingIR.tools?.length ?? 0,
1204
+ toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
1205
+ historyKept: workingIR.history?.length ?? 0,
1206
+ historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
1207
+ cacheableTokens: lowered.diagnostics.cacheableTokens,
1208
+ estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
1209
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
1210
+ historyTokensTotal: compressed.historyTokensTotal,
1211
+ // alpha.20 E3: mirror the consumer's declared mode for Glass-Box +
1212
+ // brain observability. Undefined when not declared (pre-alpha.20).
1213
+ toolOrchestration: ir.constraints?.toolOrchestration
1214
+ };
1215
+ if (ir.intent.archetype === "hunt" && ir.constraints?.toolOrchestration === "sequential") {
1216
+ accumulatedMutations.push({
1217
+ id: "sequential-mode-chain-selected",
1218
+ source: "tool_orchestration",
1219
+ passName: "compile",
1220
+ description: "ir.constraints.toolOrchestration='sequential' selected the DeepSeek-tier-0 hunt chain overlay (L-040 parallel-tool cliff doesn't apply at single-step granularity)."
1221
+ });
1222
+ }
1223
+ const phase2ProfileResolver = opts.profileResolver ? (id) => {
1224
+ try {
1225
+ return opts.profileResolver(id);
1226
+ } catch {
1227
+ return void 0;
1228
+ }
1229
+ } : tryGetProfile;
1230
+ const advisories = runAdvisor(
1231
+ ir,
1232
+ {
1233
+ target: profile.id,
1234
+ provider: profile.provider,
1235
+ tokensIn: inputTokens,
1236
+ diagnostics
1237
+ },
1238
+ profile,
1239
+ opts.policy,
1240
+ {
1241
+ fallbackChain,
1242
+ profileResolver: phase2ProfileResolver,
1243
+ // alpha.29 — feed translator rewrites to the advisor so the
1244
+ // `archetype-perf-floor-breach` rule can suppress when the translator
1245
+ // already cleared the cliff for the same archetype. Without this,
1246
+ // both the rewrite AND the advisory fire — noisy, and the advisory
1247
+ // would mislead consumers into thinking the cliff is unaddressed.
1248
+ sectionRewritesApplied
1249
+ }
1250
+ );
650
1251
  return {
651
1252
  handle,
652
1253
  target: profile.id,
@@ -656,16 +1257,10 @@ function compile(ir, opts = {}) {
656
1257
  estimatedCostUsd: target.estimatedCostUsd,
657
1258
  mutationsApplied: accumulatedMutations,
658
1259
  fallbackChain,
659
- diagnostics: {
660
- sectionsKept: workingIR.sections.length,
661
- sectionsDropped: ir.sections.length - workingIR.sections.length,
662
- toolsKept: workingIR.tools?.length ?? 0,
663
- toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
664
- historyKept: workingIR.history?.length ?? 0,
665
- historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
666
- cacheableTokens: lowered.diagnostics.cacheableTokens,
667
- estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
668
- }
1260
+ advisories,
1261
+ diagnostics,
1262
+ sectionRewritesApplied,
1263
+ wireOverrides
669
1264
  };
670
1265
  }
671
1266
  function validateIR(ir) {
@@ -706,14 +1301,92 @@ function validateFinalFit(ir, profile, tokens) {
706
1301
  }
707
1302
  }
708
1303
 
1304
+ // src/pricing-brain.ts
1305
/**
 * Structural check for a raw pricing row.
 *
 * @param {unknown} x Candidate row.
 * @returns {boolean} True when `x` is a non-null object whose `model_id` and
 *   `valid_from` are strings and whose `cost_input_per_1m` and
 *   `cost_output_per_1m` are numbers.
 */
function isPricingRow(x) {
  if (typeof x !== "object" || !x) return false;
  const requiredFieldTypes = [
    ["model_id", "string"],
    ["cost_input_per_1m", "number"],
    ["cost_output_per_1m", "number"],
    ["valid_from", "string"]
  ];
  return requiredFieldTypes.every(([field, expected]) => typeof x[field] === expected);
}
1310
/**
 * Convert raw pricing rows (snake_case) into pricing entries (camelCase).
 *
 * Rows that fail `isPricingRow` are skipped. Rows whose `valid_from` or
 * (non-null) `valid_until` cannot be parsed by `Date.parse` are skipped too:
 * a NaN timestamp compares false against everything, so such a row would
 * look valid at every point in time and, once selected by a
 * "latest validFrom wins" scan, could never be displaced by a well-formed row.
 *
 * @param {Iterable<unknown>} rows Raw rows.
 * @returns {object[]} Entries with `modelId`, `costInputPer1m`,
 *   `costOutputPer1m`, `cacheInputPer1m`, `cacheCreationPer1m`, `validFrom`
 *   and `validUntil` (epoch milliseconds; `validUntil` is undefined when the
 *   row has no upper bound) and `source`. Null optional columns map to undefined.
 */
function mapRowsToPricing(rows) {
  const out = [];
  for (const row of rows) {
    if (!isPricingRow(row)) continue;

    const validFrom = Date.parse(row.valid_from);
    const validUntil = row.valid_until == null ? void 0 : Date.parse(row.valid_until);
    // Malformed timestamps are treated like any other malformed row: dropped.
    if (Number.isNaN(validFrom)) continue;
    if (validUntil !== void 0 && Number.isNaN(validUntil)) continue;

    out.push({
      modelId: row.model_id,
      costInputPer1m: row.cost_input_per_1m,
      costOutputPer1m: row.cost_output_per_1m,
      cacheInputPer1m: row.cache_input_per_1m ?? void 0,
      cacheCreationPer1m: row.cache_creation_per_1m ?? void 0,
      validFrom,
      validUntil,
      source: row.source ?? void 0
    });
  }
  return out;
}
1327
/**
 * Build pricing entries from the profiles returned by `allProfiles()`.
 *
 * Each entry starts at `validFrom: 0`, has no `validUntil`, and is tagged
 * with source "profile_seed". `cacheInputPer1m` is the input price multiplied
 * by the profile's cache discount when that discount is defined and positive;
 * otherwise it is undefined.
 *
 * @returns {object[]} One pricing entry per profile, in iteration order.
 */
function bundledPricing() {
  const toSeedEntry = (profile) => {
    const { discount } = profile.lowering.cache;
    const hasDiscount = discount !== void 0 && discount > 0;
    return {
      modelId: profile.id,
      costInputPer1m: profile.costInputPer1m,
      costOutputPer1m: profile.costOutputPer1m,
      cacheInputPer1m: hasDiscount ? profile.costInputPer1m * discount : void 0,
      validFrom: 0,
      validUntil: void 0,
      source: "profile_seed"
    };
  };
  return Array.from(allProfiles(), (profile) => toSeedEntry(profile));
}
1342
// Options for the pricing loader: rows come from the "kgauto_pricing" table,
// are converted with `mapRowsToPricing`, and `bundledPricing` is registered
// as the bundled fallback.
var pricingBrainQueryOptions = {
  table: "kgauto_pricing",
  mapRows: mapRowsToPricing,
  bundledFallback: bundledPricing
};
// Loader produced by `createBrainQueryCache`; callers invoke it with no
// arguments and iterate the pricing entries it returns.
var loadPricingFromBrain = createBrainQueryCache(pricingBrainQueryOptions);
1347
/**
 * Find the pricing entry for a model that is in effect at a given instant.
 *
 * An entry qualifies when its `modelId` matches, its `validFrom` is not after
 * the instant, and its `validUntil` (if any) is strictly after the instant.
 * Among qualifying entries the one with the greatest `validFrom` wins; on a
 * tie the earlier entry in load order is kept.
 *
 * @param {string} modelId Model identifier to look up.
 * @param {Date} [at] Instant to resolve at; defaults to the current time.
 * @returns {object | undefined} The winning entry, or undefined when none qualifies.
 */
function resolvePricingAt(modelId, at = /* @__PURE__ */ new Date()) {
  const ts = at.getTime();
  const inEffect = (row) => {
    if (row.modelId !== modelId) return false;
    if (row.validFrom > ts) return false;
    return !(row.validUntil !== void 0 && row.validUntil <= ts);
  };
  return Array.from(loadPricingFromBrain())
    .filter((row) => inEffect(row))
    .reduce(
      (best, row) => (!best || row.validFrom > best.validFrom ? row : best),
      void 0
    );
}
1359
+
709
1360
  // src/brain.ts
710
1361
  var activeConfig;
711
1362
  function configureBrain(config) {
712
1363
  const endpoint = config.endpoint.replace(/\/outcomes\/?$/, "");
713
1364
  activeConfig = { ...config, endpoint };
1365
+ const bq = config.brainQuery ?? {};
1366
+ const enabledTables = /* @__PURE__ */ new Set();
1367
+ if (bq.chains !== false) enabledTables.add("kgauto_chains");
1368
+ if (bq.perf !== false) enabledTables.add("kgauto_archetype_perf");
1369
+ if (bq.pricing !== false) enabledTables.add("kgauto_pricing");
1370
+ if (bq.models !== false) {
1371
+ enabledTables.add("kgauto_models");
1372
+ enabledTables.add("kgauto_aliases");
1373
+ }
1374
+ if (enabledTables.size === 0) {
1375
+ configureBrainQuery(void 0);
1376
+ return;
1377
+ }
1378
+ configureBrainQuery({
1379
+ endpoint,
1380
+ configEndpoint: bq.configEndpoint,
1381
+ ttlMs: bq.cacheTtlMs ?? 3e5,
1382
+ fetchImpl: config.fetchImpl ?? fetch,
1383
+ enabledTables,
1384
+ onError: config.onError
1385
+ });
714
1386
  }
715
1387
  function clearBrain() {
716
1388
  activeConfig = void 0;
1389
+ configureBrainQuery(void 0);
717
1390
  }
718
1391
  var compileRegistry = /* @__PURE__ */ new Map();
719
1392
  var REGISTRY_MAX_ENTRIES = 1e4;
@@ -740,6 +1413,9 @@ function registerCompile(appId, archetype, ir, result) {
740
1413
  tokens
741
1414
  );
742
1415
  const shapeKey = `${shape.contextBucket}-${shape.toolCountBucket}-${shape.historyDepth}-${shape.outputMode}`;
1416
+ const toolsCount = result.diagnostics.toolsKept;
1417
+ const historyDepth = Array.isArray(ir.history) ? ir.history.length : 0;
1418
+ const systemPromptChars = estimateSystemPromptChars(ir.sections);
743
1419
  compileRegistry.set(result.handle, {
744
1420
  appId,
745
1421
  archetype,
@@ -749,9 +1425,35 @@ function registerCompile(appId, archetype, ir, result) {
749
1425
  learningKey: learningKey(archetype, result.target, shape),
750
1426
  estimatedTokensIn: tokens,
751
1427
  mutationsApplied: result.mutationsApplied.map((m) => m.id),
752
- startedAt: Date.now()
1428
+ // alpha.30: cache the in-memory advisories so record() can auto-persist
1429
+ // to `compile_outcome_advisories` without consumer-side threading.
1430
+ advisoriesFromCompile: result.advisories ?? [],
1431
+ startedAt: Date.now(),
1432
+ historyCacheableTokens: result.diagnostics.historyCacheableTokens,
1433
+ historyTokensTotal: result.diagnostics.historyTokensTotal,
1434
+ // alpha.20 E3: capture consumer's declared mode for the brain payload.
1435
+ toolOrchestration: result.diagnostics.toolOrchestration,
1436
+ // alpha.28: shape fields for Glass-Box renderer.
1437
+ toolsCount,
1438
+ historyDepth,
1439
+ systemPromptChars,
1440
+ // alpha.29: translator activity — persisted on the brain row so
1441
+ // cross-app aggregates can answer "Sonnet narration rule fired N times,
1442
+ // outcome quality lifted to M."
1443
+ sectionRewritesApplied: result.sectionRewritesApplied
753
1444
  });
754
1445
  }
1446
/**
 * Sum the character length of all section bodies.
 *
 * Sections carry their body in `text` (that is the field the translator
 * reads and rewrites), so `text` is measured first. `content` is still
 * honoured as a fallback for section objects that use that name; the
 * previous implementation looked at `content` only and therefore reported
 * nothing for `text`-based sections.
 *
 * @param {unknown} sections Section list; anything that is not a non-empty
 *   array yields undefined.
 * @returns {number | undefined} Total characters, or undefined when the total
 *   is zero or no string bodies were found.
 */
function estimateSystemPromptChars(sections) {
  if (!Array.isArray(sections) || sections.length === 0) return void 0;
  let total = 0;
  for (const section of sections) {
    if (!section || typeof section !== "object") continue;
    const body = typeof section.text === "string" ? section.text : section.content;
    if (typeof body === "string") total += body.length;
  }
  return total > 0 ? total : void 0;
}
755
1457
  async function record(input) {
756
1458
  const reg = compileRegistry.get(input.handle);
757
1459
  if (reg) compileRegistry.delete(input.handle);
@@ -762,11 +1464,22 @@ async function record(input) {
762
1464
  const config = activeConfig;
763
1465
  const fetchFn = config.fetchImpl ?? fetch;
764
1466
  const send = async () => {
1467
+ let outcomeId;
765
1468
  try {
766
1469
  const res = await fetchFn(`${config.endpoint}/outcomes`, {
767
1470
  method: "POST",
768
1471
  headers: {
769
1472
  "Content-Type": "application/json",
1473
+ // alpha.20: request the inserted row back so we can JOIN advisories
1474
+ // to it via outcome_id. PostgREST returns the row when
1475
+ // `Prefer: return=representation` is set; proxies that pass the
1476
+ // header through (the recommended `const row = { ...body }` shape
1477
+ // from OutcomePayload's forward-compat rule) will surface
1478
+ // the row id. Proxies that don't (legacy / hand-rolled shapes)
1479
+ // simply produce no parseable id → secondary advisory POST is
1480
+ // skipped silently. Best-effort — primary outcome row is the
1481
+ // load-bearing write.
1482
+ Prefer: "return=representation",
770
1483
  ...config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}
771
1484
  },
772
1485
  body: JSON.stringify(payload)
@@ -775,6 +1488,29 @@ async function record(input) {
775
1488
  const text = await res.text().catch(() => "<no body>");
776
1489
  throw new Error(`brain ${res.status}: ${text}`);
777
1490
  }
1491
+ outcomeId = await tryExtractOutcomeId(res);
1492
+ } catch (err) {
1493
+ (config.onError ?? defaultOnError)(err);
1494
+ return;
1495
+ }
1496
+ const advisories = input.advisories ?? reg?.advisoriesFromCompile;
1497
+ if (!advisories || advisories.length === 0) return;
1498
+ if (outcomeId === void 0) return;
1499
+ try {
1500
+ const advisoryPayload = advisories.map((a) => buildAdvisoryRow(outcomeId, a));
1501
+ const res = await fetchFn(`${config.endpoint}/compile_outcome_advisories`, {
1502
+ method: "POST",
1503
+ headers: {
1504
+ "Content-Type": "application/json",
1505
+ Prefer: "return=minimal",
1506
+ ...config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}
1507
+ },
1508
+ body: JSON.stringify(advisoryPayload)
1509
+ });
1510
+ if (!res.ok) {
1511
+ const text = await res.text().catch(() => "<no body>");
1512
+ throw new Error(`brain advisories ${res.status}: ${text}`);
1513
+ }
778
1514
  } catch (err) {
779
1515
  (config.onError ?? defaultOnError)(err);
780
1516
  }
@@ -792,6 +1528,11 @@ function buildPayload(input, reg) {
792
1528
  const compileTarget = reg?.model;
793
1529
  const actual = input.actualModel ?? compileTarget;
794
1530
  const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
1531
+ const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
1532
+ const costModel = actual;
1533
+ const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
1534
+ const fellOverFrom = input.fellOverFrom ?? requested;
1535
+ const fallbackReason = fellOverFrom ? input.fallbackReason : void 0;
795
1536
  return {
796
1537
  handle: input.handle,
797
1538
  app_id: reg?.appId,
@@ -801,7 +1542,7 @@ function buildPayload(input, reg) {
801
1542
  provider: reg?.provider,
802
1543
  shape_key: reg?.shapeKey,
803
1544
  learning_key: reg?.learningKey,
804
- mutations_applied: reg?.mutationsApplied ?? [],
1545
+ mutations_applied: mutationsApplied,
805
1546
  tokens_in: input.tokensIn,
806
1547
  tokens_out: input.tokensOut,
807
1548
  estimated_tokens_in: reg?.estimatedTokensIn,
@@ -815,8 +1556,117 @@ function buildPayload(input, reg) {
815
1556
  oracle_rationale: input.oracleScore?.rationale,
816
1557
  prompt_preview: input.promptPreview,
817
1558
  response_preview: input.responsePreview,
818
- dialect_version: "v1"
1559
+ dialect_version: "v1",
1560
+ cache_read_input_tokens: input.cacheReadInputTokens,
1561
+ cache_creation_input_tokens: input.cacheCreationInputTokens,
1562
+ cost_usd_actual: costUsdActual,
1563
+ ttft_ms: input.ttftMs,
1564
+ history_cacheable_tokens: reg?.historyCacheableTokens,
1565
+ history_tokens_at_compile: reg?.historyTokensTotal,
1566
+ // alpha.20 E3: mirror consumer's declared tool-orchestration mode so
1567
+ // the brain can measure per-mode model perf separately (DeepSeek in
1568
+ // sequential vs parallel mode is two different stories — L-040).
1569
+ // Null when consumer hadn't adopted the constraint yet.
1570
+ tool_orchestration: reg?.toolOrchestration ?? null,
1571
+ // alpha.28 — Glass-Box renderer substrate (migration 018). All optional;
1572
+ // omitted-undefined PostgREST inserts store NULL → renderer renders "—".
1573
+ finish_reason: input.finishReason,
1574
+ total_ms: input.totalMs ?? input.latencyMs,
1575
+ tools_count: input.toolsCount ?? reg?.toolsCount,
1576
+ history_depth: input.historyDepth ?? reg?.historyDepth,
1577
+ system_prompt_chars: input.systemPromptChars ?? reg?.systemPromptChars,
1578
+ fell_over_from: fellOverFrom,
1579
+ fallback_reason: fallbackReason,
1580
+ // alpha.29 — translator activity (migration 019). Send NULL when no
1581
+ // rewrites fired so the brain's "did the translator do anything?"
1582
+ // queries can use `IS NOT NULL` cleanly.
1583
+ section_rewrites_applied: reg?.sectionRewritesApplied && reg.sectionRewritesApplied.length > 0 ? reg.sectionRewritesApplied : null
1584
+ };
1585
+ }
1586
/**
 * Compute the USD cost of a call from its token counts.
 *
 * Rates come from `resolvePricingAt(modelId)` when that returns an entry with
 * a positive input or output price; otherwise from `tryGetProfile(modelId)`.
 * The result is rounded to six decimal places.
 *
 * @param {string} modelId Model whose rates are used.
 * @param {number} tokensIn Input token count.
 * @param {number} tokensOut Output token count.
 * @returns {number | undefined} Cost in USD, or undefined when both counts
 *   are zero or no rates are available for the model.
 */
function computeCostUsd(modelId, tokensIn, tokensOut) {
  if (tokensIn === 0 && tokensOut === 0) return void 0;

  // Shared arithmetic for both rate sources (per-million prices).
  const priceWith = (rates) => {
    const inputUsd = tokensIn / 1e6 * rates.costInputPer1m;
    const outputUsd = tokensOut / 1e6 * rates.costOutputPer1m;
    return Math.round((inputUsd + outputUsd) * 1e6) / 1e6;
  };

  const brainRow = resolvePricingAt(modelId);
  const brainRowIsPriced = Boolean(brainRow) && (brainRow.costInputPer1m > 0 || brainRow.costOutputPer1m > 0);
  if (brainRowIsPriced) return priceWith(brainRow);

  const profile = tryGetProfile(modelId);
  return profile ? priceWith(profile) : void 0;
}
1600
/**
 * Best-effort extraction of a numeric row id from a response.
 *
 * The body is read as JSON unless the response declares a content type that
 * does not include "application/json". The id is taken from the first
 * element of a non-empty array body, or from the body itself when it is
 * another kind of object. Any failure along the way yields undefined rather
 * than an exception.
 *
 * @param {object} res Response-like object (`headers.get` and `json` are used when present).
 * @returns {Promise<number | undefined>} The id when it is a number, else undefined.
 */
async function tryExtractOutcomeId(res) {
  try {
    const contentType = res.headers?.get?.("content-type") ?? "";
    const declaresNonJson = Boolean(contentType) && !contentType.includes("application/json");
    if (declaresNonJson || typeof res.json !== "function") return void 0;

    const body = await res.json();
    let candidate;
    if (Array.isArray(body) && body.length > 0) {
      candidate = body[0]?.id;
    } else if (body && typeof body === "object") {
      candidate = body.id;
    }
    return typeof candidate === "number" ? candidate : void 0;
  } catch {
    // Deliberately silent: a missing id is an expected outcome here.
    return void 0;
  }
}
1619
/**
 * Shape one advisory as a row linked to an outcome.
 *
 * `recommendation_type`, `suggestion` and `docs_url` are only present on the
 * row when the corresponding advisory field is truthy.
 *
 * @param {number} outcomeId Id of the outcome the advisory belongs to.
 * @param {object} a Advisory (`code`, `level`, `message`, plus optional
 *   `recommendationType`, `suggestion`, `docsUrl`).
 * @returns {object} Row with snake_case keys.
 */
function buildAdvisoryRow(outcomeId, a) {
  const row = {
    outcome_id: outcomeId,
    code: a.code,
    level: a.level,
    message: a.message
  };
  if (a.recommendationType) row.recommendation_type = a.recommendationType;
  if (a.suggestion) row.suggestion = a.suggestion;
  if (a.docsUrl) row.docs_url = a.docsUrl;
  return row;
}
1630
/**
 * POST a quality verdict for an outcome to `<endpoint>/compile_outcome_quality`.
 *
 * With `config.sync` set, the returned promise resolves to the real result of
 * the request. Otherwise the request is started in the background and
 * `{ ok: true }` is returned immediately. Failures (non-OK status or a thrown
 * error) are reported through `config.onError`, falling back to
 * `defaultOnError`, and never thrown to the caller.
 *
 * @param {object} input Reads `outcomeId`, `outcome`, and the optional
 *   `rating`, `reason`, `observedConfidence` (sent as null when absent).
 * @returns {Promise<{ ok: boolean, reason?: string }>} `reason` is
 *   "brain_not_configured" when no brain config is active, or
 *   "persistence_failed" when the request failed in sync mode.
 */
async function recordOutcome(input) {
  const config = activeConfig;
  if (!config) {
    return { ok: false, reason: "brain_not_configured" };
  }
  const fetchFn = config.fetchImpl ?? fetch;
  const { outcomeId, outcome, rating, reason, observedConfidence } = input;
  const payload = {
    outcome_id: outcomeId,
    outcome,
    rating: rating ?? null,
    reason: reason ?? null,
    observed_confidence: observedConfidence ?? null
  };

  // The error handler is resolved at report time, not captured up front.
  const report = (err) => {
    (config.onError ?? defaultOnError)(err);
  };
  const persistenceFailed = () => ({ ok: false, reason: "persistence_failed" });

  const send = async () => {
    try {
      const headers = { "Content-Type": "application/json" };
      if (config.apiKey) headers.Authorization = `Bearer ${config.apiKey}`;
      const res = await fetchFn(`${config.endpoint}/compile_outcome_quality`, {
        method: "POST",
        headers,
        body: JSON.stringify(payload)
      });
      if (res.ok) return { ok: true };
      const text = await res.text().catch(() => "<no body>");
      report(new Error(`brain ${res.status}: ${text}`));
      return persistenceFailed();
    } catch (err) {
      report(err);
      return persistenceFailed();
    }
  };

  if (!config.sync) {
    // Fire-and-forget: send() handles its own errors.
    void send();
    return { ok: true };
  }
  return send();
}
821
1671
 
822
1672
  // src/ir.ts
@@ -855,7 +1705,7 @@ async function execute(request, opts = {}) {
855
1705
  }
856
1706
  }
857
1707
  async function executeAnthropic(request, opts) {
858
- const apiKey = opts.apiKeys?.anthropic ?? process.env.ANTHROPIC_API_KEY;
1708
+ const apiKey = resolveProviderKey("anthropic", { apiKeys: opts.apiKeys });
859
1709
  if (!apiKey) {
860
1710
  return terminalError(401, "auth", "ANTHROPIC_API_KEY missing");
861
1711
  }
@@ -894,7 +1744,7 @@ function normalizeAnthropic(raw) {
894
1744
  return { text, structuredOutput: null, toolCalls, tokens, finishReason: r.stop_reason, raw };
895
1745
  }
896
1746
  async function executeGoogle(request, opts) {
897
- const apiKey = opts.apiKeys?.google ?? process.env.GOOGLE_API_KEY ?? process.env.GEMINI_API_KEY;
1747
+ const apiKey = resolveProviderKey("google", { apiKeys: opts.apiKeys });
898
1748
  if (!apiKey) {
899
1749
  return terminalError(401, "auth", "GOOGLE_API_KEY/GEMINI_API_KEY missing");
900
1750
  }
@@ -936,7 +1786,7 @@ function normalizeGoogle(raw) {
936
1786
  return { text, structuredOutput: null, toolCalls, tokens, finishReason: candidate?.finishReason, raw };
937
1787
  }
938
1788
  async function executeOpenAI(request, opts) {
939
- const apiKey = opts.apiKeys?.openai ?? process.env.OPENAI_API_KEY;
1789
+ const apiKey = resolveProviderKey("openai", { apiKeys: opts.apiKeys });
940
1790
  if (!apiKey) {
941
1791
  return terminalError(401, "auth", "OPENAI_API_KEY missing");
942
1792
  }
@@ -958,7 +1808,7 @@ async function executeOpenAI(request, opts) {
958
1808
  return { ok: true, status: res.status, response: normalizeOpenAILike(json) };
959
1809
  }
960
1810
  async function executeDeepSeek(request, opts) {
961
- const apiKey = opts.apiKeys?.deepseek ?? process.env.DEEPSEEK_API_KEY;
1811
+ const apiKey = resolveProviderKey("deepseek", { apiKeys: opts.apiKeys });
962
1812
  if (!apiKey) {
963
1813
  return terminalError(401, "auth", "DEEPSEEK_API_KEY missing");
964
1814
  }
@@ -1053,15 +1903,142 @@ function tryParseJson(s) {
1053
1903
 
1054
1904
  // src/call.ts
1055
1905
  async function call(ir, opts = {}) {
1906
+ const traceId = generateTraceId();
1907
+ safeEmit(
1908
+ () => emitCompileStart(traceId, ir.appId, {
1909
+ appId: ir.appId,
1910
+ archetype: ir.intent.archetype,
1911
+ models: ir.models
1912
+ })
1913
+ );
1056
1914
  const initial = compileAndRegister(ir, opts);
1915
+ safeEmit(
1916
+ () => emitCompileDone(traceId, ir.appId, {
1917
+ target: initial.target,
1918
+ provider: initial.provider,
1919
+ fallbackChain: initial.fallbackChain,
1920
+ tokensIn: initial.tokensIn,
1921
+ estimatedCostUsd: initial.estimatedCostUsd,
1922
+ mutationsApplied: initial.mutationsApplied,
1923
+ advisories: initial.advisories
1924
+ })
1925
+ );
1926
+ for (const adv of initial.advisories) {
1927
+ safeEmit(
1928
+ () => emitAdvisoryFired(traceId, ir.appId, { code: adv.code, message: adv.message })
1929
+ );
1930
+ }
1057
1931
  const start = Date.now();
1058
1932
  const attempts = [];
1059
- const targetsToTry = [initial.target, ...initial.fallbackChain];
1933
+ const rawTargets = [initial.target, ...initial.fallbackChain];
1934
+ let unreachableFiltered;
1935
+ let targetsToTry;
1936
+ if (opts.noAutoFilter) {
1937
+ targetsToTry = rawTargets;
1938
+ } else {
1939
+ const dropped = [];
1940
+ targetsToTry = [];
1941
+ for (const t of rawTargets) {
1942
+ if (isModelReachable(t, { apiKeys: opts.apiKeys })) {
1943
+ targetsToTry.push(t);
1944
+ } else {
1945
+ dropped.push(t);
1946
+ }
1947
+ }
1948
+ unreachableFiltered = dropped;
1949
+ if (targetsToTry.length === 0) {
1950
+ const latencyMs2 = Date.now() - start;
1951
+ await record({
1952
+ handle: initial.handle,
1953
+ tokensIn: 0,
1954
+ tokensOut: 0,
1955
+ latencyMs: latencyMs2,
1956
+ success: false,
1957
+ errorType: "no_reachable_models",
1958
+ promptPreview: extractPromptPreview(ir)
1959
+ });
1960
+ const noReachableAttempts = dropped.map((m) => ({
1961
+ model: m,
1962
+ status: "terminal",
1963
+ errorCode: "unreachable_provider",
1964
+ message: `No API key for ${m}'s provider \u2014 set one of PROVIDER_ENV_KEYS or pass apiKeys`
1965
+ }));
1966
+ throw new CallError(
1967
+ `call(): no reachable models in chain. Filtered: [${dropped.join(", ")}]. Add a key for one provider, or pass apiKeys.`,
1968
+ noReachableAttempts,
1969
+ void 0,
1970
+ "no_reachable_models"
1971
+ );
1972
+ }
1973
+ const archetypeName = ir.intent?.archetype;
1974
+ if (archetypeName) {
1975
+ const ensured = ensureCrossProviderTail({
1976
+ chain: targetsToTry,
1977
+ archetype: archetypeName,
1978
+ apiKeys: opts.apiKeys
1979
+ });
1980
+ if (ensured.appended) {
1981
+ targetsToTry = ensured.chain;
1982
+ }
1983
+ }
1984
+ }
1985
+ let policyBlockedFiltered;
1986
+ if (opts.policy?.blockedModels && opts.policy.blockedModels.length > 0) {
1987
+ const blocked = new Set(opts.policy.blockedModels);
1988
+ const filtered = [];
1989
+ const dropped = [];
1990
+ for (const t of targetsToTry) {
1991
+ if (blocked.has(t)) {
1992
+ dropped.push(t);
1993
+ } else {
1994
+ filtered.push(t);
1995
+ }
1996
+ }
1997
+ if (dropped.length > 0) {
1998
+ policyBlockedFiltered = dropped;
1999
+ targetsToTry = filtered;
2000
+ }
2001
+ if (targetsToTry.length === 0) {
2002
+ const latencyMs2 = Date.now() - start;
2003
+ await record({
2004
+ handle: initial.handle,
2005
+ tokensIn: 0,
2006
+ tokensOut: 0,
2007
+ latencyMs: latencyMs2,
2008
+ success: false,
2009
+ errorType: "all_blocked_by_policy",
2010
+ promptPreview: extractPromptPreview(ir)
2011
+ });
2012
+ const blockedAttempts = dropped.map((m) => ({
2013
+ model: m,
2014
+ status: "terminal",
2015
+ errorCode: "blocked_by_policy",
2016
+ message: `Skipped \u2014 model ${m} is in CompilePolicy.blockedModels`
2017
+ }));
2018
+ throw new CallError(
2019
+ `call(): all chain targets blocked by CompilePolicy.blockedModels: [${dropped.join(", ")}]`,
2020
+ blockedAttempts,
2021
+ void 0,
2022
+ "all_blocked_by_policy"
2023
+ );
2024
+ }
2025
+ }
1060
2026
  let activeCompile = initial;
1061
2027
  let lastErr;
2028
+ const failedProviders = /* @__PURE__ */ new Set();
1062
2029
  for (let i = 0; i < targetsToTry.length; i++) {
1063
2030
  const targetModel = targetsToTry[i];
1064
- if (i > 0) {
2031
+ const targetProfile = tryGetProfile(targetModel);
2032
+ if (targetProfile && failedProviders.has(targetProfile.provider) && !opts.noFallback) {
2033
+ attempts.push({
2034
+ model: targetModel,
2035
+ status: "terminal",
2036
+ errorCode: "auth_inferred",
2037
+ message: `Skipped \u2014 provider ${targetProfile.provider} returned 401/403 earlier in this call; same key inferred to fail`
2038
+ });
2039
+ continue;
2040
+ }
2041
+ if (targetModel !== initial.target) {
1065
2042
  try {
1066
2043
  activeCompile = compileAndRegister(
1067
2044
  {
@@ -1081,59 +2058,112 @@ async function call(ir, opts = {}) {
1081
2058
  continue;
1082
2059
  }
1083
2060
  }
2061
+ safeEmit(
2062
+ () => emitExecuteAttempt(traceId, ir.appId, { model: targetModel, attemptIndex: i })
2063
+ );
1084
2064
  const exec = await execute(activeCompile.request, {
1085
2065
  apiKeys: opts.apiKeys,
1086
2066
  fetchImpl: opts.fetchImpl,
1087
2067
  providerOverrides: opts.providerOverrides
1088
2068
  });
1089
- if (exec.ok) {
2069
+ const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
2070
+ if (validated.ok) {
1090
2071
  attempts.push({ model: targetModel, status: "success" });
1091
2072
  const latencyMs2 = Date.now() - start;
1092
- const responseWithStructured = withStructuredOutput(exec.response, ir);
1093
- void record({
2073
+ safeEmit(
2074
+ () => emitExecuteSuccess(traceId, ir.appId, {
2075
+ model: targetModel,
2076
+ tokensIn: validated.response.tokens.input,
2077
+ tokensOut: validated.response.tokens.output,
2078
+ latencyMs: latencyMs2
2079
+ })
2080
+ );
2081
+ const fellOver = targetModel !== initial.target;
2082
+ const fallbackReason = fellOver ? normalizeFallbackReason(attempts) : void 0;
2083
+ await record({
1094
2084
  handle: initial.handle,
1095
- tokensIn: responseWithStructured.tokens.input,
1096
- tokensOut: responseWithStructured.tokens.output,
2085
+ tokensIn: validated.response.tokens.input,
2086
+ tokensOut: validated.response.tokens.output,
1097
2087
  latencyMs: latencyMs2,
1098
2088
  success: true,
1099
- emptyResponse: responseWithStructured.tokens.output === 0,
1100
- toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
2089
+ emptyResponse: validated.response.tokens.output === 0,
2090
+ toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
1101
2091
  actualModel: targetModel !== initial.target ? targetModel : void 0,
1102
- responsePreview: responseWithStructured.text.slice(0, 200)
2092
+ mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
2093
+ promptPreview: extractPromptPreview(ir),
2094
+ responsePreview: validated.response.text.slice(0, 200),
2095
+ cacheReadInputTokens: validated.response.tokens.cached,
2096
+ cacheCreationInputTokens: validated.response.tokens.cacheCreated,
2097
+ // alpha.28 — Glass-Box renderer substrate (migration 018). call()
2098
+ // owns the lifecycle so it has direct visibility into finishReason
2099
+ // (from the normalized provider response), totalMs (mirrors latencyMs
2100
+ // for non-streaming; future streaming variant may diverge), and the
2101
+ // fell-over-from / fallback-reason pair (already computed above for
2102
+ // the CallResult return shape).
2103
+ finishReason: validated.response.finishReason,
2104
+ totalMs: latencyMs2,
2105
+ fellOverFrom: fellOver ? initial.target : void 0,
2106
+ fallbackReason
1103
2107
  });
2108
+ if (fellOver) {
2109
+ const firstFailed = attempts.find((a) => a.status !== "success");
2110
+ if (firstFailed) {
2111
+ safeEmit(
2112
+ () => emitFallbackWalked(traceId, ir.appId, {
2113
+ from: initial.target,
2114
+ to: targetModel,
2115
+ reason: fallbackReason ?? "unknown",
2116
+ attempt: firstFailed
2117
+ })
2118
+ );
2119
+ }
2120
+ }
1104
2121
  return {
1105
2122
  handle: initial.handle,
1106
2123
  actualModel: targetModel,
1107
2124
  requestedModel: initial.target,
1108
2125
  provider: activeCompile.provider,
1109
- response: responseWithStructured,
2126
+ response: validated.response,
1110
2127
  latencyMs: latencyMs2,
1111
2128
  mutationsApplied: activeCompile.mutationsApplied,
1112
- attempts
2129
+ attempts,
2130
+ servedBy: targetModel,
2131
+ fellOverFrom: fellOver ? initial.target : void 0,
2132
+ fallbackReason,
2133
+ unreachableFiltered,
2134
+ policyBlockedFiltered,
2135
+ traceId
1113
2136
  };
1114
2137
  }
1115
2138
  attempts.push({
1116
2139
  model: targetModel,
1117
- status: exec.errorType,
1118
- errorCode: exec.errorCode,
1119
- message: exec.message
2140
+ status: validated.errorType,
2141
+ errorCode: validated.errorCode,
2142
+ message: validated.message
1120
2143
  });
1121
- lastErr = exec;
1122
- if (exec.errorType === "terminal" || opts.noFallback) {
2144
+ lastErr = validated;
2145
+ if (validated.errorType === "terminal" || opts.noFallback) {
2146
+ if (validated.errorCode === "auth" && !opts.noFallback && activeCompile.provider) {
2147
+ failedProviders.add(activeCompile.provider);
2148
+ continue;
2149
+ }
1123
2150
  break;
1124
2151
  }
1125
2152
  }
1126
2153
  const latencyMs = Date.now() - start;
1127
- void record({
2154
+ await record({
1128
2155
  handle: initial.handle,
1129
2156
  tokensIn: 0,
1130
2157
  tokensOut: 0,
1131
2158
  latencyMs,
1132
2159
  success: false,
1133
- errorType: lastErr?.errorCode
2160
+ errorType: lastErr?.errorCode,
2161
+ promptPreview: extractPromptPreview(ir)
1134
2162
  });
2163
+ const filteredNote = unreachableFiltered && unreachableFiltered.length > 0 ? ` (also auto-filtered: [${unreachableFiltered.join(", ")}] \u2014 no API key)` : "";
2164
+ const blockedNote = policyBlockedFiltered && policyBlockedFiltered.length > 0 ? ` (also policy-blocked: [${policyBlockedFiltered.join(", ")}])` : "";
1135
2165
  throw new CallError(
1136
- `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
2166
+ `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}${blockedNote}`,
1137
2167
  attempts,
1138
2168
  lastErr?.status,
1139
2169
  lastErr?.errorCode
@@ -1148,20 +2178,74 @@ function compileAndRegister(ir, opts) {
1148
2178
  registerCompile(ir.appId, ir.intent.archetype, ir, result);
1149
2179
  return result;
1150
2180
  }
1151
- function withStructuredOutput(response, ir) {
1152
- if (!ir.constraints?.structuredOutput) return response;
1153
- if (!response.text) return response;
2181
/**
 * Build a short text preview of the prompt carried by a compile IR.
 *
 * The current turn's content is preferred; when it is not usable the content
 * of the last history entry is used instead. Only non-empty string content is
 * previewed: non-string content (elsewhere in this file message content is
 * type-checked before being treated as a string) has no meaningful
 * `.slice(0, 200)` — an array would come back as an array and a plain object
 * would throw — so it is treated as "no preview available".
 *
 * @param {object} ir - IR object; reads `ir.currentTurn?.content` and `ir.history`.
 * @returns {string|undefined} At most the first 200 characters, or `undefined`.
 */
function extractPromptPreview(ir) {
  const maxChars = 200;
  const preview = (content) =>
    typeof content === "string" && content.length > 0 ? content.slice(0, maxChars) : void 0;
  const fromTurn = preview(ir.currentTurn?.content);
  if (fromTurn !== void 0) return fromTurn;
  return preview(ir.history?.[ir.history.length - 1]?.content);
}
2188
/**
 * Check a provider response against the structured-output contract.
 *
 * When `ir.constraints?.structuredOutput` is not set the response is passed
 * through untouched. Otherwise:
 *  - a finish reason of "max_tokens" or "length" (case-insensitive) is
 *    reported as a retryable `max_tokens_on_structured_output` failure;
 *  - an empty/missing `text` is passed through as-is;
 *  - `text` is parsed as JSON; success attaches `structuredOutput` to a copy
 *    of the response, failure is reported as a retryable
 *    `structured_output_parse_failed` failure carrying the parser message.
 *
 * @param {object} exec - Execution result; reads `exec.response` and `exec.status`.
 * @param {object} ir - IR; reads `ir.constraints?.structuredOutput`.
 * @returns {object} `{ ok: true, response }` or
 *   `{ ok: false, status, errorType, errorCode, message, raw }`.
 */
function validateStructuredContract(exec, ir) {
  if (!ir.constraints?.structuredOutput) {
    return { ok: true, response: exec.response };
  }
  const { response } = exec;
  // Both failure shapes differ only in code and message.
  const retryableFailure = (errorCode, message) => ({
    ok: false,
    status: exec.status,
    errorType: "retryable",
    errorCode,
    message,
    raw: response.raw
  });
  const finishReason = (response.finishReason ?? "").toLowerCase();
  const wasTruncated = finishReason === "max_tokens" || finishReason === "length";
  if (wasTruncated) {
    return retryableFailure(
      "max_tokens_on_structured_output",
      `Provider returned finishReason="${response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`
    );
  }
  if (!response.text) {
    return { ok: true, response };
  }
  let parsed;
  try {
    parsed = JSON.parse(response.text);
  } catch (err) {
    return retryableFailure(
      "structured_output_parse_failed",
      err instanceof Error ? err.message : String(err)
    );
  }
  return { ok: true, response: { ...response, structuredOutput: parsed } };
}
2220
/**
 * Collapse the error code of the first non-successful attempt into a coarse
 * fallback-reason label.
 *
 * @param {Array<object>} attempts - Attempt records; reads `status` and `errorCode`.
 * @returns {string|undefined} One of "rate_limit", "cliff", "cost_cap",
 *   "provider_auth_failed", "provider_error"; `undefined` when no attempt
 *   has a status other than "success".
 */
function normalizeFallbackReason(attempts) {
  const firstFailure = attempts.find((attempt) => attempt.status !== "success");
  if (!firstFailure) return void 0;
  switch (firstFailure.errorCode ?? "") {
    case "rate_limit_429":
    case "rate_limit":
      return "rate_limit";
    case "max_tokens_on_structured_output":
    case "structured_output_parse_failed":
      return "cliff";
    case "cost_cap_exceeded":
      return "cost_cap";
    case "auth":
    case "auth_inferred":
      return "provider_auth_failed";
    default:
      // Any other (or missing) code is reported generically.
      return "provider_error";
  }
}
2232
/**
 * Produce a trace identifier in 8-4-4-4-12 hexadecimal form.
 *
 * Uses `globalThis.crypto.randomUUID()` when that function exists; otherwise
 * (or if touching it throws) the groups are filled from `Math.random()`.
 *
 * @returns {string} The identifier.
 */
function generateTraceId() {
  try {
    const cryptoApi = globalThis.crypto;
    if (cryptoApi && typeof cryptoApi.randomUUID === "function") {
      return cryptoApi.randomUUID();
    }
  } catch {
    // Fall through to the Math.random()-based identifier below.
  }
  const randomHex = (digits) => {
    const value = Math.floor(Math.random() * Math.pow(16, digits));
    return value.toString(16).padStart(digits, "0");
  };
  const groups = [];
  for (const digits of [8, 4, 4, 4, 12]) {
    groups.push(randomHex(digits));
  }
  return groups.join("-");
}
2243
/**
 * Invoke `fn` and discard anything it throws.
 *
 * @param {Function} fn - Zero-argument callback to run.
 * @returns {void}
 */
function safeEmit(fn) {
  try {
    fn();
  } catch (_ignored) {
    // Intentionally swallowed: a throwing callback must not reach the caller.
  }
}
1165
2249
 
1166
2250
  // src/oracle.ts
1167
2251
  var DEFAULT_DIMENSIONS = ["correctness", "completeness", "conciseness", "format"];
@@ -1251,6 +2335,397 @@ function clamp(n) {
1251
2335
  return Math.max(0, Math.min(1, n));
1252
2336
  }
1253
2337
 
2338
+ // src/advisories-api.ts
2339
// Accepted values for the advisory `severity`, `status` and
// `resolution_source` columns; anything else is coerced by the helpers below.
var SEVERITY_SET = /* @__PURE__ */ new Set(["info", "warn", "critical"]);
var STATUS_SET = /* @__PURE__ */ new Set(["open", "snoozed", "resolved"]);
var RESOLUTION_SOURCE_SET = /* @__PURE__ */ new Set(["auto", "consumer-marked", "declined"]);

/**
 * @param {unknown} v - Candidate value.
 * @returns {string|undefined} `v` when it is a non-empty string, else `undefined`.
 */
function asString(v) {
  if (typeof v !== "string") return void 0;
  return v.length > 0 ? v : void 0;
}

/**
 * @param {unknown} v - Candidate value.
 * @returns {string} `v` when it is a known severity, otherwise "info".
 */
function asSeverity(v) {
  return typeof v === "string" && SEVERITY_SET.has(v) ? v : "info";
}

/**
 * @param {unknown} v - Candidate value.
 * @returns {string} `v` when it is a known status, otherwise "open".
 */
function asStatus(v) {
  return typeof v === "string" && STATUS_SET.has(v) ? v : "open";
}

/**
 * @param {unknown} v - Candidate value.
 * @returns {string|undefined} `v` when it is a known resolution source,
 *   otherwise `undefined` (there is no default).
 */
function asResolutionSource(v) {
  return typeof v === "string" && RESOLUTION_SOURCE_SET.has(v) ? v : void 0;
}
2367
/**
 * Convert one snake_case advisory row into the camelCase advisory object.
 *
 * String columns that are missing or of the wrong type become "" (or are
 * omitted for the optional fields); `observation_count` becomes 0 when it is
 * not a number. `suggestedFix` is `null` unless the row has a docs URL or a
 * suggestion, in which case it is `{ type: "manual", docsLink?, before? }`.
 *
 * @param {object} row - Raw row; reads the `applies_to_*`, `docs_url`,
 *   `suggestion`, `id`, `rule`, `severity`, `opened_at`, `last_observed_at`,
 *   `observation_count`, `message`, `status`, `resolved_at`,
 *   `resolution_source` and `resolution_note` fields.
 * @returns {object} The advisory object.
 */
function rowToAdvisory(row) {
  const archetype = asString(row.applies_to_archetype);
  const model = asString(row.applies_to_model);
  const docsLink = asString(row.docs_url);
  const suggestion = asString(row.suggestion);

  const textOrEmpty = (value) => (typeof value === "string" ? value : "");

  let suggestedFix = null;
  if (docsLink || suggestion) {
    suggestedFix = {
      type: "manual",
      ...(docsLink ? { docsLink } : {}),
      ...(suggestion ? { before: suggestion } : {})
    };
  }

  const appliesTo = {};
  if (archetype) appliesTo.archetype = archetype;
  if (model) appliesTo.model = model;

  const advisory = {
    id: textOrEmpty(row.id),
    rule: textOrEmpty(row.rule),
    severity: asSeverity(row.severity),
    openedAt: textOrEmpty(row.opened_at),
    lastObservedAt: textOrEmpty(row.last_observed_at),
    observationCount: typeof row.observation_count === "number" ? row.observation_count : 0,
    appliesTo,
    message: textOrEmpty(row.message),
    suggestedFix,
    // Always false for now; flagged as reserved (alpha.30+) in the original.
    autoApplicable: false,
    status: asStatus(row.status)
  };

  // Resolution details are attached only when present and valid.
  const resolvedAt = asString(row.resolved_at);
  if (resolvedAt) advisory.resolvedAt = resolvedAt;
  const resolutionSource = asResolutionSource(row.resolution_source);
  if (resolutionSource) advisory.resolutionSource = resolutionSource;
  const resolutionNote = asString(row.resolution_note);
  if (resolutionNote) advisory.resolutionNote = resolutionNote;
  return advisory;
}
2403
/**
 * Choose the fetch implementation to use.
 *
 * @param {Function} [injected] - Caller-supplied fetch.
 * @returns {Function} `injected` when it is neither null nor undefined;
 *   otherwise a wrapper that forwards its arguments to `globalThis.fetch`
 *   (looked up at call time, not when this function runs).
 */
function resolveFetch(injected) {
  if (injected !== null && injected !== void 0) {
    return injected;
  }
  return (...args) => globalThis.fetch(...args);
}
2406
/**
 * Remove every trailing "/" from an endpoint string.
 *
 * @param {string} endpoint - Base URL.
 * @returns {string} `endpoint` without trailing slashes.
 */
function normalizeEndpoint(endpoint) {
  const SLASH = 47; // char code of "/"
  let end = endpoint.length;
  while (end > 0 && endpoint.charCodeAt(end - 1) === SLASH) {
    end -= 1;
  }
  return endpoint.slice(0, end);
}
2409
/**
 * Fetch advisories for one app from the brain's `actionable_advisories_v`
 * REST view and convert each row with `rowToAdvisory`.
 *
 * Query built here: `app_id=eq.<appId>`, optional `severity=eq.<severity>`,
 * `status=eq.<status>` (defaults to "open"; the value "all" disables the
 * status filter), ordered by `last_observed_at.desc`.
 *
 * @param {object} opts
 * @param {string} opts.appId - Required.
 * @param {string} [opts.severity] - Optional severity filter.
 * @param {string} [opts.status] - Status filter, or "all".
 * @param {string} opts.brainEndpoint - Base URL; trailing slashes are stripped.
 * @param {string} opts.brainJwt - Sent as `Authorization: Bearer …`.
 * @param {string} opts.brainAnonKey - Sent as the `apikey` header.
 * @param {Function} [opts.fetch] - Fetch override; defaults to the global fetch.
 * @returns {Promise<Array<object>>} Converted advisories; non-object rows are dropped.
 * @throws {Error} When `appId` is missing, the request fails at the network
 *   level, the brain answers 401/403, 5xx or any other non-OK status, or the
 *   body is not a JSON array. Network and JSON-parse failures carry the
 *   underlying error as `cause`.
 */
async function getActionableAdvisories(opts) {
  const {
    appId,
    severity,
    status,
    brainEndpoint,
    brainJwt,
    brainAnonKey,
    fetch: injectedFetch
  } = opts;
  if (!appId) {
    throw new Error("getActionableAdvisories: appId is required");
  }
  const doFetch = resolveFetch(injectedFetch);
  const base = normalizeEndpoint(brainEndpoint);

  const qs = new URLSearchParams();
  qs.set("app_id", `eq.${appId}`);
  if (severity) qs.set("severity", `eq.${severity}`);
  const effectiveStatus = status ?? "open";
  if (effectiveStatus !== "all") {
    qs.set("status", `eq.${effectiveStatus}`);
  }
  qs.set("order", "last_observed_at.desc");
  const url = `${base}/rest/v1/actionable_advisories_v?${qs.toString()}`;

  let res;
  try {
    res = await doFetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${brainJwt}`,
        apikey: brainAnonKey,
        Accept: "application/json"
      }
    });
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    // Keep the original failure reachable for debugging instead of
    // flattening it into the message only.
    throw new Error(`getActionableAdvisories: network error: ${msg}`, { cause: err });
  }
  if (res.status === 401 || res.status === 403) {
    throw new Error("getActionableAdvisories: brain auth misconfig");
  }
  if (res.status >= 500) {
    throw new Error(`getActionableAdvisories: brain unavailable (${res.status})`);
  }
  if (!res.ok) {
    throw new Error(`getActionableAdvisories: bad request (${res.status})`);
  }

  let rows;
  try {
    rows = await res.json();
  } catch (err) {
    throw new Error("getActionableAdvisories: malformed brain response", { cause: err });
  }
  if (!Array.isArray(rows)) {
    throw new Error("getActionableAdvisories: expected array from brain");
  }
  return rows
    .filter((raw) => raw && typeof raw === "object")
    .map((raw) => rowToAdvisory(raw));
}
2473
/**
 * Mark an advisory as resolved by the consumer.
 *
 * Performs three brain REST calls in sequence:
 *  1. look up the advisory's `(app_id, rule)` in `actionable_advisories_v`;
 *  2. list the ids of that app's rows in `compile_outcomes`;
 *  3. PATCH the unresolved `compile_outcome_advisories` rows for those
 *     outcome ids and that rule code, setting `resolved_at`,
 *     `resolution_source: "consumer-marked"` and, when supplied,
 *     `resolution_note`.
 *
 * This function reports failures through its return value instead of
 * throwing. If step 2 yields no numeric ids there is nothing to patch and the
 * result is `{ ok: true }`.
 *
 * @param {object} opts
 * @param {string} opts.id - Advisory id (required).
 * @param {string} [opts.resolutionNote] - Optional note stored with the resolution.
 * @param {string} opts.brainEndpoint - Base URL; trailing slashes are stripped.
 * @param {string} opts.brainJwt - Sent as `Authorization: Bearer …`.
 * @param {string} opts.brainAnonKey - Sent as the `apikey` header.
 * @param {Function} [opts.fetch] - Fetch override; defaults to the global fetch.
 * @returns {Promise<{ok: true} | {ok: false, reason: string}>} Reasons:
 *   "id_required", "network_error:<msg>", "brain_auth_misconfig",
 *   "brain_unavailable", "brain_lookup_failed:<status>",
 *   "brain_lookup_malformed", "advisory_not_found",
 *   "advisory_tuple_invalid", "patch_failed:<status>".
 */
async function markAdvisoryResolved(opts) {
  const {
    id,
    resolutionNote,
    brainEndpoint,
    brainJwt,
    brainAnonKey,
    fetch: injectedFetch
  } = opts;
  if (!id) {
    return { ok: false, reason: "id_required" };
  }
  const doFetch = resolveFetch(injectedFetch);
  const base = normalizeEndpoint(brainEndpoint);
  const fail = (reason) => ({ ok: false, reason });

  // Send one request and classify transport / auth / 5xx / other non-OK
  // outcomes. `rejectedPrefix` names the reason used for "other non-OK".
  const send = async (url, init, rejectedPrefix) => {
    let res;
    try {
      res = await doFetch(url, init);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      return { failure: fail(`network_error:${msg}`) };
    }
    if (res.status === 401 || res.status === 403) return { failure: fail("brain_auth_misconfig") };
    if (res.status >= 500) return { failure: fail("brain_unavailable") };
    if (!res.ok) return { failure: fail(`${rejectedPrefix}:${res.status}`) };
    return { res };
  };

  // GET a URL and decode its JSON body.
  const getJson = async (url) => {
    const sent = await send(
      url,
      {
        method: "GET",
        headers: {
          Authorization: `Bearer ${brainJwt}`,
          apikey: brainAnonKey,
          Accept: "application/json"
        }
      },
      "brain_lookup_failed"
    );
    if (sent.failure) return sent;
    try {
      return { body: await sent.res.json() };
    } catch {
      return { failure: fail("brain_lookup_malformed") };
    }
  };

  // Step 1: resolve the advisory id to its (app_id, rule) pair.
  const lookup = await getJson(
    `${base}/rest/v1/actionable_advisories_v?id=eq.${encodeURIComponent(id)}&select=app_id,rule`
  );
  if (lookup.failure) return lookup.failure;
  const lookupRows = lookup.body;
  if (!Array.isArray(lookupRows) || lookupRows.length === 0) {
    return fail("advisory_not_found");
  }
  // Optional chaining: a null/primitive first row is reported as an invalid
  // tuple instead of throwing out of a function that otherwise never throws.
  const tuple = lookupRows[0];
  const appId = typeof tuple?.app_id === "string" ? tuple.app_id : "";
  const code = typeof tuple?.rule === "string" ? tuple.rule : "";
  if (!appId || !code) {
    return fail("advisory_tuple_invalid");
  }

  // Step 2: collect the numeric outcome ids recorded for that app.
  const outcomes = await getJson(
    `${base}/rest/v1/compile_outcomes?app_id=eq.${encodeURIComponent(appId)}&select=id`
  );
  if (outcomes.failure) return outcomes.failure;
  if (!Array.isArray(outcomes.body)) {
    return fail("brain_lookup_malformed");
  }
  const outcomeIds = [];
  for (const row of outcomes.body) {
    if (row && typeof row === "object") {
      const idVal = row.id;
      if (typeof idVal === "number" && Number.isFinite(idVal)) {
        outcomeIds.push(idVal);
      }
    }
  }
  if (outcomeIds.length === 0) {
    return { ok: true };
  }

  // Step 3: stamp every still-unresolved advisory row for those outcomes.
  const inList = outcomeIds.join(",");
  const patchUrl = `${base}/rest/v1/compile_outcome_advisories?outcome_id=in.(${inList})&code=eq.${encodeURIComponent(code)}&resolved_at=is.null`;
  const patchBody = {
    resolved_at: (/* @__PURE__ */ new Date()).toISOString(),
    resolution_source: "consumer-marked"
  };
  if (resolutionNote !== void 0) {
    patchBody.resolution_note = resolutionNote;
  }
  const patched = await send(
    patchUrl,
    {
      method: "PATCH",
      headers: {
        Authorization: `Bearer ${brainJwt}`,
        apikey: brainAnonKey,
        "Content-Type": "application/json",
        Accept: "application/json",
        // The updated rows are not needed back.
        Prefer: "return=minimal"
      },
      body: JSON.stringify(patchBody)
    },
    "patch_failed"
  );
  if (patched.failure) return patched.failure;
  return { ok: true };
}
2609
+
2610
+ // src/models-brain.ts
2611
/**
 * Structural check for a model row.
 *
 * @param {unknown} x - Candidate row.
 * @returns {boolean} True when `x` is a non-null object whose `model_id` and
 *   `provider` are both strings.
 */
function isModelRow(x) {
  if (!x || typeof x !== "object") {
    return false;
  }
  return ["model_id", "provider"].every((key) => typeof x[key] === "string");
}
2616
/**
 * Structural check for an alias row.
 *
 * @param {unknown} x - Candidate row.
 * @returns {boolean} True when `x` is a non-null object whose `alias_id` and
 *   `canonical_id` are both strings.
 */
function isAliasRow(x) {
  if (!x || typeof x !== "object") {
    return false;
  }
  return ["alias_id", "canonical_id"].every((key) => typeof x[key] === "string");
}
2621
/**
 * Convert a snake_case model row into a camelCase model profile.
 *
 * The row is rejected (`null`) when `cliffs` or `recovery` is present but not
 * an array, when `lowering` is present but not a plain (non-array) object, or
 * when reading the row throws. Missing or null columns take the defaults
 * spelled out below.
 *
 * @param {object} row - Raw model row.
 * @returns {object|null} The profile, or `null` for an unusable row.
 */
function rowToProfile(row) {
  try {
    const isMissing = (value) => value === void 0 || value === null;
    const isListOrMissing = (value) => isMissing(value) || Array.isArray(value);
    const { cliffs, recovery, lowering } = row;
    const loweringUsable = isMissing(lowering) || (typeof lowering === "object" && !Array.isArray(lowering));
    if (!isListOrMissing(cliffs) || !isListOrMissing(recovery) || !loweringUsable) {
      return null;
    }
    const defaultLowering = { system: { mode: "inline" }, cache: { strategy: "unsupported" } };
    return {
      id: row.model_id,
      provider: row.provider,
      status: row.status ?? "current",
      maxContextTokens: row.max_context_tokens ?? 0,
      maxOutputTokens: row.max_output_tokens ?? 0,
      maxTools: row.max_tools ?? 0,
      parallelToolCalls: row.parallel_tool_calls ?? false,
      structuredOutput: row.structured_output ?? "none",
      systemPromptMode: row.system_prompt_mode ?? "inline",
      streaming: row.streaming ?? true,
      cliffs: cliffs ?? [],
      costInputPer1m: row.cost_input_per_1m ?? 0,
      costOutputPer1m: row.cost_output_per_1m ?? 0,
      lowering: lowering ?? defaultLowering,
      recovery: recovery ?? [],
      strengths: row.strengths ?? [],
      weaknesses: row.weaknesses ?? [],
      // null is normalised to undefined for the optional fields.
      notes: row.notes ?? void 0,
      verifiedAgainstDocs: row.verified_against_docs ?? void 0,
      archetypePerf: row.archetype_perf ?? void 0
    };
  } catch {
    return null;
  }
}
2658
/**
 * Convert a camelCase model profile into a snake_case row.
 *
 * `verified_against_docs` is taken from `opts.verifiedAgainstDocs` when that
 * is provided; otherwise from the profile, kept only when it starts with a
 * `YYYY-MM-DD` date and written as `null` when it does not. The key is
 * omitted when neither source defines it. `version_added` and
 * `version_removed` are written only when provided in `opts`.
 *
 * @param {object} profile - Model profile.
 * @param {object} [opts]
 * @param {boolean} [opts.active=true] - Value for the `active` column.
 * @param {*} [opts.verifiedAgainstDocs] - Overrides the profile's value.
 * @param {*} [opts.versionAdded]
 * @param {*} [opts.versionRemoved]
 * @returns {object} The row.
 */
function profileToRow(profile, opts = {}) {
  const startsWithIsoDate = /^\d{4}-\d{2}-\d{2}/;
  const row = {
    model_id: profile.id,
    provider: profile.provider,
    status: profile.status,
    max_context_tokens: profile.maxContextTokens,
    max_output_tokens: profile.maxOutputTokens,
    max_tools: profile.maxTools,
    parallel_tool_calls: profile.parallelToolCalls,
    structured_output: profile.structuredOutput,
    system_prompt_mode: profile.systemPromptMode,
    streaming: profile.streaming,
    cliffs: profile.cliffs,
    cost_input_per_1m: profile.costInputPer1m,
    cost_output_per_1m: profile.costOutputPer1m,
    lowering: profile.lowering,
    recovery: profile.recovery,
    strengths: profile.strengths,
    weaknesses: profile.weaknesses,
    notes: profile.notes ?? null,
    archetype_perf: profile.archetypePerf ?? null,
    active: opts.active ?? true
  };
  const { verifiedAgainstDocs: override, versionAdded, versionRemoved } = opts;
  const fromProfile = profile.verifiedAgainstDocs;
  if (override !== void 0) {
    row.verified_against_docs = override;
  } else if (fromProfile !== void 0) {
    row.verified_against_docs = startsWithIsoDate.test(fromProfile) ? fromProfile : null;
  }
  if (versionAdded !== void 0) {
    row.version_added = versionAdded;
  }
  if (versionRemoved !== void 0) {
    row.version_removed = versionRemoved;
  }
  return row;
}
2691
/**
 * Build an id → profile map from raw model rows.
 *
 * Rows that fail `isModelRow`, or that `rowToProfile` rejects, are skipped.
 * When two rows share an id the later one wins.
 *
 * @param {Iterable<unknown>} rows - Raw rows.
 * @returns {Map<string, object>} Profiles keyed by profile id.
 */
function mapRowsToModels(rows) {
  const profilesById = /* @__PURE__ */ new Map();
  for (const candidate of rows) {
    const profile = isModelRow(candidate) ? rowToProfile(candidate) : null;
    if (profile) {
      profilesById.set(profile.id, profile);
    }
  }
  return profilesById;
}
2700
/**
 * Build an alias → canonical-id lookup object from raw alias rows.
 *
 * Rows that fail `isAliasRow` are skipped; a repeated alias keeps the value
 * from the later row.
 *
 * @param {Iterable<unknown>} rows - Raw rows.
 * @returns {Object<string, string>} Canonical ids keyed by alias id.
 */
function mapRowsToAliases(rows) {
  const canonicalByAlias = {};
  for (const candidate of rows) {
    if (isAliasRow(candidate)) {
      const { alias_id: alias, canonical_id: canonical } = candidate;
      canonicalByAlias[alias] = canonical;
    }
  }
  return canonicalByAlias;
}
2708
/**
 * Index the profiles returned by `allProfilesRaw()` by id.
 *
 * @returns {Map<string, object>} A new map on every call; for a repeated id
 *   the later profile wins.
 */
function bundledModels() {
  const profilesById = /* @__PURE__ */ new Map();
  for (const profile of allProfilesRaw()) {
    profilesById.set(profile.id, profile);
  }
  return profilesById;
}
2711
/**
 * Return a shallow copy of the `ALIASES` table.
 *
 * @returns {Object<string, string>} A fresh object on every call, so callers
 *   cannot mutate `ALIASES` through it.
 */
function bundledAliases() {
  const aliasesCopy = { ...ALIASES };
  return aliasesCopy;
}
2714
// Loader for the `kgauto_models` table: rows are converted by
// `mapRowsToModels`, with `bundledModels` supplied as the fallback source.
// NOTE(review): caching/refresh behaviour lives in `createBrainQueryCache`,
// which is not visible here; the hook below relies on the loader being
// callable synchronously.
var loadModelsFromBrain = createBrainQueryCache({
  table: "kgauto_models",
  mapRows: mapRowsToModels,
  bundledFallback: bundledModels
});
// Loader for the `kgauto_aliases` table, wired the same way.
var loadAliasesFromBrain = createBrainQueryCache({
  table: "kgauto_aliases",
  mapRows: mapRowsToAliases,
  bundledFallback: bundledAliases
});
// Register the profile hook so profile and alias lookups consult the loaders.
_setProfileBrainHook({
  getProfile: (canonical) => loadModelsFromBrain().get(canonical),
  resolveAlias: (id) => {
    const aliases = loadAliasesFromBrain();
    // The alias table is a plain object, so only own keys count as aliases:
    // a bare `aliases[id]` would resolve ids such as "constructor" or
    // "toString" to inherited Object.prototype members.
    return Object.prototype.hasOwnProperty.call(aliases, id) ? aliases[id] : void 0;
  }
});
2728
+
1254
2729
  // src/index.ts
1255
2730
  function compile2(ir, opts) {
1256
2731
  const result = compile(ir, opts);
@@ -1258,12 +2733,19 @@ function compile2(ir, opts) {
1258
2733
  return result;
1259
2734
  }
1260
2735
  export {
2736
+ ABSOLUTE_FLOOR,
1261
2737
  ALIASES,
1262
2738
  ALL_ARCHETYPES,
2739
+ ARCHETYPE_FLOOR_DEFAULT,
1263
2740
  CallError,
1264
2741
  DIALECT_VERSION,
1265
2742
  INTENT_ARCHETYPES,
2743
+ MEASURED_GROUNDING_MIN_N,
2744
+ PROVIDER_ENV_KEYS,
2745
+ RULE_SEQUENTIAL_TOOL_CLIFF,
2746
+ TRANSLATOR_FLOOR,
1266
2747
  allProfiles,
2748
+ applySectionRewrites,
1267
2749
  bucketContext,
1268
2750
  bucketHistory,
1269
2751
  bucketToolCount,
@@ -1274,13 +2756,41 @@ export {
1274
2756
  configureBrain,
1275
2757
  countTokens,
1276
2758
  execute,
2759
+ getActionableAdvisories,
2760
+ getAllStarterChains,
2761
+ getAllStarterChainsWithGrounding,
2762
+ getArchetypePerfScore,
2763
+ getDefaultFallbackChain,
2764
+ getDefaultFallbackChainWithGrounding,
2765
+ getModelCompatibility,
2766
+ getPerAxisMetrics,
1277
2767
  getProfile,
2768
+ getReachabilityDiagnostic,
2769
+ getSequentialStarterChain,
2770
+ getSequentialStarterChainWithGrounding,
2771
+ getStarterChain,
2772
+ getStarterChainWithGrounding,
1278
2773
  hashShape,
1279
2774
  isArchetype,
2775
+ isBrainQueryActiveFor,
2776
+ isModelReachable,
2777
+ isProviderReachable,
1280
2778
  learningKey,
2779
+ loadAliasesFromBrain,
2780
+ loadArchetypePerfFromBrain,
2781
+ loadArchetypePerfNFromBrain,
2782
+ loadChainsFromBrain,
2783
+ loadModelsFromBrain,
2784
+ loadPricingFromBrain,
2785
+ markAdvisoryResolved,
2786
+ profileToRow,
1281
2787
  profilesByProvider,
1282
2788
  record,
2789
+ recordOutcome,
1283
2790
  resetTokenizer,
2791
+ resolvePricingAt,
2792
+ resolveProviderKey,
2793
+ runAdvisor,
1284
2794
  setTokenizer,
1285
2795
  tryGetProfile
1286
2796
  };