@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -9,13 +9,47 @@ import {
9
9
  isArchetype,
10
10
  learningKey
11
11
  } from "./chunk-5TI6PNSK.mjs";
12
+ import {
13
+ ABSOLUTE_FLOOR,
14
+ ARCHETYPE_FLOOR_DEFAULT,
15
+ PROVIDER_ENV_KEYS,
16
+ configureBrainQuery,
17
+ createBrainQueryCache,
18
+ ensureCrossProviderTail,
19
+ getAllStarterChains,
20
+ getAllStarterChainsWithGrounding,
21
+ getDefaultFallbackChain,
22
+ getDefaultFallbackChainWithGrounding,
23
+ getModelCompatibility,
24
+ getPerAxisMetrics,
25
+ getReachabilityDiagnostic,
26
+ getSequentialStarterChain,
27
+ getSequentialStarterChainWithGrounding,
28
+ getStarterChain,
29
+ getStarterChainWithGrounding,
30
+ isBrainQueryActiveFor,
31
+ isModelReachable,
32
+ isProviderReachable,
33
+ loadChainsFromBrain,
34
+ resolveProviderKey
35
+ } from "./chunk-WXCFWUCN.mjs";
12
36
  import {
13
37
  ALIASES,
38
+ _setProfileBrainHook,
14
39
  allProfiles,
40
+ allProfilesRaw,
15
41
  getProfile,
16
42
  profilesByProvider,
17
43
  tryGetProfile
18
- } from "./chunk-MBEI5UOM.mjs";
44
+ } from "./chunk-JQGRWJZO.mjs";
45
+ import {
46
+ emitAdvisoryFired,
47
+ emitCompileDone,
48
+ emitCompileStart,
49
+ emitExecuteAttempt,
50
+ emitExecuteSuccess,
51
+ emitFallbackWalked
52
+ } from "./chunk-NBO4R5PC.mjs";
19
53
 
20
54
  // src/tokenizer.ts
21
55
  var tokenizerImpl = defaultCharBasedCounter;
@@ -120,38 +154,96 @@ function passToolRelevance(ir, opts = {}) {
120
154
  ]
121
155
  };
122
156
  }
157
/**
 * Adds up the token counts of every history message whose `content` is a string.
 * Messages with any other kind of `content` contribute nothing.
 *
 * @param {Array<{content?: unknown}>} history - Conversation messages.
 * @returns {number} Sum of `countTokens(content)` over the string-content messages.
 */
function totalHistoryTokens(history) {
  let tokenSum = 0;
  for (const message of history) {
    const text = message.content;
    if (typeof text !== "string") continue;
    tokenSum += countTokens(text);
  }
  return tokenSum;
}
123
164
  function passCompressHistory(ir, opts = {}) {
124
165
  const history = ir.history;
125
- if (!history || history.length === 0) return { value: ir, mutations: [] };
166
+ if (!history || history.length === 0) {
167
+ return { value: ir, mutations: [], historyTokensTotal: 0 };
168
+ }
126
169
  const keepRecent = opts.keepRecent ?? 4;
127
170
  const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
128
- if (history.length <= summarizeOlderThan) return { value: ir, mutations: [] };
129
- const cutIndex = history.length - keepRecent;
130
- const old = history.slice(0, cutIndex);
131
- const recent = history.slice(cutIndex);
132
- const userTurns = old.filter((m) => m.role === "user");
133
- const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
134
- const summary = {
135
- role: "system",
136
- content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
137
- };
138
- return {
139
- value: { ...ir, history: [summary, ...recent] },
140
- mutations: [
141
- {
142
- id: `compress-history-${old.length}`,
143
- source: "static_pass",
144
- passName: "compress_history",
145
- description: `Compressed ${old.length} old turns into 1 summary line (kept ${keepRecent} recent)`
171
+ const summarizeAboveTokens = opts.summarizeAboveTokens;
172
+ const historyTokensTotal = totalHistoryTokens(history);
173
+ const countThresholdHit = history.length > summarizeOlderThan;
174
+ const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens;
175
+ if (!countThresholdHit && !tokenThresholdHit) {
176
+ return { value: ir, mutations: [], historyTokensTotal };
177
+ }
178
+ if (history.length > keepRecent) {
179
+ const cutIndex = history.length - keepRecent;
180
+ const old = history.slice(0, cutIndex);
181
+ const recent = history.slice(cutIndex);
182
+ const userTurns = old.filter((m) => m.role === "user");
183
+ const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
184
+ const oldTokens = totalHistoryTokens(old);
185
+ const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
186
+ const summary = {
187
+ role: "system",
188
+ content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
189
+ };
190
+ return {
191
+ value: { ...ir, history: [summary, ...recent] },
192
+ mutations: [
193
+ {
194
+ id: `compress-history-${old.length}`,
195
+ source: "static_pass",
196
+ passName: "compress_history",
197
+ description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
198
+ }
199
+ ],
200
+ historyTokensTotal
201
+ };
202
+ }
203
+ if (tokenThresholdHit) {
204
+ let fattestIdx = -1;
205
+ let fattestTokens = 0;
206
+ for (let i = 0; i < history.length; i++) {
207
+ const m = history[i];
208
+ if (!m || typeof m.content !== "string") continue;
209
+ const t = countTokens(m.content);
210
+ if (t > fattestTokens) {
211
+ fattestTokens = t;
212
+ fattestIdx = i;
146
213
  }
147
- ]
148
- };
214
+ }
215
+ const FAT_DOMINANCE_FLOOR = 0.3;
216
+ const fattest = fattestIdx >= 0 ? history[fattestIdx] : void 0;
217
+ if (fattest && historyTokensTotal > 0 && fattestTokens / historyTokensTotal >= FAT_DOMINANCE_FLOOR) {
218
+ const firstLine = fattest.content.split("\n")[0]?.slice(0, 200) ?? "";
219
+ const newContent = `[Earlier ${fattest.role} message content omitted: ~${fattestTokens} tokens. Preview: "${firstLine}"]`;
220
+ const newHistory = history.slice();
221
+ newHistory[fattestIdx] = { ...fattest, content: newContent };
222
+ return {
223
+ value: { ...ir, history: newHistory },
224
+ mutations: [
225
+ {
226
+ id: `compress-fat-message-${fattestIdx}`,
227
+ source: "static_pass",
228
+ passName: "compress_history",
229
+ description: `Replaced fat ${fattest.role} message #${fattestIdx} content (~${fattestTokens} of ${historyTokensTotal} tokens, ${Math.round(fattestTokens / historyTokensTotal * 100)}% of history) with summary stub \u2014 token threshold ${summarizeAboveTokens} exceeded (history.length ${history.length} <= keepRecent ${keepRecent}, slice not possible)`
230
+ }
231
+ ],
232
+ historyTokensTotal
233
+ };
234
+ }
235
+ }
236
+ return { value: ir, mutations: [], historyTokensTotal };
149
237
  }
150
238
  function passApplyCliffs(ir, profile, estimatedInputTokens) {
151
239
  const mutations = [];
152
240
  const hints = { qualityWarning: [] };
153
241
  let nextIR = ir;
242
+ const sequentialMode = nextIR.constraints?.toolOrchestration === "sequential";
154
243
  for (const cliff of profile.cliffs) {
244
+ if (sequentialMode && cliff.reason.includes("L-040")) {
245
+ continue;
246
+ }
155
247
  let triggered = false;
156
248
  switch (cliff.metric) {
157
249
  case "input_tokens":
@@ -374,10 +466,16 @@ function lower(ir, profile, hints = {}) {
374
466
  }
375
467
  function lowerAnthropic(ir, profile, hints) {
376
468
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
377
- const messages = buildAnthropicMessages(ir.history ?? [], ir.currentTurn);
469
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
470
+ const policy = ir.historyCachePolicy;
471
+ const markIndex = resolveHistoryMarkIndex(history.length, policy);
472
+ const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
378
473
  const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
379
474
  const cacheableTokens = computeCacheableTokens(systemBlocks);
380
- const cacheSavings = cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
475
+ const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
476
+ const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
477
+ const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
478
+ const toolChoice = hints.wireOverrides?.parallelToolCalls === false && tools && tools.length > 0 ? { type: "auto", disable_parallel_tool_use: true } : void 0;
381
479
  return {
382
480
  request: {
383
481
  provider: "anthropic",
@@ -385,10 +483,16 @@ function lowerAnthropic(ir, profile, hints) {
385
483
  system: systemBlocks,
386
484
  messages,
387
485
  tools,
388
- max_tokens: hints.forceTerseOutput ? 200 : Math.min(profile.maxOutputTokens, 4096)
486
+ // alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
487
+ // floor surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
488
+ // Profile is the single source of truth; consumers wanting a tighter
489
+ // budget can pass providerOverrides.anthropic.max_tokens explicitly.
490
+ max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens,
491
+ tool_choice: toolChoice
389
492
  },
390
493
  diagnostics: {
391
494
  cacheableTokens,
495
+ historyCacheableTokens,
392
496
  estimatedCacheSavingsUsd: cacheSavings
393
497
  }
394
498
  };
@@ -421,17 +525,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
421
525
  }
422
526
  return blocks;
423
527
  }
424
- function buildAnthropicMessages(history, currentTurn) {
528
/**
 * Converts history plus the current turn into Anthropic-style `{ role, content }` messages.
 * System-role entries are dropped. The history entry at `markIndex` gets its content
 * routed through `attachAnthropicCacheControl`; every other entry uses `parts` when
 * present, else `content`.
 *
 * @param {Array<object>} history - Prior messages.
 * @param {object} [currentTurn] - Latest message, appended last unless its role is "system".
 * @param {number} [markIndex] - Index into `history` of the message to cache-mark; no match means no marker.
 * @returns {Array<{role: string, content: unknown}>}
 */
function buildAnthropicMessages(history, currentTurn, markIndex) {
  const messages = [];
  for (const [position, entry] of history.entries()) {
    if (entry.role === "system") continue;
    let content;
    if (position === markIndex) {
      content = attachAnthropicCacheControl(entry);
    } else {
      content = entry.parts ?? entry.content;
    }
    messages.push({ role: entry.role, content });
  }
  const includeCurrent = Boolean(currentTurn) && currentTurn.role !== "system";
  if (includeCurrent) {
    messages.push({ role: currentTurn.role, content: currentTurn.parts ?? currentTurn.content });
  }
  return messages;
}
544
/**
 * Produces the content blocks for a message with an ephemeral `cache_control`
 * marker on the final block.
 *
 * With a non-empty `parts` array, a copy of the array is returned whose last block
 * is a shallow copy carrying the marker; `m.parts` itself is not modified. Otherwise
 * `m.content` is wrapped in a single marked text block.
 *
 * @param {{parts?: Array<object>, content?: unknown}} m - Message to mark.
 * @returns {Array<object>} Content blocks, the last one carrying `cache_control`.
 */
function attachAnthropicCacheControl(m) {
  const { parts } = m;
  const hasParts = Array.isArray(parts) && parts.length > 0;
  if (!hasParts) {
    const textBlock = {
      type: "text",
      text: m.content,
      cache_control: { type: "ephemeral" }
    };
    return [textBlock];
  }
  const lastPosition = parts.length - 1;
  const markedLast = { ...parts[lastPosition], cache_control: { type: "ephemeral" } };
  return [...parts.slice(0, -1), markedLast];
}
562
/**
 * Resolves which history message should carry the cache marker.
 *
 * - No policy, strategy "none", or empty history: -1 (no marker).
 * - Strategy "all-but-latest": the last history index.
 * - Any other strategy: `policy.suffix` messages are left unmarked at the tail, so the
 *   marker lands on `historyLen - 1 - suffix`.
 *
 * `policy.suffix` must be a non-negative integer. Anything else (missing, negative,
 * fractional) resolves to -1. Previously such values produced an index that matched
 * no message yet was still `>= 0`, so callers counted history tokens as cacheable
 * even though no marker was attached.
 *
 * @param {number} historyLen - Number of history messages.
 * @param {{strategy: string, suffix?: number}} [policy] - History cache policy.
 * @returns {number} Index into the history array, or -1 for "do not mark".
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  if (!policy || policy.strategy === "none") return -1;
  if (historyLen === 0) return -1;
  if (policy.strategy === "all-but-latest") {
    return historyLen - 1;
  }
  const suffix = policy.suffix;
  if (!Number.isInteger(suffix) || suffix < 0) return -1;
  const idx = historyLen - 1 - suffix;
  return idx >= 0 ? idx : -1;
}
571
/**
 * Counts tokens in history messages from index 0 through `throughIndex` (inclusive),
 * stopping at the end of the array. System-role messages are skipped. A message with
 * a `parts` array contributes the string `text` of each part; otherwise a string
 * `content` is counted.
 *
 * @param {Array<object>} history - Conversation messages.
 * @param {number} throughIndex - Last index to include.
 * @returns {number} Total token count.
 */
function sumHistoryTokens(history, throughIndex) {
  let tokens = 0;
  const addIfText = (value) => {
    if (typeof value === "string") tokens += countTokens(value);
  };
  let position = 0;
  while (position <= throughIndex && position < history.length) {
    const message = history[position];
    position += 1;
    if (message.role === "system") continue;
    if (Array.isArray(message.parts)) {
      for (const part of message.parts) addIfText(part.text);
      continue;
    }
    addIfText(message.content);
  }
  return tokens;
}
435
586
  function toAnthropicTools(tools) {
436
587
  return tools.map((t) => ({
437
588
  name: t.name,
@@ -466,6 +617,9 @@ function lowerGoogle(ir, profile, hints) {
466
617
  const minTokens = profile.lowering.cache.minTokens ?? 4096;
467
618
  const meetsMin = cacheableTokens >= minTokens;
468
619
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
620
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
621
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
622
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
469
623
  return {
470
624
  request: {
471
625
  provider: "google",
@@ -477,6 +631,7 @@ function lowerGoogle(ir, profile, hints) {
477
631
  },
478
632
  diagnostics: {
479
633
  cacheableTokens: meetsMin ? cacheableTokens : 0,
634
+ historyCacheableTokens,
480
635
  estimatedCacheSavingsUsd: cacheSavings
481
636
  }
482
637
  };
@@ -524,6 +679,10 @@ function lowerOpenAI(ir, profile, hints) {
524
679
  content: ir.currentTurn.parts ?? ir.currentTurn.content
525
680
  });
526
681
  }
682
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
683
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
684
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
685
+ const openaiParallelToolCalls = hints.wireOverrides?.parallelToolCalls === false && ir.tools && ir.tools.length > 0 ? false : void 0;
527
686
  return {
528
687
  request: {
529
688
  provider: "openai",
@@ -531,9 +690,14 @@ function lowerOpenAI(ir, profile, hints) {
531
690
  messages,
532
691
  tools: ir.tools && ir.tools.length > 0 ? toOpenAITools(ir.tools) : void 0,
533
692
  response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
534
- reasoning_effort: hints.forceTerseOutput ? "low" : void 0
693
+ reasoning_effort: hints.forceTerseOutput ? "low" : void 0,
694
+ parallel_tool_calls: openaiParallelToolCalls
535
695
  },
536
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
696
+ diagnostics: {
697
+ cacheableTokens: 0,
698
+ historyCacheableTokens,
699
+ estimatedCacheSavingsUsd: 0
700
+ }
537
701
  };
538
702
  }
539
703
  function toOpenAITools(tools) {
@@ -560,6 +724,9 @@ function lowerDeepSeek(ir, profile) {
560
724
  content: ir.currentTurn.parts ?? ir.currentTurn.content
561
725
  });
562
726
  }
727
+ const history = (ir.history ?? []).filter((m) => m.role !== "system");
728
+ const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
729
+ const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
563
730
  return {
564
731
  request: {
565
732
  provider: "deepseek",
@@ -574,7 +741,11 @@ function lowerDeepSeek(ir, profile) {
574
741
  }
575
742
  })) : void 0
576
743
  },
577
- diagnostics: { cacheableTokens: 0, estimatedCacheSavingsUsd: 0 }
744
+ diagnostics: {
745
+ cacheableTokens: 0,
746
+ historyCacheableTokens,
747
+ estimatedCacheSavingsUsd: 0
748
+ }
578
749
  };
579
750
  }
580
751
  function sortSections(sections) {
@@ -597,6 +768,332 @@ function setNestedField(obj, path, value) {
597
768
  cursor[parts[parts.length - 1]] = value;
598
769
  }
599
770
 
771
+ // src/archetype-perf-brain.ts
772
/**
 * Shape guard for one perf row: a non-null object with a string `model_id`,
 * a string `archetype`, and a numeric `perf_score`.
 *
 * @param {unknown} x - Candidate row.
 * @returns {boolean} True when `x` has the three required fields with the right types.
 */
function isPerfRow(x) {
  if (typeof x !== "object" || x === null) return false;
  const { model_id: modelId, archetype, perf_score: perfScore } = x;
  if (typeof modelId !== "string") return false;
  if (typeof archetype !== "string") return false;
  return typeof perfScore === "number";
}
777
/**
 * Groups valid perf rows into a Map of `model_id -> { [archetype]: perf_score }`.
 * Rows rejected by `isPerfRow` are ignored; a later row for the same
 * (model, archetype) pair overwrites an earlier one.
 *
 * @param {Iterable<unknown>} rows - Raw rows.
 * @returns {Map<string, Record<string, number>>}
 */
function mapRowsToPerfMap(rows) {
  const scoresByModel = new Map();
  for (const candidate of rows) {
    if (!isPerfRow(candidate)) continue;
    const { model_id: modelId, archetype, perf_score: perfScore } = candidate;
    if (!scoresByModel.has(modelId)) scoresByModel.set(modelId, {});
    scoresByModel.get(modelId)[archetype] = perfScore;
  }
  return scoresByModel;
}
787
/**
 * Groups valid perf rows into a Map of `model_id -> { [archetype]: n }`.
 * Rows rejected by `isPerfRow`, and rows whose `n` is not a number, are ignored.
 *
 * @param {Iterable<unknown>} rows - Raw rows.
 * @returns {Map<string, Record<string, number>>}
 */
function mapRowsToNMap(rows) {
  const countsByModel = new Map();
  for (const candidate of rows) {
    const usable = isPerfRow(candidate) && typeof candidate.n === "number";
    if (!usable) continue;
    const { model_id: modelId, archetype, n: sampleCount } = candidate;
    if (!countsByModel.has(modelId)) countsByModel.set(modelId, {});
    countsByModel.get(modelId)[archetype] = sampleCount;
  }
  return countsByModel;
}
798
/**
 * Builds a `profile.id -> profile.archetypePerf` Map from the profiles returned by
 * `allProfiles()`, skipping profiles without an `archetypePerf` value.
 *
 * @returns {Map<string, object>}
 */
function bundledArchetypePerf() {
  const perfByModel = new Map();
  for (const { id, archetypePerf } of allProfiles()) {
    if (!archetypePerf) continue;
    perfByModel.set(id, archetypePerf);
  }
  return perfByModel;
}
805
/**
 * Bundled fallback for sample counts: always a fresh, empty Map.
 *
 * @returns {Map<string, object>}
 */
function bundledArchetypePerfN() {
  const noSampleCounts = new Map();
  return noSampleCounts;
}
808
// Loader for per-model archetype scores. Rows of the "kgauto_archetype_perf" table
// are shaped by mapRowsToPerfMap; bundledArchetypePerf supplies the fallback value.
// (createBrainQueryCache is imported from another chunk; its caching behaviour is
// not visible in this file.)
var loadArchetypePerfFromBrain = createBrainQueryCache({
  table: "kgauto_archetype_perf",
  mapRows: mapRowsToPerfMap,
  bundledFallback: bundledArchetypePerf
});

// Loader for per-model sample counts (`n`) read from the same table.
var loadArchetypePerfNFromBrain = createBrainQueryCache({
  table: "kgauto_archetype_perf",
  mapRows: mapRowsToNMap,
  bundledFallback: bundledArchetypePerfN
});

// Minimum sample count for a score to be labelled "measured" rather than "judgment".
var MEASURED_GROUNDING_MIN_N = 10;
821
/**
 * Looks up the archetype score and sample count for a model.
 *
 * @param {string} modelId - Model identifier.
 * @param {string} archetype - Archetype key.
 * @returns {{score: number, n: number, grounding: "measured"|"judgment"}}
 *   `score` defaults to 5 and `n` to 0 when no entry exists; `grounding` is
 *   "measured" once `n` reaches MEASURED_GROUNDING_MIN_N.
 */
function getArchetypePerfScore(modelId, archetype) {
  const scoresForModel = loadArchetypePerfFromBrain().get(modelId);
  const score = scoresForModel?.[archetype] ?? 5;
  const countsForModel = loadArchetypePerfNFromBrain().get(modelId);
  const n = countsForModel?.[archetype] ?? 0;
  let grounding = "judgment";
  if (n >= MEASURED_GROUNDING_MIN_N) {
    grounding = "measured";
  }
  return { score, n, grounding };
}
827
+
828
+ // src/advisor.ts
829
// Alternatives scoring below this are never recommended by the cost advisories.
var QUALITY_FLOOR_FOR_RECOMMENDATION = 6;

// Tier-down requires the alternative's input cost to be at most this fraction of
// the chosen model's input cost.
var TIER_DOWN_COST_RATIO = 0.5;

// A measured chosen-model score at or above this suppresses the cost-mismatch advisory.
var COST_MISMATCHED_CHOSEN_SCORE_CEILING = 7;
832
/**
 * Runs every advisory detector and returns their findings in a fixed order.
 *
 * The cost/evidence detectors are skipped when `policy.posture` is "locked". The
 * floor-breach detector is skipped when a translator rewrite of kind
 * "tool_call_contract" was applied.
 *
 * @param {object} ir - Compile input.
 * @param {object} result - Compile result (its diagnostics are read by the tool-bloat rule).
 * @param {object} profile - Profile of the chosen model.
 * @param {object} [policy] - Routing policy; only `posture` is read here.
 * @param {object} [phase2] - Extra context (fallback chain, profile resolver, applied rewrites).
 * @returns {Array<object>} Advisory objects.
 */
function runAdvisor(ir, result, profile, policy, phase2) {
  const advisories = [
    ...detectCachingOff(ir, profile),
    ...detectSingleChunkSystem(ir, profile),
    ...detectToolBloat(ir, result),
    ...detectHistoryUncached(ir, profile),
    ...detectSingleModelArray(ir, policy)
  ];
  const postureLocked = policy?.posture === "locked";
  if (!postureLocked) {
    advisories.push(
      ...detectCostMismatchedArchetype(ir, profile, phase2),
      ...detectModelStaleEvidence(ir, profile),
      ...detectTierDown(ir, profile, phase2)
    );
  }
  if (!translatorClearedToolCallCliff(phase2)) {
    advisories.push(...detectArchetypePerfFloorBreach(ir, profile));
  }
  return advisories;
}
849
/**
 * Reports whether any applied section rewrite has kind "tool_call_contract".
 *
 * @param {{sectionRewritesApplied?: Array<{kind: string}>}} [phase2]
 * @returns {boolean} False when there are no rewrites at all.
 */
function translatorClearedToolCallCliff(phase2) {
  const applied = phase2?.sectionRewritesApplied;
  if (!applied || applied.length === 0) return false;
  return applied.some((rewrite) => rewrite.kind === "tool_call_contract");
}
857
/**
 * Warns when an Anthropic request has at least 2000 characters of system sections
 * and none of them is marked `cacheable: true`.
 *
 * @param {{sections: Array<{text: string, cacheable?: boolean}>}} ir
 * @param {{provider: string}} profile
 * @returns {Array<object>} Zero or one advisory.
 */
function detectCachingOff(ir, profile) {
  if (profile.provider !== "anthropic") return [];
  let totalChars = 0;
  for (const section of ir.sections) {
    totalChars += section.text.length;
  }
  if (totalChars < 2000) return [];
  const hasCacheableSection = ir.sections.some((section) => section.cacheable === true);
  if (hasCacheableSection) return [];
  const advisory = {
    level: "warn",
    code: "caching-off-on-claude",
    message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
    suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
873
/**
 * Emits an info advisory when an Anthropic request has exactly one system section
 * and that section is longer than 1000 characters.
 *
 * @param {{sections: Array<{text: string}>}} ir
 * @param {{provider: string}} profile
 * @returns {Array<object>} Zero or one advisory.
 */
function detectSingleChunkSystem(ir, profile) {
  const isAnthropic = profile.provider === "anthropic";
  if (!isAnthropic || ir.sections.length !== 1) return [];
  const [only] = ir.sections;
  if (!only) return [];
  const chunkLength = only.text.length;
  if (chunkLength <= 1000) return [];
  const advisory = {
    level: "info",
    code: "single-chunk-system",
    message: `System prompt is a single ${chunkLength}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
    suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
888
/**
 * Warns when more than 10 tools survive the relevance pass for one of the
 * short-output archetypes (classify, extract, summarize, transform, critique).
 *
 * @param {{tools?: Array<object>, intent: {archetype: string}}} ir
 * @param {{diagnostics: {toolsKept: number}}} result
 * @returns {Array<object>} Zero or one advisory.
 */
function detectToolBloat(ir, result) {
  const declaredTools = ir.tools;
  if (!declaredTools || declaredTools.length === 0) return [];
  const { toolsKept } = result.diagnostics;
  if (toolsKept <= 10) return [];
  const archetype = ir.intent.archetype;
  const shortOutputArchetypes = ["classify", "extract", "summarize", "transform", "critique"];
  if (!shortOutputArchetypes.includes(archetype)) return [];
  const advisory = {
    level: "warn",
    code: "tool-bloat",
    message: `${toolsKept} tools kept after the relevance pass for archetype="${archetype}" (consumer declared ${declaredTools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
    suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
    docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
  };
  return [advisory];
}
910
/**
 * Warns when an Anthropic request carries two or more history messages and either
 * no `historyCachePolicy` or one whose strategy is "none".
 *
 * @param {{history?: Array<object>, historyCachePolicy?: {strategy: string}}} ir
 * @param {{provider: string}} profile
 * @returns {Array<object>} Zero or one advisory.
 */
function detectHistoryUncached(ir, profile) {
  if (profile.provider !== "anthropic") return [];
  const history = ir.history;
  if (!history || history.length < 2) return [];
  const policy = ir.historyCachePolicy;
  const cachingRequested = Boolean(policy) && policy.strategy !== "none";
  if (cachingRequested) return [];
  const advisory = {
    level: "warn",
    code: "history-uncached-on-claude",
    message: `${history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
    suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
926
/**
 * Warns when `ir.models` holds exactly one model and the policy posture is not "locked".
 *
 * @param {{models: Array<string>}} ir
 * @param {{posture?: string}} [policy]
 * @returns {Array<object>} Zero or one advisory.
 */
function detectSingleModelArray(ir, policy) {
  const isSingle = ir.models.length === 1;
  if (!isSingle || policy?.posture === "locked") return [];
  const [only] = ir.models;
  const advisory = {
    level: "warn",
    code: "single-model-array",
    message: `\`ir.models\` has length 1 (only "${only}") and posture is not 'locked'. A single-model chain has no safety net \u2014 the first 429 / 5xx / cliff hits the user as a failure. Master plan \xA71.2 closes the reliability gap with a 2-step minimum.`,
    suggestion: `Use \`getDefaultFallbackChain({ archetype: ir.intent.archetype, primary: '${only}', posture: 'preferred' })\` for a user-anchored chain, or \`getDefaultFallbackChain({ archetype, posture: 'open' })\` for library-picked. If single-model is intentional (compliance/brand promise), set \`policy.posture = 'locked'\` to silence this rule.`,
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#single-model-array"
  };
  return [advisory];
}
940
/**
 * Warns when the fallback chain contains a strictly cheaper model (by input cost)
 * whose archetype score clears the recommendation floor and is not below the chosen
 * model's score.
 *
 * Silent when: there is no phase-2 context, chain, or resolver; the chosen score is
 * measured and not below COST_MISMATCHED_CHOSEN_SCORE_CEILING; no alternative
 * qualifies; or the best alternative is measured and cheap enough to meet the
 * tier-down cost ratio.
 *
 * Among qualifying alternatives the highest score wins, ties broken by lower input cost.
 *
 * @param {{intent: {archetype: string}}} ir
 * @param {object} profile - Profile of the chosen model.
 * @param {{fallbackChain: Array<string>, profileResolver?: Function}} [phase2]
 * @returns {Array<object>} Zero or one advisory.
 */
function detectCostMismatchedArchetype(ir, profile, phase2) {
  if (!phase2 || phase2.fallbackChain.length === 0) return [];
  if (!phase2.profileResolver) return [];
  const archetype = ir.intent.archetype;
  const chosen = getArchetypePerfScore(profile.id, archetype);
  const chosenIsJudgment = chosen.grounding === "judgment";
  if (!chosenIsJudgment && !(chosen.score < COST_MISMATCHED_CHOSEN_SCORE_CEILING)) return [];

  // Higher score wins; equal scores fall back to the lower input cost.
  const outranks = (candidate, incumbent) => {
    if (candidate.score.score > incumbent.score.score) return true;
    return candidate.score.score === incumbent.score.score && candidate.profile.costInputPer1m < incumbent.profile.costInputPer1m;
  };

  let best = null;
  for (const candidateId of phase2.fallbackChain) {
    const candidateProfile = phase2.profileResolver(candidateId);
    if (!candidateProfile || candidateProfile.id === profile.id) continue;
    const candidateScore = getArchetypePerfScore(candidateProfile.id, archetype);
    if (candidateScore.score < QUALITY_FLOOR_FOR_RECOMMENDATION || candidateScore.score < chosen.score) continue;
    if (candidateProfile.costInputPer1m >= profile.costInputPer1m) continue;
    const candidate = { id: candidateId, profile: candidateProfile, score: candidateScore };
    if (!best || outranks(candidate, best)) {
      best = candidate;
    }
  }
  if (!best) return [];

  const bestIsMeasured = best.score.grounding === "measured";
  // Leave the finding to the tier-down rule when it would report the same alternative.
  if (bestIsMeasured && best.profile.costInputPer1m <= profile.costInputPer1m * TIER_DOWN_COST_RATIO) return [];

  const chosenGrounding = chosenIsJudgment ? `archetypePerf.${archetype}=judgment` : `archetypePerf.${archetype}=${chosen.score}`;
  const altGrounding = bestIsMeasured ? `archetypePerf.${archetype}=${best.score.score}, measured, n=${best.score.n}` : `archetypePerf.${archetype}=${best.score.score}, judgment`;
  const sameProvider = profile.provider === best.profile.provider;
  return [
    {
      level: "warn",
      code: "cost-mismatched-archetype",
      message: `Cost-mismatched-archetype: target=${profile.id} (${chosenGrounding}) selected for ${archetype}. Alternative ${best.id} (${altGrounding}) is cheaper ($${best.profile.costInputPer1m}/$${best.profile.costOutputPer1m} vs $${profile.costInputPer1m}/$${profile.costOutputPer1m} per 1M) at equal-or-better quality.`,
      suggestion: `Consider declaring \`${best.id}\` as the primary model for this archetype, or relax to posture='open' to let kgauto select among the chain. If the chosen model is required for compliance/brand reasons, set \`policy.posture = 'locked'\` to silence this rule.`,
      recommendationType: sameProvider ? "tier-down" : "model-swap",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
976
/**
 * Emits an info advisory when brain-query mode is active for the
 * "kgauto_archetype_perf" table but the chosen (model, archetype) score is still
 * judgment-grounded.
 *
 * @param {{intent: {archetype: string}}} ir
 * @param {{id: string}} profile
 * @returns {Array<object>} Zero or one advisory.
 */
function detectModelStaleEvidence(ir, profile) {
  const brainActive = isBrainQueryActiveFor("kgauto_archetype_perf");
  if (!brainActive) return [];
  const archetype = ir.intent.archetype;
  const { grounding, n } = getArchetypePerfScore(profile.id, archetype);
  if (grounding !== "judgment") return [];
  const advisory = {
    level: "info",
    code: "model-stale-evidence",
    message: `Model-stale-evidence: target=${profile.id} archetype=${archetype} is judgment-grounded (n=${n}) despite brain-query mode being active. Measurement substrate is wired but the brain hasn't accumulated >=10 outcomes for this (model, archetype) tuple yet \u2014 routing decisions remain pre-measured for this slot.`,
    suggestion: "Verify that `record()` is being called on every call() outcome with the appropriate `actualModel` and `mutationsApplied` fields. Once the brain accumulates n>=10 rows on this tuple, the score promotes from judgment to measured automatically (5-min SWR cache). No code change required from your side \u2014 this is the substrate signaling the gap.",
    recommendationType: "prompt-fix",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
992
/**
 * Warns when the fallback chain contains a measured alternative that clears the
 * recommendation floor, scores no lower than the chosen model, and costs at most
 * TIER_DOWN_COST_RATIO times the chosen model's input price.
 *
 * Among qualifying alternatives the cheapest wins, ties broken by higher score.
 *
 * @param {{intent: {archetype: string}}} ir
 * @param {object} profile - Profile of the chosen model.
 * @param {{fallbackChain: Array<string>, profileResolver?: Function}} [phase2]
 * @returns {Array<object>} Zero or one advisory.
 */
function detectTierDown(ir, profile, phase2) {
  if (!phase2 || phase2.fallbackChain.length === 0) return [];
  if (!phase2.profileResolver) return [];
  const archetype = ir.intent.archetype;
  const chosen = getArchetypePerfScore(profile.id, archetype);
  const costCeiling = profile.costInputPer1m * TIER_DOWN_COST_RATIO;

  // Lower input cost wins; equal costs fall back to the higher score.
  const outranks = (candidate, incumbent) => {
    if (candidate.profile.costInputPer1m < incumbent.profile.costInputPer1m) return true;
    return candidate.profile.costInputPer1m === incumbent.profile.costInputPer1m && candidate.score.score > incumbent.score.score;
  };

  let best = null;
  for (const candidateId of phase2.fallbackChain) {
    const candidateProfile = phase2.profileResolver(candidateId);
    if (!candidateProfile || candidateProfile.id === profile.id) continue;
    const candidateScore = getArchetypePerfScore(candidateProfile.id, archetype);
    if (candidateScore.grounding !== "measured") continue;
    if (candidateScore.score < QUALITY_FLOOR_FOR_RECOMMENDATION || candidateScore.score < chosen.score) continue;
    if (candidateProfile.costInputPer1m > costCeiling) continue;
    const candidate = { id: candidateId, profile: candidateProfile, score: candidateScore };
    if (!best || outranks(candidate, best)) {
      best = candidate;
    }
  }
  if (!best) return [];

  let chosenDesc;
  if (chosen.grounding === "measured") {
    chosenDesc = `archetypePerf.${archetype}=${chosen.score} (measured, n=${chosen.n})`;
  } else {
    chosenDesc = `archetypePerf.${archetype}=${chosen.score} (${chosen.grounding})`;
  }
  return [
    {
      level: "warn",
      code: "tier-down",
      message: `Tier-down: target=${profile.id} (${chosenDesc}) selected for ${archetype}. Brain shows ${best.id} delivers equal-or-better quality (archetypePerf.${archetype}=${best.score.score}, measured, n=${best.score.n}) at $${best.profile.costInputPer1m}/$${best.profile.costOutputPer1m} per 1M vs $${profile.costInputPer1m}/$${profile.costOutputPer1m} \u2014 a measured tier-down opportunity.`,
      suggestion: `Move \`${best.id}\` to primary for this archetype. The brain has n=${best.score.n} measured outcomes backing the recommendation; this is data, not opinion. If posture='locked' is required (compliance/brand promise), set it explicitly to silence this rule.`,
      recommendationType: "tier-down",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
1025
/**
 * Reports when `getModelCompatibility` does not rate the chosen model "compatible"
 * for this archetype and tool-orchestration constraint.
 *
 * - "requires-adapter": a warn advisory carrying the suggested adapter.
 * - any other non-compatible status: a critical advisory recommending a model swap.
 *
 * @param {{intent: {archetype: string}, constraints?: {toolOrchestration?: string}}} ir
 * @param {{id: string}} profile
 * @returns {Array<object>} Zero or one advisory.
 */
function detectArchetypePerfFloorBreach(ir, profile) {
  const docsUrl = "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories";
  const compat = getModelCompatibility(profile.id, {
    archetype: ir.intent.archetype,
    toolOrchestration: ir.constraints?.toolOrchestration
  });
  switch (compat.status) {
    case "compatible":
      return [];
    case "requires-adapter":
      return [
        {
          level: "warn",
          code: "archetype-perf-floor-breach",
          message: `${profile.id} sits below the archetype floor for ${ir.intent.archetype} (score ${compat.archetypePerf}/10, floor 6). A known adapter would lift it: ${compat.adapter.parameter}=${compat.adapter.value}. ${compat.adapter.consequence}`,
          suggestion: `Pass \`ir.constraints.${compat.adapter.parameter} = '${compat.adapter.value}'\` for this call, OR pick a model whose archetypePerf for ${ir.intent.archetype} already clears the floor (call \`getModelCompatibility(modelId, { archetype: '${ir.intent.archetype}' })\` to check). Estimated post-adapter score: ${compat.archetypePerfWithAdapter}/10.`,
          recommendationType: "prompt-fix",
          suggestedAdaptation: compat.adapter,
          docsUrl
        }
      ];
    default:
      return [
        {
          level: "critical",
          code: "archetype-perf-floor-breach",
          message: `${profile.id} sits below the archetype floor for ${ir.intent.archetype} (score ${compat.archetypePerf}/10, floor 6) and no known adapter would lift it. ${compat.reason}`,
          suggestion: `Swap to a model whose archetypePerf for ${ir.intent.archetype} clears the floor. Use \`getModelCompatibility(candidateId, { archetype: '${ir.intent.archetype}' })\` to vet candidates, or \`getDefaultFallbackChain({ archetype: '${ir.intent.archetype}', posture: 'open' })\` for a library-picked chain that respects the floor by construction.`,
          recommendationType: "model-swap",
          docsUrl
        }
      ];
  }
}
1055
+
1056
+ // src/translator.ts
1057
// Archetype score below which the translator rewrites tool-call-contract sections.
var TRANSLATOR_FLOOR = ARCHETYPE_FLOOR_DEFAULT;

// Rule identifier recorded on every rewrite produced by applySectionRewrites.
var RULE_SEQUENTIAL_TOOL_CLIFF = "sequential-tool-cliff-below-floor";

// Instruction text prepended to rewritten tool-call-contract sections.
var SEQUENTIAL_TOOL_PREAMBLE = "IMPORTANT: Use one tool call per response. Wait for the tool result before deciding the next tool. Do NOT batch tool calls in parallel.";
1060
/**
 * Prepends the sequential-tool preamble to every section of kind
 * "tool_call_contract" when the profile's own score for `archetype` is a number
 * below TRANSLATOR_FLOOR.
 *
 * The input IR is returned untouched (same reference) when there are no sections,
 * the profile has no `archetypePerf`, the score is missing or not below the floor,
 * or no section has the targeted kind.
 *
 * @param {{ir: object, profile: object, archetype: string}} args
 * @returns {{rewrittenIR: object, rewrites: Array<object>}} The (possibly copied) IR
 *   plus one record per rewritten section; each record carries
 *   `wireOverrides.parallelToolCalls = false`.
 */
function applySectionRewrites(args) {
  const { ir, profile, archetype } = args;
  const unchanged = () => ({ rewrittenIR: ir, rewrites: [] });

  const hasSections = Array.isArray(ir.sections) && ir.sections.length > 0;
  if (!hasSections || !profile.archetypePerf) return unchanged();

  const score = profile.archetypePerf[archetype];
  const belowFloor = typeof score === "number" && score < TRANSLATOR_FLOOR;
  if (!belowFloor) return unchanged();

  const rewrites = [];
  const sections = ir.sections.map((section) => {
    if (section.kind !== "tool_call_contract") return section;
    const originalText = section.text;
    const transformedText = `${SEQUENTIAL_TOOL_PREAMBLE}\n\n${originalText}`;
    rewrites.push({
      sectionId: section.id,
      kind: "tool_call_contract",
      rule: RULE_SEQUENTIAL_TOOL_CLIFF,
      originalText,
      transformedText,
      wireOverrides: { parallelToolCalls: false }
    });
    return { ...section, text: transformedText };
  });

  if (rewrites.length === 0) return unchanged();
  return { rewrittenIR: { ...ir, sections }, rewrites };
}
1096
+
600
1097
  // src/compile.ts
601
1098
  var counter = 0;
602
1099
  function makeHandle() {
@@ -612,7 +1109,8 @@ function compile(ir, opts = {}) {
612
1109
  threshold: opts.toolRelevanceThreshold
613
1110
  });
614
1111
  const compressed = passCompressHistory(toolFiltered.value, {
615
- summarizeOlderThan: opts.compressHistoryAfter
1112
+ summarizeOlderThan: opts.compressHistoryAfter,
1113
+ summarizeAboveTokens: opts.compressHistoryAboveTokens
616
1114
  });
617
1115
  let workingIR = compressed.value;
618
1116
  const accumulatedMutations = [
@@ -639,14 +1137,89 @@ function compile(ir, opts = {}) {
639
1137
  const cliffs = passApplyCliffs(workingIR, profile, inputTokens);
640
1138
  workingIR = cliffs.value.ir;
641
1139
  accumulatedMutations.push(...cliffs.mutations);
1140
+ const translated = applySectionRewrites({
1141
+ ir: workingIR,
1142
+ profile,
1143
+ archetype: ir.intent.archetype
1144
+ });
1145
+ workingIR = translated.rewrittenIR;
1146
+ const sectionRewritesApplied = translated.rewrites;
1147
+ let wireOverrides;
1148
+ for (const rw of sectionRewritesApplied) {
1149
+ if (!rw.wireOverrides) continue;
1150
+ if (!wireOverrides) wireOverrides = {};
1151
+ if (rw.wireOverrides.parallelToolCalls !== void 0) {
1152
+ wireOverrides.parallelToolCalls = rw.wireOverrides.parallelToolCalls;
1153
+ }
1154
+ }
1155
+ for (const rw of sectionRewritesApplied) {
1156
+ accumulatedMutations.push({
1157
+ id: `translator:${rw.rule}:${rw.sectionId}`,
1158
+ source: "translator",
1159
+ passName: "translator",
1160
+ description: `Rewrote section "${rw.sectionId}" (kind=${rw.kind}) via rule "${rw.rule}".`
1161
+ });
1162
+ }
642
1163
  const lowered = lower(workingIR, profile, {
643
1164
  forceThinkingZero: cliffs.value.loweringHints.forceThinkingZero,
644
- forceTerseOutput: cliffs.value.loweringHints.forceTerseOutput
1165
+ forceTerseOutput: cliffs.value.loweringHints.forceTerseOutput,
1166
+ wireOverrides
645
1167
  });
646
1168
  validateFinalFit(workingIR, profile, inputTokens);
647
1169
  const handle = makeHandle();
648
1170
  const finalShape = computeShape(workingIR, inputTokens);
649
1171
  const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
1172
+ const diagnostics = {
1173
+ sectionsKept: workingIR.sections.length,
1174
+ sectionsDropped: ir.sections.length - workingIR.sections.length,
1175
+ toolsKept: workingIR.tools?.length ?? 0,
1176
+ toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
1177
+ historyKept: workingIR.history?.length ?? 0,
1178
+ historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
1179
+ cacheableTokens: lowered.diagnostics.cacheableTokens,
1180
+ estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
1181
+ historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
1182
+ historyTokensTotal: compressed.historyTokensTotal,
1183
+ // alpha.20 E3: mirror the consumer's declared mode for Glass-Box +
1184
+ // brain observability. Undefined when not declared (pre-alpha.20).
1185
+ toolOrchestration: ir.constraints?.toolOrchestration
1186
+ };
1187
+ if (ir.intent.archetype === "hunt" && ir.constraints?.toolOrchestration === "sequential") {
1188
+ accumulatedMutations.push({
1189
+ id: "sequential-mode-chain-selected",
1190
+ source: "tool_orchestration",
1191
+ passName: "compile",
1192
+ description: "ir.constraints.toolOrchestration='sequential' selected the DeepSeek-tier-0 hunt chain overlay (L-040 parallel-tool cliff doesn't apply at single-step granularity)."
1193
+ });
1194
+ }
1195
+ const phase2ProfileResolver = opts.profileResolver ? (id) => {
1196
+ try {
1197
+ return opts.profileResolver(id);
1198
+ } catch {
1199
+ return void 0;
1200
+ }
1201
+ } : tryGetProfile;
1202
+ const advisories = runAdvisor(
1203
+ ir,
1204
+ {
1205
+ target: profile.id,
1206
+ provider: profile.provider,
1207
+ tokensIn: inputTokens,
1208
+ diagnostics
1209
+ },
1210
+ profile,
1211
+ opts.policy,
1212
+ {
1213
+ fallbackChain,
1214
+ profileResolver: phase2ProfileResolver,
1215
+ // alpha.29 — feed translator rewrites to the advisor so the
1216
+ // `archetype-perf-floor-breach` rule can suppress when the translator
1217
+ // already cleared the cliff for the same archetype. Without this,
1218
+ // both the rewrite AND the advisory fire — noisy, and the advisory
1219
+ // would mislead consumers into thinking the cliff is unaddressed.
1220
+ sectionRewritesApplied
1221
+ }
1222
+ );
650
1223
  return {
651
1224
  handle,
652
1225
  target: profile.id,
@@ -656,16 +1229,10 @@ function compile(ir, opts = {}) {
656
1229
  estimatedCostUsd: target.estimatedCostUsd,
657
1230
  mutationsApplied: accumulatedMutations,
658
1231
  fallbackChain,
659
- diagnostics: {
660
- sectionsKept: workingIR.sections.length,
661
- sectionsDropped: ir.sections.length - workingIR.sections.length,
662
- toolsKept: workingIR.tools?.length ?? 0,
663
- toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
664
- historyKept: workingIR.history?.length ?? 0,
665
- historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
666
- cacheableTokens: lowered.diagnostics.cacheableTokens,
667
- estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
668
- }
1232
+ advisories,
1233
+ diagnostics,
1234
+ sectionRewritesApplied,
1235
+ wireOverrides
669
1236
  };
670
1237
  }
671
1238
  function validateIR(ir) {
@@ -706,14 +1273,92 @@ function validateFinalFit(ir, profile, tokens) {
706
1273
  }
707
1274
  }
708
1275
 
1276
+ // src/pricing-brain.ts
1277
/**
 * Structural guard for a raw pricing row.
 *
 * A value qualifies when it is a non-null object with string `model_id` and
 * `valid_from` fields and numeric `cost_input_per_1m` / `cost_output_per_1m`
 * fields. Optional columns are not inspected.
 *
 * @param {unknown} x Candidate row.
 * @returns {boolean} Whether `x` has the required columns with the right types.
 */
function isPricingRow(x) {
  if (x === null || typeof x !== "object") return false;
  const isString = (key) => typeof x[key] === "string";
  const isNumber = (key) => typeof x[key] === "number";
  return (
    isString("model_id") &&
    isNumber("cost_input_per_1m") &&
    isNumber("cost_output_per_1m") &&
    isString("valid_from")
  );
}
1282
/**
 * Converts raw pricing rows (snake_case columns) into pricing entries
 * (camelCase fields with epoch-millisecond validity bounds).
 *
 * Rows are skipped when they fail `isPricingRow`, or when `valid_from` /
 * a non-null `valid_until` cannot be parsed by `Date.parse`. Skipping the
 * latter matters: `Date.parse` yields NaN for malformed timestamps, and a NaN
 * bound compares false against everything, so such a row would pass every
 * validity filter downstream and could shadow correctly-dated rows.
 *
 * @param {Iterable<unknown>} rows Raw rows.
 * @returns {object[]} Pricing entries, in input order.
 */
function mapRowsToPricing(rows) {
  const out = [];
  for (const row of rows) {
    if (!isPricingRow(row)) continue;
    const validFrom = Date.parse(row.valid_from);
    // null/undefined `valid_until` means "open-ended", represented as undefined.
    const validUntil = row.valid_until == null ? void 0 : Date.parse(row.valid_until);
    // Number.isNaN(undefined) is false, so open-ended rows are kept.
    if (Number.isNaN(validFrom) || Number.isNaN(validUntil)) continue;
    out.push({
      modelId: row.model_id,
      costInputPer1m: row.cost_input_per_1m,
      costOutputPer1m: row.cost_output_per_1m,
      cacheInputPer1m: row.cache_input_per_1m ?? void 0,
      cacheCreationPer1m: row.cache_creation_per_1m ?? void 0,
      validFrom,
      validUntil,
      source: row.source ?? void 0
    });
  }
  return out;
}
1299
/**
 * Builds pricing entries from the profiles returned by `allProfiles()`.
 *
 * Each entry copies the profile's input/output cost, is valid from epoch 0
 * with no end date, and is tagged `source: "profile_seed"`. A cached-input
 * price is derived as `costInputPer1m * discount` only when the profile's
 * `lowering.cache.discount` is defined and positive.
 *
 * @returns {object[]} One pricing entry per profile, in profile order.
 */
function bundledPricing() {
  return Array.from(allProfiles(), (profile) => {
    const discount = profile.lowering.cache.discount;
    const hasDiscount = discount !== void 0 && discount > 0;
    return {
      modelId: profile.id,
      costInputPer1m: profile.costInputPer1m,
      costOutputPer1m: profile.costOutputPer1m,
      cacheInputPer1m: hasDiscount ? profile.costInputPer1m * discount : void 0,
      validFrom: 0,
      validUntil: void 0,
      source: "profile_seed"
    };
  });
}
1314
+ var loadPricingFromBrain = createBrainQueryCache({
1315
+ table: "kgauto_pricing",
1316
+ mapRows: mapRowsToPricing,
1317
+ bundledFallback: bundledPricing
1318
+ });
1319
/**
 * Finds the pricing entry in force for a model at a given instant.
 *
 * Among entries returned by `loadPricingFromBrain()` for `modelId`, an entry
 * is in force when its `validFrom` is not after the instant and its
 * `validUntil` is either undefined or strictly after the instant. When several
 * qualify, the one with the greatest `validFrom` wins (first seen wins ties).
 *
 * @param {string} modelId Model identifier to look up.
 * @param {Date} [at] Instant to resolve at; defaults to now.
 * @returns {object | undefined} The matching entry, or undefined when none applies.
 */
function resolvePricingAt(modelId, at = /* @__PURE__ */ new Date()) {
  const ts = at.getTime();
  const inForce = (row) => {
    if (row.modelId !== modelId) return false;
    const started = !(row.validFrom > ts);
    const expired = row.validUntil !== void 0 && row.validUntil <= ts;
    return started && !expired;
  };
  let winner;
  for (const candidate of loadPricingFromBrain()) {
    if (!inForce(candidate)) continue;
    if (winner === void 0 || candidate.validFrom > winner.validFrom) winner = candidate;
  }
  return winner;
}
1331
+
709
1332
  // src/brain.ts
710
1333
  var activeConfig;
711
1334
  function configureBrain(config) {
712
1335
  const endpoint = config.endpoint.replace(/\/outcomes\/?$/, "");
713
1336
  activeConfig = { ...config, endpoint };
1337
+ const bq = config.brainQuery ?? {};
1338
+ const enabledTables = /* @__PURE__ */ new Set();
1339
+ if (bq.chains !== false) enabledTables.add("kgauto_chains");
1340
+ if (bq.perf !== false) enabledTables.add("kgauto_archetype_perf");
1341
+ if (bq.pricing !== false) enabledTables.add("kgauto_pricing");
1342
+ if (bq.models !== false) {
1343
+ enabledTables.add("kgauto_models");
1344
+ enabledTables.add("kgauto_aliases");
1345
+ }
1346
+ if (enabledTables.size === 0) {
1347
+ configureBrainQuery(void 0);
1348
+ return;
1349
+ }
1350
+ configureBrainQuery({
1351
+ endpoint,
1352
+ configEndpoint: bq.configEndpoint,
1353
+ ttlMs: bq.cacheTtlMs ?? 3e5,
1354
+ fetchImpl: config.fetchImpl ?? fetch,
1355
+ enabledTables,
1356
+ onError: config.onError
1357
+ });
714
1358
  }
715
1359
  function clearBrain() {
716
1360
  activeConfig = void 0;
1361
+ configureBrainQuery(void 0);
717
1362
  }
718
1363
  var compileRegistry = /* @__PURE__ */ new Map();
719
1364
  var REGISTRY_MAX_ENTRIES = 1e4;
@@ -740,6 +1385,9 @@ function registerCompile(appId, archetype, ir, result) {
740
1385
  tokens
741
1386
  );
742
1387
  const shapeKey = `${shape.contextBucket}-${shape.toolCountBucket}-${shape.historyDepth}-${shape.outputMode}`;
1388
+ const toolsCount = result.diagnostics.toolsKept;
1389
+ const historyDepth = Array.isArray(ir.history) ? ir.history.length : 0;
1390
+ const systemPromptChars = estimateSystemPromptChars(ir.sections);
743
1391
  compileRegistry.set(result.handle, {
744
1392
  appId,
745
1393
  archetype,
@@ -749,9 +1397,35 @@ function registerCompile(appId, archetype, ir, result) {
749
1397
  learningKey: learningKey(archetype, result.target, shape),
750
1398
  estimatedTokensIn: tokens,
751
1399
  mutationsApplied: result.mutationsApplied.map((m) => m.id),
752
- startedAt: Date.now()
1400
+ // alpha.30: cache the in-memory advisories so record() can auto-persist
1401
+ // to `compile_outcome_advisories` without consumer-side threading.
1402
+ advisoriesFromCompile: result.advisories ?? [],
1403
+ startedAt: Date.now(),
1404
+ historyCacheableTokens: result.diagnostics.historyCacheableTokens,
1405
+ historyTokensTotal: result.diagnostics.historyTokensTotal,
1406
+ // alpha.20 E3: capture consumer's declared mode for the brain payload.
1407
+ toolOrchestration: result.diagnostics.toolOrchestration,
1408
+ // alpha.28: shape fields for Glass-Box renderer.
1409
+ toolsCount,
1410
+ historyDepth,
1411
+ systemPromptChars,
1412
+ // alpha.29: translator activity — persisted on the brain row so
1413
+ // cross-app aggregates can answer "Sonnet narration rule fired N times,
1414
+ // outcome quality lifted to M."
1415
+ sectionRewritesApplied: result.sectionRewritesApplied
753
1416
  });
754
1417
  }
1418
/**
 * Sums the character length of the textual body of every prompt section.
 *
 * The body is read from `section.text` (the field the translator pass in this
 * file reads and writes); `section.content` is accepted as a fallback so
 * callers that supply that shape keep working. Entries that are not objects,
 * or that carry no string body, contribute nothing.
 *
 * @param {unknown} sections Prompt sections (expected: array of objects).
 * @returns {number | undefined} Total characters, or undefined when the input
 *   is not a non-empty array or no section has a non-empty string body.
 */
function estimateSystemPromptChars(sections) {
  if (!Array.isArray(sections) || sections.length === 0) return void 0;
  let total = 0;
  for (const s of sections) {
    if (!s || typeof s !== "object") continue;
    const body = typeof s.text === "string" ? s.text : s.content;
    if (typeof body === "string") total += body.length;
  }
  // Zero is reported as "unknown" rather than as a measured length.
  return total > 0 ? total : void 0;
}
755
1429
  async function record(input) {
756
1430
  const reg = compileRegistry.get(input.handle);
757
1431
  if (reg) compileRegistry.delete(input.handle);
@@ -762,11 +1436,22 @@ async function record(input) {
762
1436
  const config = activeConfig;
763
1437
  const fetchFn = config.fetchImpl ?? fetch;
764
1438
  const send = async () => {
1439
+ let outcomeId;
765
1440
  try {
766
1441
  const res = await fetchFn(`${config.endpoint}/outcomes`, {
767
1442
  method: "POST",
768
1443
  headers: {
769
1444
  "Content-Type": "application/json",
1445
+ // alpha.20: request the inserted row back so we can JOIN advisories
1446
+ // to it via outcome_id. PostgREST returns the row when
1447
+ // `Prefer: return=representation` is set; proxies that pass the
1448
+ // header through (the recommended `const row = { ...body }` shape
1449
+ // from OutcomePayload's forward-compat rule) will surface
1450
+ // the row id. Proxies that don't (legacy / hand-rolled shapes)
1451
+ // simply produce no parseable id → secondary advisory POST is
1452
+ // skipped silently. Best-effort — primary outcome row is the
1453
+ // load-bearing write.
1454
+ Prefer: "return=representation",
770
1455
  ...config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}
771
1456
  },
772
1457
  body: JSON.stringify(payload)
@@ -775,6 +1460,29 @@ async function record(input) {
775
1460
  const text = await res.text().catch(() => "<no body>");
776
1461
  throw new Error(`brain ${res.status}: ${text}`);
777
1462
  }
1463
+ outcomeId = await tryExtractOutcomeId(res);
1464
+ } catch (err) {
1465
+ (config.onError ?? defaultOnError)(err);
1466
+ return;
1467
+ }
1468
+ const advisories = input.advisories ?? reg?.advisoriesFromCompile;
1469
+ if (!advisories || advisories.length === 0) return;
1470
+ if (outcomeId === void 0) return;
1471
+ try {
1472
+ const advisoryPayload = advisories.map((a) => buildAdvisoryRow(outcomeId, a));
1473
+ const res = await fetchFn(`${config.endpoint}/compile_outcome_advisories`, {
1474
+ method: "POST",
1475
+ headers: {
1476
+ "Content-Type": "application/json",
1477
+ Prefer: "return=minimal",
1478
+ ...config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}
1479
+ },
1480
+ body: JSON.stringify(advisoryPayload)
1481
+ });
1482
+ if (!res.ok) {
1483
+ const text = await res.text().catch(() => "<no body>");
1484
+ throw new Error(`brain advisories ${res.status}: ${text}`);
1485
+ }
778
1486
  } catch (err) {
779
1487
  (config.onError ?? defaultOnError)(err);
780
1488
  }
@@ -792,6 +1500,11 @@ function buildPayload(input, reg) {
792
1500
  const compileTarget = reg?.model;
793
1501
  const actual = input.actualModel ?? compileTarget;
794
1502
  const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
1503
+ const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
1504
+ const costModel = actual;
1505
+ const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
1506
+ const fellOverFrom = input.fellOverFrom ?? requested;
1507
+ const fallbackReason = fellOverFrom ? input.fallbackReason : void 0;
795
1508
  return {
796
1509
  handle: input.handle,
797
1510
  app_id: reg?.appId,
@@ -801,7 +1514,7 @@ function buildPayload(input, reg) {
801
1514
  provider: reg?.provider,
802
1515
  shape_key: reg?.shapeKey,
803
1516
  learning_key: reg?.learningKey,
804
- mutations_applied: reg?.mutationsApplied ?? [],
1517
+ mutations_applied: mutationsApplied,
805
1518
  tokens_in: input.tokensIn,
806
1519
  tokens_out: input.tokensOut,
807
1520
  estimated_tokens_in: reg?.estimatedTokensIn,
@@ -815,9 +1528,118 @@ function buildPayload(input, reg) {
815
1528
  oracle_rationale: input.oracleScore?.rationale,
816
1529
  prompt_preview: input.promptPreview,
817
1530
  response_preview: input.responsePreview,
818
- dialect_version: "v1"
1531
+ dialect_version: "v1",
1532
+ cache_read_input_tokens: input.cacheReadInputTokens,
1533
+ cache_creation_input_tokens: input.cacheCreationInputTokens,
1534
+ cost_usd_actual: costUsdActual,
1535
+ ttft_ms: input.ttftMs,
1536
+ history_cacheable_tokens: reg?.historyCacheableTokens,
1537
+ history_tokens_at_compile: reg?.historyTokensTotal,
1538
+ // alpha.20 E3: mirror consumer's declared tool-orchestration mode so
1539
+ // the brain can measure per-mode model perf separately (DeepSeek in
1540
+ // sequential vs parallel mode is two different stories — L-040).
1541
+ // Null when consumer hadn't adopted the constraint yet.
1542
+ tool_orchestration: reg?.toolOrchestration ?? null,
1543
+ // alpha.28 — Glass-Box renderer substrate (migration 018). All optional;
1544
+ // omitted-undefined PostgREST inserts store NULL → renderer renders "—".
1545
+ finish_reason: input.finishReason,
1546
+ total_ms: input.totalMs ?? input.latencyMs,
1547
+ tools_count: input.toolsCount ?? reg?.toolsCount,
1548
+ history_depth: input.historyDepth ?? reg?.historyDepth,
1549
+ system_prompt_chars: input.systemPromptChars ?? reg?.systemPromptChars,
1550
+ fell_over_from: fellOverFrom,
1551
+ fallback_reason: fallbackReason,
1552
+ // alpha.29 — translator activity (migration 019). Send NULL when no
1553
+ // rewrites fired so the brain's "did the translator do anything?"
1554
+ // queries can use `IS NOT NULL` cleanly.
1555
+ section_rewrites_applied: reg?.sectionRewritesApplied && reg.sectionRewritesApplied.length > 0 ? reg.sectionRewritesApplied : null
1556
+ };
1557
+ }
1558
/**
 * Computes the USD cost of a call from its token counts.
 *
 * Rates come from `resolvePricingAt(modelId)` when that entry has at least one
 * positive rate; otherwise from the profile returned by
 * `tryGetProfile(modelId)`. The result is rounded to six decimal places.
 *
 * @param {string} modelId Model whose rates apply.
 * @param {number} tokensIn Input token count.
 * @param {number} tokensOut Output token count.
 * @returns {number | undefined} Cost in USD, or undefined when both counts are
 *   zero or no rates can be found for the model.
 */
function computeCostUsd(modelId, tokensIn, tokensOut) {
  if (tokensIn === 0 && tokensOut === 0) return void 0;

  const priceWith = (rates) => {
    const usd = tokensIn / 1e6 * rates.costInputPer1m + tokensOut / 1e6 * rates.costOutputPer1m;
    return Math.round(usd * 1e6) / 1e6;
  };

  const brainRow = resolvePricingAt(modelId);
  // An all-zero brain row is treated as "no price known" and falls through.
  if (brainRow && (brainRow.costInputPer1m > 0 || brainRow.costOutputPer1m > 0)) {
    return priceWith(brainRow);
  }

  const profile = tryGetProfile(modelId);
  return profile ? priceWith(profile) : void 0;
}
1572
/**
 * Best-effort extraction of a numeric row id from a response.
 *
 * Returns undefined without reading the body when a content-type header is
 * present but does not contain "application/json", or when the response has no
 * `json()` method. Otherwise the parsed body is inspected: for a non-empty
 * array the first element's `id` is used, for any other object its own `id`.
 * Only ids of type number are accepted. Any thrown error yields undefined.
 *
 * @param {object} res Fetch-style response.
 * @returns {Promise<number | undefined>} The id, or undefined when none is found.
 */
async function tryExtractOutcomeId(res) {
  try {
    const contentType = res.headers?.get?.("content-type") ?? "";
    const declaredNonJson = contentType && !contentType.includes("application/json");
    if (declaredNonJson || typeof res.json !== "function") return void 0;

    const body = await res.json();
    let candidate;
    if (Array.isArray(body) && body.length > 0) {
      candidate = body[0]?.id;
    } else if (body && typeof body === "object") {
      candidate = body.id;
    }
    return typeof candidate === "number" ? candidate : void 0;
  } catch {
    return void 0;
  }
}
1591
/**
 * Shapes one advisory into a row keyed to an outcome id.
 *
 * `code`, `level` and `message` are always copied. `recommendation_type`,
 * `suggestion` and `docs_url` are included only when the corresponding
 * advisory field is truthy, so absent values are omitted rather than sent as
 * null/undefined.
 *
 * @param {number} outcomeId Id of the outcome row the advisory belongs to.
 * @param {object} a Advisory object.
 * @returns {object} Row with snake_case keys.
 */
function buildAdvisoryRow(outcomeId, a) {
  const row = {
    outcome_id: outcomeId,
    code: a.code,
    level: a.level,
    message: a.message
  };
  if (a.recommendationType) row.recommendation_type = a.recommendationType;
  if (a.suggestion) row.suggestion = a.suggestion;
  if (a.docsUrl) row.docs_url = a.docsUrl;
  return row;
}
1602
/**
 * Posts an outcome-quality record to `<endpoint>/compile_outcome_quality`.
 *
 * With `config.sync` set, the POST is awaited and its result returned.
 * Otherwise the POST is started in the background and `{ ok: true }` is
 * returned immediately, so a later failure is reported only through the
 * error handler (`config.onError`, else `defaultOnError`).
 *
 * @param {object} input Fields: `outcomeId`, `outcome`, and optional `rating`,
 *   `reason`, `observedConfidence` (sent as null when absent).
 * @returns {Promise<{ ok: boolean, reason?: string }>} `reason` is
 *   "brain_not_configured" when no config is active, or "persistence_failed"
 *   when the POST fails or returns a non-ok status.
 */
async function recordOutcome(input) {
  const config = activeConfig;
  if (!config) return { ok: false, reason: "brain_not_configured" };

  const fetchFn = config.fetchImpl ?? fetch;
  const payload = {
    outcome_id: input.outcomeId,
    outcome: input.outcome,
    rating: input.rating ?? null,
    reason: input.reason ?? null,
    observed_confidence: input.observedConfidence ?? null
  };

  const persist = async () => {
    try {
      const headers = { "Content-Type": "application/json" };
      if (config.apiKey) headers.Authorization = `Bearer ${config.apiKey}`;
      const res = await fetchFn(`${config.endpoint}/compile_outcome_quality`, {
        method: "POST",
        headers,
        body: JSON.stringify(payload)
      });
      if (res.ok) return { ok: true };
      const text = await res.text().catch(() => "<no body>");
      const failure = new Error(`brain ${res.status}: ${text}`);
      (config.onError ?? defaultOnError)(failure);
      return { ok: false, reason: "persistence_failed" };
    } catch (err) {
      (config.onError ?? defaultOnError)(err);
      return { ok: false, reason: "persistence_failed" };
    }
  };

  if (!config.sync) {
    // Fire-and-forget: persist() handles its own errors.
    void persist();
    return { ok: true };
  }
  return persist();
}
821
1643
 
822
1644
  // src/ir.ts
823
1645
  var CallError = class extends Error {
@@ -855,7 +1677,7 @@ async function execute(request, opts = {}) {
855
1677
  }
856
1678
  }
857
1679
  async function executeAnthropic(request, opts) {
858
- const apiKey = opts.apiKeys?.anthropic ?? process.env.ANTHROPIC_API_KEY;
1680
+ const apiKey = resolveProviderKey("anthropic", { apiKeys: opts.apiKeys });
859
1681
  if (!apiKey) {
860
1682
  return terminalError(401, "auth", "ANTHROPIC_API_KEY missing");
861
1683
  }
@@ -894,7 +1716,7 @@ function normalizeAnthropic(raw) {
894
1716
  return { text, structuredOutput: null, toolCalls, tokens, finishReason: r.stop_reason, raw };
895
1717
  }
896
1718
  async function executeGoogle(request, opts) {
897
- const apiKey = opts.apiKeys?.google ?? process.env.GOOGLE_API_KEY ?? process.env.GEMINI_API_KEY;
1719
+ const apiKey = resolveProviderKey("google", { apiKeys: opts.apiKeys });
898
1720
  if (!apiKey) {
899
1721
  return terminalError(401, "auth", "GOOGLE_API_KEY/GEMINI_API_KEY missing");
900
1722
  }
@@ -936,7 +1758,7 @@ function normalizeGoogle(raw) {
936
1758
  return { text, structuredOutput: null, toolCalls, tokens, finishReason: candidate?.finishReason, raw };
937
1759
  }
938
1760
  async function executeOpenAI(request, opts) {
939
- const apiKey = opts.apiKeys?.openai ?? process.env.OPENAI_API_KEY;
1761
+ const apiKey = resolveProviderKey("openai", { apiKeys: opts.apiKeys });
940
1762
  if (!apiKey) {
941
1763
  return terminalError(401, "auth", "OPENAI_API_KEY missing");
942
1764
  }
@@ -958,7 +1780,7 @@ async function executeOpenAI(request, opts) {
958
1780
  return { ok: true, status: res.status, response: normalizeOpenAILike(json) };
959
1781
  }
960
1782
  async function executeDeepSeek(request, opts) {
961
- const apiKey = opts.apiKeys?.deepseek ?? process.env.DEEPSEEK_API_KEY;
1783
+ const apiKey = resolveProviderKey("deepseek", { apiKeys: opts.apiKeys });
962
1784
  if (!apiKey) {
963
1785
  return terminalError(401, "auth", "DEEPSEEK_API_KEY missing");
964
1786
  }
@@ -1053,15 +1875,142 @@ function tryParseJson(s) {
1053
1875
 
1054
1876
  // src/call.ts
1055
1877
  async function call(ir, opts = {}) {
1878
+ const traceId = generateTraceId();
1879
+ safeEmit(
1880
+ () => emitCompileStart(traceId, ir.appId, {
1881
+ appId: ir.appId,
1882
+ archetype: ir.intent.archetype,
1883
+ models: ir.models
1884
+ })
1885
+ );
1056
1886
  const initial = compileAndRegister(ir, opts);
1887
+ safeEmit(
1888
+ () => emitCompileDone(traceId, ir.appId, {
1889
+ target: initial.target,
1890
+ provider: initial.provider,
1891
+ fallbackChain: initial.fallbackChain,
1892
+ tokensIn: initial.tokensIn,
1893
+ estimatedCostUsd: initial.estimatedCostUsd,
1894
+ mutationsApplied: initial.mutationsApplied,
1895
+ advisories: initial.advisories
1896
+ })
1897
+ );
1898
+ for (const adv of initial.advisories) {
1899
+ safeEmit(
1900
+ () => emitAdvisoryFired(traceId, ir.appId, { code: adv.code, message: adv.message })
1901
+ );
1902
+ }
1057
1903
  const start = Date.now();
1058
1904
  const attempts = [];
1059
- const targetsToTry = [initial.target, ...initial.fallbackChain];
1905
+ const rawTargets = [initial.target, ...initial.fallbackChain];
1906
+ let unreachableFiltered;
1907
+ let targetsToTry;
1908
+ if (opts.noAutoFilter) {
1909
+ targetsToTry = rawTargets;
1910
+ } else {
1911
+ const dropped = [];
1912
+ targetsToTry = [];
1913
+ for (const t of rawTargets) {
1914
+ if (isModelReachable(t, { apiKeys: opts.apiKeys })) {
1915
+ targetsToTry.push(t);
1916
+ } else {
1917
+ dropped.push(t);
1918
+ }
1919
+ }
1920
+ unreachableFiltered = dropped;
1921
+ if (targetsToTry.length === 0) {
1922
+ const latencyMs2 = Date.now() - start;
1923
+ await record({
1924
+ handle: initial.handle,
1925
+ tokensIn: 0,
1926
+ tokensOut: 0,
1927
+ latencyMs: latencyMs2,
1928
+ success: false,
1929
+ errorType: "no_reachable_models",
1930
+ promptPreview: extractPromptPreview(ir)
1931
+ });
1932
+ const noReachableAttempts = dropped.map((m) => ({
1933
+ model: m,
1934
+ status: "terminal",
1935
+ errorCode: "unreachable_provider",
1936
+ message: `No API key for ${m}'s provider \u2014 set one of PROVIDER_ENV_KEYS or pass apiKeys`
1937
+ }));
1938
+ throw new CallError(
1939
+ `call(): no reachable models in chain. Filtered: [${dropped.join(", ")}]. Add a key for one provider, or pass apiKeys.`,
1940
+ noReachableAttempts,
1941
+ void 0,
1942
+ "no_reachable_models"
1943
+ );
1944
+ }
1945
+ const archetypeName = ir.intent?.archetype;
1946
+ if (archetypeName) {
1947
+ const ensured = ensureCrossProviderTail({
1948
+ chain: targetsToTry,
1949
+ archetype: archetypeName,
1950
+ apiKeys: opts.apiKeys
1951
+ });
1952
+ if (ensured.appended) {
1953
+ targetsToTry = ensured.chain;
1954
+ }
1955
+ }
1956
+ }
1957
+ let policyBlockedFiltered;
1958
+ if (opts.policy?.blockedModels && opts.policy.blockedModels.length > 0) {
1959
+ const blocked = new Set(opts.policy.blockedModels);
1960
+ const filtered = [];
1961
+ const dropped = [];
1962
+ for (const t of targetsToTry) {
1963
+ if (blocked.has(t)) {
1964
+ dropped.push(t);
1965
+ } else {
1966
+ filtered.push(t);
1967
+ }
1968
+ }
1969
+ if (dropped.length > 0) {
1970
+ policyBlockedFiltered = dropped;
1971
+ targetsToTry = filtered;
1972
+ }
1973
+ if (targetsToTry.length === 0) {
1974
+ const latencyMs2 = Date.now() - start;
1975
+ await record({
1976
+ handle: initial.handle,
1977
+ tokensIn: 0,
1978
+ tokensOut: 0,
1979
+ latencyMs: latencyMs2,
1980
+ success: false,
1981
+ errorType: "all_blocked_by_policy",
1982
+ promptPreview: extractPromptPreview(ir)
1983
+ });
1984
+ const blockedAttempts = dropped.map((m) => ({
1985
+ model: m,
1986
+ status: "terminal",
1987
+ errorCode: "blocked_by_policy",
1988
+ message: `Skipped \u2014 model ${m} is in CompilePolicy.blockedModels`
1989
+ }));
1990
+ throw new CallError(
1991
+ `call(): all chain targets blocked by CompilePolicy.blockedModels: [${dropped.join(", ")}]`,
1992
+ blockedAttempts,
1993
+ void 0,
1994
+ "all_blocked_by_policy"
1995
+ );
1996
+ }
1997
+ }
1060
1998
  let activeCompile = initial;
1061
1999
  let lastErr;
2000
+ const failedProviders = /* @__PURE__ */ new Set();
1062
2001
  for (let i = 0; i < targetsToTry.length; i++) {
1063
2002
  const targetModel = targetsToTry[i];
1064
- if (i > 0) {
2003
+ const targetProfile = tryGetProfile(targetModel);
2004
+ if (targetProfile && failedProviders.has(targetProfile.provider) && !opts.noFallback) {
2005
+ attempts.push({
2006
+ model: targetModel,
2007
+ status: "terminal",
2008
+ errorCode: "auth_inferred",
2009
+ message: `Skipped \u2014 provider ${targetProfile.provider} returned 401/403 earlier in this call; same key inferred to fail`
2010
+ });
2011
+ continue;
2012
+ }
2013
+ if (targetModel !== initial.target) {
1065
2014
  try {
1066
2015
  activeCompile = compileAndRegister(
1067
2016
  {
@@ -1081,59 +2030,112 @@ async function call(ir, opts = {}) {
1081
2030
  continue;
1082
2031
  }
1083
2032
  }
2033
+ safeEmit(
2034
+ () => emitExecuteAttempt(traceId, ir.appId, { model: targetModel, attemptIndex: i })
2035
+ );
1084
2036
  const exec = await execute(activeCompile.request, {
1085
2037
  apiKeys: opts.apiKeys,
1086
2038
  fetchImpl: opts.fetchImpl,
1087
2039
  providerOverrides: opts.providerOverrides
1088
2040
  });
1089
- if (exec.ok) {
2041
+ const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
2042
+ if (validated.ok) {
1090
2043
  attempts.push({ model: targetModel, status: "success" });
1091
2044
  const latencyMs2 = Date.now() - start;
1092
- const responseWithStructured = withStructuredOutput(exec.response, ir);
1093
- void record({
2045
+ safeEmit(
2046
+ () => emitExecuteSuccess(traceId, ir.appId, {
2047
+ model: targetModel,
2048
+ tokensIn: validated.response.tokens.input,
2049
+ tokensOut: validated.response.tokens.output,
2050
+ latencyMs: latencyMs2
2051
+ })
2052
+ );
2053
+ const fellOver = targetModel !== initial.target;
2054
+ const fallbackReason = fellOver ? normalizeFallbackReason(attempts) : void 0;
2055
+ await record({
1094
2056
  handle: initial.handle,
1095
- tokensIn: responseWithStructured.tokens.input,
1096
- tokensOut: responseWithStructured.tokens.output,
2057
+ tokensIn: validated.response.tokens.input,
2058
+ tokensOut: validated.response.tokens.output,
1097
2059
  latencyMs: latencyMs2,
1098
2060
  success: true,
1099
- emptyResponse: responseWithStructured.tokens.output === 0,
1100
- toolsCalled: responseWithStructured.toolCalls.map((tc) => tc.name),
2061
+ emptyResponse: validated.response.tokens.output === 0,
2062
+ toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
1101
2063
  actualModel: targetModel !== initial.target ? targetModel : void 0,
1102
- responsePreview: responseWithStructured.text.slice(0, 200)
2064
+ mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
2065
+ promptPreview: extractPromptPreview(ir),
2066
+ responsePreview: validated.response.text.slice(0, 200),
2067
+ cacheReadInputTokens: validated.response.tokens.cached,
2068
+ cacheCreationInputTokens: validated.response.tokens.cacheCreated,
2069
+ // alpha.28 — Glass-Box renderer substrate (migration 018). call()
2070
+ // owns the lifecycle so it has direct visibility into finishReason
2071
+ // (from the normalized provider response), totalMs (mirrors latencyMs
2072
+ // for non-streaming; future streaming variant may diverge), and the
2073
+ // fell-over-from / fallback-reason pair (already computed above for
2074
+ // the CallResult return shape).
2075
+ finishReason: validated.response.finishReason,
2076
+ totalMs: latencyMs2,
2077
+ fellOverFrom: fellOver ? initial.target : void 0,
2078
+ fallbackReason
1103
2079
  });
2080
+ if (fellOver) {
2081
+ const firstFailed = attempts.find((a) => a.status !== "success");
2082
+ if (firstFailed) {
2083
+ safeEmit(
2084
+ () => emitFallbackWalked(traceId, ir.appId, {
2085
+ from: initial.target,
2086
+ to: targetModel,
2087
+ reason: fallbackReason ?? "unknown",
2088
+ attempt: firstFailed
2089
+ })
2090
+ );
2091
+ }
2092
+ }
1104
2093
  return {
1105
2094
  handle: initial.handle,
1106
2095
  actualModel: targetModel,
1107
2096
  requestedModel: initial.target,
1108
2097
  provider: activeCompile.provider,
1109
- response: responseWithStructured,
2098
+ response: validated.response,
1110
2099
  latencyMs: latencyMs2,
1111
2100
  mutationsApplied: activeCompile.mutationsApplied,
1112
- attempts
2101
+ attempts,
2102
+ servedBy: targetModel,
2103
+ fellOverFrom: fellOver ? initial.target : void 0,
2104
+ fallbackReason,
2105
+ unreachableFiltered,
2106
+ policyBlockedFiltered,
2107
+ traceId
1113
2108
  };
1114
2109
  }
1115
2110
  attempts.push({
1116
2111
  model: targetModel,
1117
- status: exec.errorType,
1118
- errorCode: exec.errorCode,
1119
- message: exec.message
2112
+ status: validated.errorType,
2113
+ errorCode: validated.errorCode,
2114
+ message: validated.message
1120
2115
  });
1121
- lastErr = exec;
1122
- if (exec.errorType === "terminal" || opts.noFallback) {
2116
+ lastErr = validated;
2117
+ if (validated.errorType === "terminal" || opts.noFallback) {
2118
+ if (validated.errorCode === "auth" && !opts.noFallback && activeCompile.provider) {
2119
+ failedProviders.add(activeCompile.provider);
2120
+ continue;
2121
+ }
1123
2122
  break;
1124
2123
  }
1125
2124
  }
1126
2125
  const latencyMs = Date.now() - start;
1127
- void record({
2126
+ await record({
1128
2127
  handle: initial.handle,
1129
2128
  tokensIn: 0,
1130
2129
  tokensOut: 0,
1131
2130
  latencyMs,
1132
2131
  success: false,
1133
- errorType: lastErr?.errorCode
2132
+ errorType: lastErr?.errorCode,
2133
+ promptPreview: extractPromptPreview(ir)
1134
2134
  });
2135
+ const filteredNote = unreachableFiltered && unreachableFiltered.length > 0 ? ` (also auto-filtered: [${unreachableFiltered.join(", ")}] \u2014 no API key)` : "";
2136
+ const blockedNote = policyBlockedFiltered && policyBlockedFiltered.length > 0 ? ` (also policy-blocked: [${policyBlockedFiltered.join(", ")}])` : "";
1135
2137
  throw new CallError(
1136
- `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
2138
+ `call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}${blockedNote}`,
1137
2139
  attempts,
1138
2140
  lastErr?.status,
1139
2141
  lastErr?.errorCode
@@ -1148,20 +2150,74 @@ function compileAndRegister(ir, opts) {
1148
2150
  registerCompile(ir.appId, ir.intent.archetype, ir, result);
1149
2151
  return result;
1150
2152
  }
1151
- function withStructuredOutput(response, ir) {
1152
- if (!ir.constraints?.structuredOutput) return response;
1153
- if (!response.text) return response;
2153
/**
 * Builds a short preview of the prompt for outcome records.
 *
 * Uses the current turn's content when it is a non-empty string, otherwise the
 * content of the last history entry. Content that is not a string (for example
 * an array of parts) is treated as absent, so the result is always a string of
 * at most 200 characters or `undefined` and never an array slice or a thrown
 * TypeError.
 *
 * @param {object} ir - Request IR; only `currentTurn.content` and `history[].content` are read.
 * @returns {string | undefined} The first 200 characters of the chosen content.
 */
function extractPromptPreview(ir) {
  const PREVIEW_CHARS = 200;
  const preview = (content) =>
    typeof content === "string" && content.length > 0
      ? content.slice(0, PREVIEW_CHARS)
      : undefined;
  return (
    preview(ir.currentTurn?.content) ??
    preview(ir.history?.[ir.history.length - 1]?.content)
  );
}
2160
/**
 * Checks a provider response against the structured-output constraint.
 *
 * - No `ir.constraints.structuredOutput`: the response passes through untouched.
 * - `finishReason` of "max_tokens" or "length" (case-insensitive): retryable failure.
 * - Empty / missing `response.text`: passes through without a `structuredOutput` field.
 * - Otherwise the text is parsed as JSON; success attaches `structuredOutput`,
 *   a parse error becomes a retryable failure.
 *
 * @param {object} exec - Execution result; `status` and `response` are read.
 * @param {object} ir - Request IR; only `constraints.structuredOutput` is read.
 * @returns {object} `{ ok: true, response }` or
 *   `{ ok: false, status, errorType, errorCode, message, raw }`.
 */
function validateStructuredContract(exec, ir) {
  const wantsStructured = Boolean(ir.constraints?.structuredOutput);
  if (!wantsStructured) {
    return { ok: true, response: exec.response };
  }

  // Both failure shapes share everything except the code and the message.
  const retryableFailure = (errorCode, message) => ({
    ok: false,
    status: exec.status,
    errorType: "retryable",
    errorCode,
    message,
    raw: exec.response.raw
  });

  const finishReason = (exec.response.finishReason ?? "").toLowerCase();
  const truncated = ["max_tokens", "length"].includes(finishReason);
  if (truncated) {
    return retryableFailure(
      "max_tokens_on_structured_output",
      `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`
    );
  }

  if (!exec.response.text) {
    return { ok: true, response: exec.response };
  }

  let structuredOutput;
  try {
    structuredOutput = JSON.parse(exec.response.text);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return retryableFailure("structured_output_parse_failed", reason);
  }
  return { ok: true, response: { ...exec.response, structuredOutput } };
}
2192
/**
 * Maps the first non-successful attempt to a coarse fallback reason.
 *
 * @param {Array<{status: string, errorCode?: string}>} attempts - Attempts in the order they were made.
 * @returns {string | undefined} "rate_limit", "cliff", "cost_cap",
 *   "provider_auth_failed" or "provider_error"; `undefined` when every
 *   attempt has status "success".
 */
function normalizeFallbackReason(attempts) {
  const firstFailure = attempts.find((attempt) => attempt.status !== "success");
  if (!firstFailure) return undefined;

  switch (firstFailure.errorCode ?? "") {
    case "rate_limit_429":
    case "rate_limit":
      return "rate_limit";
    case "max_tokens_on_structured_output":
    case "structured_output_parse_failed":
      return "cliff";
    case "cost_cap_exceeded":
      return "cost_cap";
    case "auth":
    case "auth_inferred":
      return "provider_auth_failed";
    default:
      return "provider_error";
  }
}
2204
/**
 * Produces a trace id in 8-4-4-4-12 hexadecimal form.
 *
 * Uses `globalThis.crypto.randomUUID()` when that function exists; if it is
 * missing or throws, the id is assembled from `Math.random()` hex groups.
 *
 * @returns {string} The generated id.
 */
function generateTraceId() {
  try {
    const { crypto: webCrypto } = globalThis;
    if (webCrypto && typeof webCrypto.randomUUID === "function") {
      return webCrypto.randomUUID();
    }
  } catch {
    // Fall through to the Math.random-based id below.
  }
  const randomHex = (digits) =>
    Math.floor(Math.random() * 16 ** digits)
      .toString(16)
      .padStart(digits, "0");
  return [8, 4, 4, 4, 12].map((digits) => randomHex(digits)).join("-");
}
2215
/**
 * Runs an emitter callback on a best-effort basis: nothing it does is allowed
 * to surface to the caller.
 *
 * Synchronous throws are swallowed. If the callback returns a thenable, a
 * no-op rejection handler is attached so that an asynchronous failure does not
 * become an unhandled promise rejection either.
 *
 * @param {() => unknown} fn - Callback to invoke.
 * @returns {void}
 */
function safeEmit(fn) {
  try {
    const result = fn();
    if (result != null && typeof result.then === "function") {
      // Attach the handler synchronously so the rejection is never "unhandled".
      result.then(undefined, () => {});
    }
  } catch {
    // Emission is best-effort by design; failures are intentionally ignored.
  }
}
1165
2221
 
1166
2222
  // src/oracle.ts
1167
2223
  var DEFAULT_DIMENSIONS = ["correctness", "completeness", "conciseness", "format"];
@@ -1251,6 +2307,397 @@ function clamp(n) {
1251
2307
  return Math.max(0, Math.min(1, n));
1252
2308
  }
1253
2309
 
2310
+ // src/advisories-api.ts
2311
// Closed vocabularies used to validate fields of advisory rows.
// Values outside these sets are replaced or dropped by the as* helpers.
var SEVERITY_SET = /* @__PURE__ */ new Set([
  "info",
  "warn",
  "critical"
]);
var STATUS_SET = /* @__PURE__ */ new Set([
  "open",
  "snoozed",
  "resolved"
]);
var RESOLUTION_SOURCE_SET = /* @__PURE__ */ new Set(["auto", "consumer-marked", "declined"]);
2318
/**
 * Narrows an unknown value to a non-empty string.
 *
 * @param {unknown} v - Value to inspect.
 * @returns {string | undefined} `v` when it is a string of length > 0, else `undefined`.
 */
function asString(v) {
  if (typeof v !== "string") return undefined;
  if (v.length === 0) return undefined;
  return v;
}
2321
/**
 * Narrows an unknown value to a known severity.
 *
 * @param {unknown} v - Value to inspect.
 * @returns {string} `v` when it is a member of SEVERITY_SET, otherwise "info".
 */
function asSeverity(v) {
  const recognised = typeof v === "string" && SEVERITY_SET.has(v);
  return recognised ? v : "info";
}
2327
/**
 * Narrows an unknown value to a known advisory status.
 *
 * @param {unknown} v - Value to inspect.
 * @returns {string} `v` when it is a member of STATUS_SET, otherwise "open".
 */
function asStatus(v) {
  const recognised = typeof v === "string" && STATUS_SET.has(v);
  return recognised ? v : "open";
}
2333
/**
 * Narrows an unknown value to a known resolution source.
 *
 * @param {unknown} v - Value to inspect.
 * @returns {string | undefined} `v` when it is a member of
 *   RESOLUTION_SOURCE_SET, otherwise `undefined`.
 */
function asResolutionSource(v) {
  const recognised = typeof v === "string" && RESOLUTION_SOURCE_SET.has(v);
  return recognised ? v : undefined;
}
2339
/**
 * Converts one snake_case advisory row into the camelCase advisory object.
 *
 * String fields that are missing or of the wrong type become "" (or are
 * omitted for the optional ones), `observation_count` falls back to 0, and
 * severity / status are normalised through asSeverity / asStatus.
 * `suggestedFix` is `null` unless the row has a docs URL or a suggestion.
 *
 * @param {object} row - Raw row; read-only.
 * @returns {object} The advisory object.
 */
function rowToAdvisory(row) {
  const textOrEmpty = (value) => (typeof value === "string" ? value : "");

  const appliesTo = {};
  const archetype = asString(row.applies_to_archetype);
  if (archetype) appliesTo.archetype = archetype;
  const model = asString(row.applies_to_model);
  if (model) appliesTo.model = model;

  const docsLink = asString(row.docs_url);
  const suggestion = asString(row.suggestion);
  const suggestedFix =
    docsLink || suggestion
      ? {
          type: "manual",
          ...(docsLink ? { docsLink } : {}),
          ...(suggestion ? { before: suggestion } : {})
        }
      : null;

  const advisory = {
    id: textOrEmpty(row.id),
    rule: textOrEmpty(row.rule),
    severity: asSeverity(row.severity),
    openedAt: textOrEmpty(row.opened_at),
    lastObservedAt: textOrEmpty(row.last_observed_at),
    observationCount: typeof row.observation_count === "number" ? row.observation_count : 0,
    appliesTo,
    message: textOrEmpty(row.message),
    suggestedFix,
    // reserved — alpha.30+
    autoApplicable: false,
    status: asStatus(row.status)
  };

  // Resolution details are only attached when the row actually carries them.
  const resolutionFields = [
    ["resolvedAt", asString(row.resolved_at)],
    ["resolutionSource", asResolutionSource(row.resolution_source)],
    ["resolutionNote", asString(row.resolution_note)]
  ];
  for (const [key, value] of resolutionFields) {
    if (value) advisory[key] = value;
  }
  return advisory;
}
2375
/**
 * Picks the fetch implementation to use.
 *
 * @param {Function} [injected] - Caller-supplied fetch; returned as-is when not null/undefined.
 * @returns {Function} `injected`, or a wrapper that looks up `globalThis.fetch`
 *   at call time (not at resolve time) and forwards all arguments.
 */
function resolveFetch(injected) {
  if (injected !== undefined && injected !== null) {
    return injected;
  }
  return (...args) => globalThis.fetch(...args);
}
2378
/**
 * Removes every trailing "/" from an endpoint so paths can be appended with a
 * single slash.
 *
 * @param {string} endpoint - Base URL.
 * @returns {string} `endpoint` without trailing slashes.
 */
function normalizeEndpoint(endpoint) {
  const trailingSlashes = /\/+$/;
  const trimmed = endpoint.replace(trailingSlashes, "");
  return trimmed;
}
2381
/**
 * Fetches advisories for an app from the brain's `actionable_advisories_v`
 * REST view and converts each row with rowToAdvisory.
 *
 * The query always filters on `app_id`, optionally on `severity`, and on
 * `status` unless the caller passes "all" (default: "open"). Rows are ordered
 * by `last_observed_at` descending. Entries of the response array that are not
 * objects are skipped.
 *
 * @param {object} opts
 * @param {string} opts.appId - Required app id.
 * @param {string} [opts.severity] - Exact severity to filter on.
 * @param {string} [opts.status] - Exact status, or "all" for no status filter.
 * @param {string} opts.brainEndpoint - Brain base URL; trailing slashes are stripped.
 * @param {string} opts.brainJwt - Sent as the Bearer token.
 * @param {string} opts.brainAnonKey - Sent as the `apikey` header.
 * @param {Function} [opts.fetch] - fetch implementation; defaults to the global one.
 * @returns {Promise<object[]>} Converted advisories.
 * @throws {Error} When `appId` is missing, the request fails (the underlying
 *   error is attached as `cause`), the brain answers with a non-2xx status, or
 *   the body is not a JSON array.
 */
async function getActionableAdvisories(opts) {
  const {
    appId,
    severity,
    status,
    brainEndpoint,
    brainJwt,
    brainAnonKey,
    fetch: injectedFetch
  } = opts;
  if (!appId) {
    throw new Error("getActionableAdvisories: appId is required");
  }
  const doFetch = resolveFetch(injectedFetch);
  const base = normalizeEndpoint(brainEndpoint);

  const qs = new URLSearchParams();
  qs.set("app_id", `eq.${appId}`);
  if (severity) qs.set("severity", `eq.${severity}`);
  const effectiveStatus = status ?? "open";
  if (effectiveStatus !== "all") {
    qs.set("status", `eq.${effectiveStatus}`);
  }
  qs.set("order", "last_observed_at.desc");
  const url = `${base}/rest/v1/actionable_advisories_v?${qs.toString()}`;

  let res;
  try {
    res = await doFetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${brainJwt}`,
        apikey: brainAnonKey,
        Accept: "application/json"
      }
    });
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    // Keep the original error reachable (stack, error code) for diagnostics.
    throw new Error(`getActionableAdvisories: network error: ${msg}`, { cause: err });
  }

  if (res.status === 401 || res.status === 403) {
    throw new Error("getActionableAdvisories: brain auth misconfig");
  }
  if (res.status >= 500) {
    throw new Error(`getActionableAdvisories: brain unavailable (${res.status})`);
  }
  if (!res.ok) {
    throw new Error(`getActionableAdvisories: bad request (${res.status})`);
  }

  let rows;
  try {
    rows = await res.json();
  } catch (err) {
    throw new Error("getActionableAdvisories: malformed brain response", { cause: err });
  }
  if (!Array.isArray(rows)) {
    throw new Error("getActionableAdvisories: expected array from brain");
  }

  const out = [];
  for (const raw of rows) {
    if (raw && typeof raw === "object") {
      out.push(rowToAdvisory(raw));
    }
  }
  return out;
}
2445
// Upper bound on the number of outcome ids placed in one PATCH filter, so the
// request URL stays short no matter how many outcomes an app has.
const ADVISORY_PATCH_BATCH_SIZE = 200;

/**
 * Performs one brain request and maps transport / HTTP failures onto the
 * `{ ok: false, reason }` result shape used by markAdvisoryResolved.
 *
 * @returns {Promise<{res?: object, failure?: {ok: false, reason: string}}>}
 *   `res` on a 2xx answer, otherwise `failure`.
 */
async function advisoryBrainRequest(doFetch, url, init, failurePrefix) {
  let res;
  try {
    res = await doFetch(url, init);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    return { failure: { ok: false, reason: `network_error:${msg}` } };
  }
  if (res.status === 401 || res.status === 403) {
    return { failure: { ok: false, reason: "brain_auth_misconfig" } };
  }
  if (res.status >= 500) {
    return { failure: { ok: false, reason: "brain_unavailable" } };
  }
  if (!res.ok) {
    return { failure: { ok: false, reason: `${failurePrefix}:${res.status}` } };
  }
  return { res };
}

/**
 * GETs a brain URL and decodes the JSON body.
 *
 * @returns {Promise<{rows?: unknown, failure?: {ok: false, reason: string}}>}
 *   The decoded body, or a failure ("brain_lookup_malformed" when it is not JSON).
 */
async function advisoryBrainRows(doFetch, url, init) {
  const { res, failure } = await advisoryBrainRequest(doFetch, url, init, "brain_lookup_failed");
  if (failure) return { failure };
  try {
    return { rows: await res.json() };
  } catch {
    return { failure: { ok: false, reason: "brain_lookup_malformed" } };
  }
}

/**
 * Marks an advisory as resolved by the consumer.
 *
 * Steps: (1) look up the advisory's `app_id` and `rule` in
 * `actionable_advisories_v`; (2) list the ids of the app's `compile_outcomes`;
 * (3) PATCH the still-unresolved `compile_outcome_advisories` rows for those
 * outcomes and that rule code with `resolved_at`, `resolution_source:
 * "consumer-marked"` and, when given, `resolution_note`.
 *
 * The PATCH is issued in batches of at most ADVISORY_PATCH_BATCH_SIZE outcome
 * ids. Batches run sequentially and the first failing batch ends the call, so
 * earlier batches may already be applied; because the filter only matches rows
 * whose `resolved_at` is null, calling again only touches the remaining rows.
 *
 * This function never throws for brain/transport problems; it reports them
 * through `reason`.
 *
 * NOTE(review): step (2) reads a single page of `compile_outcomes`; if the
 * brain caps rows per response, outcomes beyond that cap are not patched —
 * confirm against the brain's configuration.
 *
 * @param {object} opts
 * @param {string} opts.id - Advisory id (required).
 * @param {string} [opts.resolutionNote] - Stored as `resolution_note` when not undefined.
 * @param {string} opts.brainEndpoint - Brain base URL; trailing slashes are stripped.
 * @param {string} opts.brainJwt - Sent as the Bearer token.
 * @param {string} opts.brainAnonKey - Sent as the `apikey` header.
 * @param {Function} [opts.fetch] - fetch implementation; defaults to the global one.
 * @returns {Promise<{ok: true} | {ok: false, reason: string}>}
 */
async function markAdvisoryResolved(opts) {
  const {
    id,
    resolutionNote,
    brainEndpoint,
    brainJwt,
    brainAnonKey,
    fetch: injectedFetch
  } = opts;
  if (!id) {
    return { ok: false, reason: "id_required" };
  }
  const doFetch = resolveFetch(injectedFetch);
  const base = normalizeEndpoint(brainEndpoint);
  const readInit = {
    method: "GET",
    headers: {
      Authorization: `Bearer ${brainJwt}`,
      apikey: brainAnonKey,
      Accept: "application/json"
    }
  };

  // 1. Resolve the advisory id to its (app_id, rule) tuple.
  const lookupUrl = `${base}/rest/v1/actionable_advisories_v?id=eq.${encodeURIComponent(id)}&select=app_id,rule`;
  const lookup = await advisoryBrainRows(doFetch, lookupUrl, readInit);
  if (lookup.failure) return lookup.failure;
  if (!Array.isArray(lookup.rows) || lookup.rows.length === 0) {
    return { ok: false, reason: "advisory_not_found" };
  }
  const tuple = lookup.rows[0];
  // `?.` keeps a null/primitive first row from throwing; it is reported as invalid instead.
  const appId = typeof tuple?.app_id === "string" ? tuple.app_id : "";
  const code = typeof tuple?.rule === "string" ? tuple.rule : "";
  if (!appId || !code) {
    return { ok: false, reason: "advisory_tuple_invalid" };
  }

  // 2. Collect the numeric ids of the app's compile outcomes.
  const outcomesUrl = `${base}/rest/v1/compile_outcomes?app_id=eq.${encodeURIComponent(appId)}&select=id`;
  const outcomes = await advisoryBrainRows(doFetch, outcomesUrl, readInit);
  if (outcomes.failure) return outcomes.failure;
  if (!Array.isArray(outcomes.rows)) {
    return { ok: false, reason: "brain_lookup_malformed" };
  }
  const outcomeIds = [];
  for (const row of outcomes.rows) {
    if (row && typeof row === "object") {
      const idVal = row.id;
      if (typeof idVal === "number" && Number.isFinite(idVal)) {
        outcomeIds.push(idVal);
      }
    }
  }
  if (outcomeIds.length === 0) {
    // Nothing to patch: the advisory has no outcome rows behind it.
    return { ok: true };
  }

  // 3. Patch the unresolved advisory rows, one bounded batch at a time.
  const patchBody = {
    resolved_at: (/* @__PURE__ */ new Date()).toISOString(),
    resolution_source: "consumer-marked"
  };
  if (resolutionNote !== void 0) {
    patchBody.resolution_note = resolutionNote;
  }
  const patchInit = {
    method: "PATCH",
    headers: {
      Authorization: `Bearer ${brainJwt}`,
      apikey: brainAnonKey,
      "Content-Type": "application/json",
      Accept: "application/json",
      // PostgREST default is no return; we don't need the row back.
      Prefer: "return=minimal"
    },
    body: JSON.stringify(patchBody)
  };
  for (let start = 0; start < outcomeIds.length; start += ADVISORY_PATCH_BATCH_SIZE) {
    const inList = outcomeIds.slice(start, start + ADVISORY_PATCH_BATCH_SIZE).join(",");
    const patchUrl = `${base}/rest/v1/compile_outcome_advisories?outcome_id=in.(${inList})&code=eq.${encodeURIComponent(code)}&resolved_at=is.null`;
    const patched = await advisoryBrainRequest(doFetch, patchUrl, patchInit, "patch_failed");
    if (patched.failure) return patched.failure;
  }
  return { ok: true };
}
2581
+
2582
+ // src/models-brain.ts
2583
/**
 * Structural check for a model row: an object whose `model_id` and `provider`
 * are both strings.
 *
 * @param {unknown} x - Candidate row.
 * @returns {boolean} Whether `x` has the required string fields.
 */
function isModelRow(x) {
  const isObject = Boolean(x) && typeof x === "object";
  return isObject && typeof x.model_id === "string" && typeof x.provider === "string";
}
2588
/**
 * Structural check for an alias row: an object whose `alias_id` and
 * `canonical_id` are both strings.
 *
 * @param {unknown} x - Candidate row.
 * @returns {boolean} Whether `x` has the required string fields.
 */
function isAliasRow(x) {
  const isObject = Boolean(x) && typeof x === "object";
  return isObject && typeof x.alias_id === "string" && typeof x.canonical_id === "string";
}
2593
/**
 * Converts a snake_case model row into a camelCase model profile, filling in
 * defaults for null/undefined columns.
 *
 * Returns `null` when `cliffs` or `recovery` is present but not an array, when
 * `lowering` is present but not a plain (non-array) object, or when reading
 * the row throws.
 *
 * @param {object} row - Model row; `model_id` and `provider` are copied as-is.
 * @returns {object | null} The profile, or `null` for a rejected row.
 */
function rowToProfile(row) {
  try {
    const present = (value) => value !== undefined && value !== null;
    const invalidList = (value) => present(value) && !Array.isArray(value);
    const invalidRecord = (value) =>
      present(value) && (typeof value !== "object" || Array.isArray(value));

    if (invalidList(row.cliffs)) return null;
    if (invalidList(row.recovery)) return null;
    if (invalidRecord(row.lowering)) return null;

    const defaultLowering = { system: { mode: "inline" }, cache: { strategy: "unsupported" } };
    const profile = {
      id: row.model_id,
      provider: row.provider,
      status: row.status ?? "current",
      maxContextTokens: row.max_context_tokens ?? 0,
      maxOutputTokens: row.max_output_tokens ?? 0,
      maxTools: row.max_tools ?? 0,
      parallelToolCalls: row.parallel_tool_calls ?? false,
      structuredOutput: row.structured_output ?? "none",
      systemPromptMode: row.system_prompt_mode ?? "inline",
      streaming: row.streaming ?? true,
      cliffs: row.cliffs ?? [],
      costInputPer1m: row.cost_input_per_1m ?? 0,
      costOutputPer1m: row.cost_output_per_1m ?? 0,
      lowering: row.lowering ?? defaultLowering,
      recovery: row.recovery ?? [],
      strengths: row.strengths ?? [],
      weaknesses: row.weaknesses ?? [],
      // `?? undefined` turns a null column into an undefined field.
      notes: row.notes ?? undefined,
      verifiedAgainstDocs: row.verified_against_docs ?? undefined,
      archetypePerf: row.archetype_perf ?? undefined
    };
    return profile;
  } catch {
    return null;
  }
}
2630
/**
 * Converts a camelCase model profile into a snake_case model row.
 *
 * `verified_against_docs` comes from `opts.verifiedAgainstDocs` when given;
 * otherwise from the profile, but only if that value starts with a
 * YYYY-MM-DD pattern (anything else is written as `null`). The column is left
 * out entirely when neither source defines it.
 *
 * @param {object} profile - Model profile.
 * @param {object} [opts]
 * @param {boolean} [opts.active=true] - Value of the `active` column.
 * @param {*} [opts.verifiedAgainstDocs] - Overrides the profile's value.
 * @param {*} [opts.versionAdded] - Written to `version_added` when defined.
 * @param {*} [opts.versionRemoved] - Written to `version_removed` when defined.
 * @returns {object} The row.
 */
function profileToRow(profile, opts = {}) {
  const DATE_PREFIX = /^\d{4}-\d{2}-\d{2}/;
  const row = {
    model_id: profile.id,
    provider: profile.provider,
    status: profile.status,
    max_context_tokens: profile.maxContextTokens,
    max_output_tokens: profile.maxOutputTokens,
    max_tools: profile.maxTools,
    parallel_tool_calls: profile.parallelToolCalls,
    structured_output: profile.structuredOutput,
    system_prompt_mode: profile.systemPromptMode,
    streaming: profile.streaming,
    cliffs: profile.cliffs,
    cost_input_per_1m: profile.costInputPer1m,
    cost_output_per_1m: profile.costOutputPer1m,
    lowering: profile.lowering,
    recovery: profile.recovery,
    strengths: profile.strengths,
    weaknesses: profile.weaknesses,
    notes: profile.notes ?? null,
    archetype_perf: profile.archetypePerf ?? null,
    active: opts.active ?? true
  };

  const override = opts.verifiedAgainstDocs;
  const fromProfile = profile.verifiedAgainstDocs;
  if (override !== undefined) {
    row.verified_against_docs = override;
  } else if (fromProfile !== undefined) {
    const looksLikeDate = DATE_PREFIX.test(fromProfile);
    row.verified_against_docs = looksLikeDate ? fromProfile : null;
  }

  if (opts.versionAdded !== undefined) {
    row.version_added = opts.versionAdded;
  }
  if (opts.versionRemoved !== undefined) {
    row.version_removed = opts.versionRemoved;
  }
  return row;
}
2663
/**
 * Builds a profile map from raw model rows.
 *
 * Rows that fail isModelRow, or that rowToProfile rejects, are skipped. When
 * two rows share an id the later one replaces the earlier one.
 *
 * @param {Iterable<unknown>} rows - Raw rows.
 * @returns {Map<string, object>} Profiles keyed by profile id.
 */
function mapRowsToModels(rows) {
  const modelsById = /* @__PURE__ */ new Map();
  for (const candidate of rows) {
    const profile = isModelRow(candidate) ? rowToProfile(candidate) : null;
    if (profile) {
      modelsById.set(profile.id, profile);
    }
  }
  return modelsById;
}
2672
/**
 * Builds an alias lookup object from raw alias rows.
 *
 * Rows that fail isAliasRow are skipped; a later row with the same alias
 * overwrites an earlier one.
 *
 * @param {Iterable<unknown>} rows - Raw rows.
 * @returns {Object<string, string>} Map of `alias_id` to `canonical_id`.
 */
function mapRowsToAliases(rows) {
  const aliasToCanonical = {};
  for (const candidate of rows) {
    if (isAliasRow(candidate)) {
      const { alias_id: alias, canonical_id: canonical } = candidate;
      aliasToCanonical[alias] = canonical;
    }
  }
  return aliasToCanonical;
}
2680
/**
 * Fallback model table built from the profiles returned by allProfilesRaw().
 *
 * @returns {Map<string, object>} Profiles keyed by id.
 */
function bundledModels() {
  const entries = allProfilesRaw().map((profile) => [profile.id, profile]);
  return new Map(entries);
}
2683
/**
 * Fallback alias table: a fresh shallow copy of ALIASES, so callers can
 * modify the result without touching the shared constant.
 *
 * @returns {Object<string, string>} Copy of ALIASES.
 */
function bundledAliases() {
  const copy = { ...ALIASES };
  return copy;
}
2686
// Brain-backed loader for the "kgauto_models" table; bundledModels supplies
// the fallback data.
var loadModelsFromBrain = createBrainQueryCache({
  table: "kgauto_models",
  mapRows: mapRowsToModels,
  bundledFallback: bundledModels
});
// Brain-backed loader for the "kgauto_aliases" table; bundledAliases supplies
// the fallback data.
var loadAliasesFromBrain = createBrainQueryCache({
  table: "kgauto_aliases",
  mapRows: mapRowsToAliases,
  bundledFallback: bundledAliases
});
// Register the loaders as the profile module's brain hook.
_setProfileBrainHook({
  getProfile: (canonical) => loadModelsFromBrain().get(canonical),
  resolveAlias: (id) => {
    const aliases = loadAliasesFromBrain();
    // Own-property check: ids such as "constructor" or "toString" must not
    // resolve to members inherited from Object.prototype.
    return Object.prototype.hasOwnProperty.call(aliases, id) ? aliases[id] : void 0;
  }
});
2700
+
1254
2701
  // src/index.ts
1255
2702
  function compile2(ir, opts) {
1256
2703
  const result = compile(ir, opts);
@@ -1258,12 +2705,19 @@ function compile2(ir, opts) {
1258
2705
  return result;
1259
2706
  }
1260
2707
  export {
2708
+ ABSOLUTE_FLOOR,
1261
2709
  ALIASES,
1262
2710
  ALL_ARCHETYPES,
2711
+ ARCHETYPE_FLOOR_DEFAULT,
1263
2712
  CallError,
1264
2713
  DIALECT_VERSION,
1265
2714
  INTENT_ARCHETYPES,
2715
+ MEASURED_GROUNDING_MIN_N,
2716
+ PROVIDER_ENV_KEYS,
2717
+ RULE_SEQUENTIAL_TOOL_CLIFF,
2718
+ TRANSLATOR_FLOOR,
1266
2719
  allProfiles,
2720
+ applySectionRewrites,
1267
2721
  bucketContext,
1268
2722
  bucketHistory,
1269
2723
  bucketToolCount,
@@ -1274,13 +2728,41 @@ export {
1274
2728
  configureBrain,
1275
2729
  countTokens,
1276
2730
  execute,
2731
+ getActionableAdvisories,
2732
+ getAllStarterChains,
2733
+ getAllStarterChainsWithGrounding,
2734
+ getArchetypePerfScore,
2735
+ getDefaultFallbackChain,
2736
+ getDefaultFallbackChainWithGrounding,
2737
+ getModelCompatibility,
2738
+ getPerAxisMetrics,
1277
2739
  getProfile,
2740
+ getReachabilityDiagnostic,
2741
+ getSequentialStarterChain,
2742
+ getSequentialStarterChainWithGrounding,
2743
+ getStarterChain,
2744
+ getStarterChainWithGrounding,
1278
2745
  hashShape,
1279
2746
  isArchetype,
2747
+ isBrainQueryActiveFor,
2748
+ isModelReachable,
2749
+ isProviderReachable,
1280
2750
  learningKey,
2751
+ loadAliasesFromBrain,
2752
+ loadArchetypePerfFromBrain,
2753
+ loadArchetypePerfNFromBrain,
2754
+ loadChainsFromBrain,
2755
+ loadModelsFromBrain,
2756
+ loadPricingFromBrain,
2757
+ markAdvisoryResolved,
2758
+ profileToRow,
1281
2759
  profilesByProvider,
1282
2760
  record,
2761
+ recordOutcome,
1283
2762
  resetTokenizer,
2763
+ resolvePricingAt,
2764
+ resolveProviderKey,
2765
+ runAdvisor,
1284
2766
  setTokenizer,
1285
2767
  tryGetProfile
1286
2768
  };