@warmdrift/kgauto-compiler 2.0.0-alpha.26 → 2.0.0-alpha.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -20,8 +20,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
+ ABSOLUTE_FLOOR: () => ABSOLUTE_FLOOR,
23
24
  ALIASES: () => ALIASES,
24
25
  ALL_ARCHETYPES: () => ALL_ARCHETYPES,
26
+ ARCHETYPE_FLOOR_DEFAULT: () => ARCHETYPE_FLOOR_DEFAULT,
25
27
  CallError: () => CallError,
26
28
  DIALECT_VERSION: () => DIALECT_VERSION,
27
29
  INTENT_ARCHETYPES: () => INTENT_ARCHETYPES,
@@ -43,6 +45,7 @@ __export(index_exports, {
43
45
  getArchetypePerfScore: () => getArchetypePerfScore,
44
46
  getDefaultFallbackChain: () => getDefaultFallbackChain,
45
47
  getDefaultFallbackChainWithGrounding: () => getDefaultFallbackChainWithGrounding,
48
+ getModelCompatibility: () => getModelCompatibility,
46
49
  getPerAxisMetrics: () => getPerAxisMetrics,
47
50
  getProfile: () => getProfile,
48
51
  getReachabilityDiagnostic: () => getReachabilityDiagnostic,
@@ -1703,12 +1706,23 @@ var PROFILES_RAW = [
1703
1706
  // Each entry's pricing/context/cliffs/lowering reflects the template, NOT
1704
1707
  // provider docs. Verify before promoting status to 'current' (L-049/L-081).
1705
1708
  {
1709
+ // s37 (2026-05-21): UNVERIFIED-AUTO-ONBOARD → verified against
1710
+ // ai.google.dev/gemini-api/docs/models/gemini-3-flash-preview +
1711
+ // ai.google.dev/gemini-api/docs/pricing. L-081 catches:
1712
+ // maxOutputTokens 65_535 → 65_536 (off-by-one)
1713
+ // costInputPer1m 0.30 → 0.50 (template-cloned from 2.5-flash; actual is 1.67× more expensive)
1714
+ // costOutputPer1m 2.50 → 3.00 (template-cloned; actual 1.2× more expensive)
1715
+ // cache discount default 0.25 → 0.10 (10× discount, $0.05/$0.50 per docs)
1716
+ // Cliffs inherited from 2.5-flash conservatively. The 8K-context-quality
1717
+ // cliff was a 2.5-Flash observation — Google positions Gemini 3 as
1718
+ // sustained-frontier-on-long-context; brain evidence will validate/relax.
1719
+ // Kept as guard for now.
1706
1720
  id: "gemini-3-flash-preview",
1707
- verifiedAgainstDocs: "UNVERIFIED-AUTO-ONBOARD",
1721
+ verifiedAgainstDocs: "2026-05-21",
1708
1722
  provider: "google",
1709
1723
  status: "preview",
1710
1724
  maxContextTokens: 1048576,
1711
- maxOutputTokens: 65535,
1725
+ maxOutputTokens: 65536,
1712
1726
  maxTools: 128,
1713
1727
  parallelToolCalls: true,
1714
1728
  structuredOutput: "native",
@@ -1719,13 +1733,13 @@ var PROFILES_RAW = [
1719
1733
  metric: "input_tokens",
1720
1734
  threshold: 8e3,
1721
1735
  action: "downgrade_quality_warning",
1722
- reason: "Quality degrades significantly above ~8K context tokens"
1736
+ reason: "Inherited from 2.5-flash guard; brain evidence on Gemini 3 long-context quality will validate/relax"
1723
1737
  },
1724
1738
  {
1725
1739
  metric: "tool_count",
1726
1740
  threshold: 20,
1727
1741
  action: "drop_to_top_relevant",
1728
- reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
1742
+ reason: "Tool reliability drops above ~20 tools (despite 128 hard limit) \u2014 inherited from Flash family"
1729
1743
  },
1730
1744
  {
1731
1745
  metric: "thinking_with_short_output",
@@ -1734,24 +1748,22 @@ var PROFILES_RAW = [
1734
1748
  reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
1735
1749
  },
1736
1750
  {
1737
- // s11 trust artifact (2026-05-02): brain showed 5/5 empty rate on
1738
- // tt-intelligence/summarize/gemini-2.5-flash with tools offered.
1739
- // v1's disable_thinking_for_short_output already fired and didn't
1740
- // help — disabling thinking is necessary but not sufficient. Tools
1741
- // present + summarize intent confuses Flash into a no-output state
1742
- // (likely tool-decision purgatory). Strip tools entirely for this
1743
- // archetype on this model.
1751
+ // Inherited from gemini-2.5-flash s11 trust artifact. Family-likely
1752
+ // failure mode for Flash architecture. Keep preemptively until brain
1753
+ // evidence on Gemini 3 specifically.
1744
1754
  metric: "tool_count",
1745
1755
  threshold: 1,
1746
1756
  whenIntent: "summarize",
1747
1757
  action: "strip_tools",
1748
- reason: "Gemini Flash returns empty when summarize intent has tools offered (5/5 empty rate observed in v1 prod 2026-04-19, replayed into v2 brain 2026-04-29)"
1758
+ reason: "Inherited from 2.5-flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3-flash-preview specifically."
1749
1759
  }
1750
1760
  ],
1751
- costInputPer1m: 0.3,
1752
- costOutputPer1m: 2.5,
1761
+ costInputPer1m: 0.5,
1762
+ costOutputPer1m: 3,
1753
1763
  lowering: {
1754
1764
  ...GOOGLE_LOWERING_BASE,
1765
+ // 10× cache discount per Google pricing: $0.05/M cached vs $0.50/M input.
1766
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
1755
1767
  thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
1756
1768
  },
1757
1769
  recovery: [
@@ -1777,40 +1789,45 @@ var PROFILES_RAW = [
1777
1789
  ],
1778
1790
  strengths: ["speed", "volume", "classification", "1m_context", "cost"],
1779
1791
  weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
1780
- notes: "AUTO-ONBOARDED 2026-05-16 from `gemini-2.5-flash`. Pricing, context, cliffs are template-cloned and UNVERIFIED \u2014 confirm against provider docs before promoting status to 'current'.",
1781
- // Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
1782
- // 15-75 calls/step beats Sonnet L-040), summarize, classify.
1792
+ notes: "Verified s37 (2026-05-21) against Google docs. Step-change positioning vs 2.5-flash on agentic loops per Google's release notes (Dec 2025). Pricing 1.67\xD7/1.2\xD7 higher than 2.5-flash; cache discount 10\xD7 (vs 4\xD7 for 2.5). Status=preview until brain evidence accumulates.",
1793
+ // Anchored to 2.5-flash archetypePerf as starter, with judgment adjustments
1794
+ // for Google's "step-change on agentic" positioning. Brain evidence (zero
1795
+ // rows today) will replace these starter values.
1783
1796
  archetypePerf: {
1784
1797
  hunt: 9,
1785
- // L-040: parallel tool throughput 15-75/step
1798
+ // Inherits 2.5-flash L-040 parallel-tool tier; Google positions 3 as agentic-loop upgrade
1786
1799
  classify: 7,
1787
- // brain-validated, 218 rows
1800
+ // Inherits 2.5-flash brain-validated tier (218 rows on 2.5)
1788
1801
  summarize: 7,
1789
- // brain-validated; cliff strips tools when present
1802
+ // Inherits 2.5-flash; cliff strips tools when present
1790
1803
  transform: 7,
1791
- ask: 7,
1792
- generate: 6,
1793
- plan: 5,
1804
+ ask: 8,
1805
+ // +1 vs 2.5-flash — sustained-frontier positioning
1806
+ generate: 7,
1807
+ // +1 vs 2.5-flash — agentic coding upgrade per Google
1808
+ plan: 6,
1809
+ // +1 vs 2.5-flash — complex iterations per positioning
1794
1810
  extract: 6,
1795
- // alpha.8 MAX_TOKENS history on structured output
1796
- critique: 4
1797
- // reasoning shallower than Sonnet/Opus
1811
+ critique: 5
1812
+ // +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
1798
1813
  }
1799
1814
  },
1800
1815
  {
1801
- // ── Gemini 2.5 Flash-Lite ──
1802
- // Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
1803
- // it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
1804
- // stable. Positioned BELOW Flash on the cost/perf frontier:
1805
- // input $0.10/M (Flash $0.30/M) — 3× cheaper
1806
- // output $0.40/M (Flash $2.50/M) — 6× cheaper
1807
- // cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
1808
- // Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
1809
- // is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
1810
- // thresholds. The brain will validate/relax these as evidence accumulates
1811
- // per (archetype, model) tuple. Currently ZERO brain rows for this model.
1816
+ // ── Gemini 3.1 Flash-Lite ──
1817
+ // Onboarded 2026-05-16 by auto-onboarder; s37 (2026-05-21) verified
1818
+ // against ai.google.dev/gemini-api/docs/pricing.
1819
+ //
1820
+ // L-081 CATCHES (template clone from 2.5-flash-lite was 2.5-3.75× too cheap):
1821
+ // costInputPer1m 0.10 → 0.25 (template clone undervalued by 2.5×)
1822
+ // costOutputPer1m 0.40 → 1.50 (template clone undervalued by 3.75×)
1823
+ //
1824
+ // Real 3.1-flash-lite is NOT a cost-equivalent successor to 2.5-flash-lite —
1825
+ // it sits between 2.5-flash-lite ($0.10/$0.40) and 2.5-flash ($0.30/$2.50).
1826
+ // Cache discount 10× verified ($0.025/M cached vs $0.25/M input).
1827
+ //
1828
+ // Cliffs are HYPOTHESIZED from 2.5-flash family; brain evidence pending.
1812
1829
  id: "gemini-3.1-flash-lite",
1813
- verifiedAgainstDocs: "UNVERIFIED-AUTO-ONBOARD",
1830
+ verifiedAgainstDocs: "2026-05-21",
1814
1831
  provider: "google",
1815
1832
  status: "preview",
1816
1833
  maxContextTokens: 1048576,
@@ -1851,12 +1868,12 @@ var PROFILES_RAW = [
1851
1868
  reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
1852
1869
  }
1853
1870
  ],
1854
- costInputPer1m: 0.1,
1855
- costOutputPer1m: 0.4,
1871
+ costInputPer1m: 0.25,
1872
+ costOutputPer1m: 1.5,
1856
1873
  lowering: {
1857
1874
  ...GOOGLE_LOWERING_BASE,
1858
- // Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
1859
- // $0.10/M input. Material for repeat-prompt workloads (classify shape).
1875
+ // Cache discount 10× (vs Flash 4×) — Google docs s37: $0.025/M cached vs
1876
+ // $0.25/M input. Material for repeat-prompt workloads (classify shape).
1860
1877
  cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
1861
1878
  thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
1862
1879
  },
@@ -1881,13 +1898,13 @@ var PROFILES_RAW = [
1881
1898
  reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
1882
1899
  }
1883
1900
  ],
1884
- strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
1901
+ strengths: ["low_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
1885
1902
  weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
1886
- notes: "AUTO-ONBOARDED 2026-05-16 from `gemini-2.5-flash-lite`. Pricing, context, cliffs are template-cloned and UNVERIFIED \u2014 confirm against provider docs before promoting status to 'current'.",
1887
- // Tier 3 emergency floor for summarize/classify chains. ZERO brain
1888
- // rows — all values are starter hypotheses anchored to "smaller
1889
- // sibling of Flash, at-or-below Flash perf on every archetype." The
1890
- // first 50 brain rows per archetype will validate or relax these.
1903
+ notes: "Verified s37 (2026-05-21) against Google docs. Sits between 2.5-flash-lite (cheaper) and 2.5-flash (more expensive) on cost frontier; 2.5\xD7 more expensive than initial template-clone. Cliffs hypothesized from Flash family \u2014 brain evidence pending.",
1904
+ // Tier 2-3 floor for summarize/classify chains at the new (verified) price
1905
+ // point. ZERO brain rows — values are starter hypotheses anchored to
1906
+ // "smaller sibling of Flash at higher cost than 2.5-flash-lite." The first
1907
+ // 50 brain rows per archetype will validate or relax these.
1891
1908
  archetypePerf: {
1892
1909
  classify: 6,
1893
1910
  // starter hypothesis — verify (Flash is 7, lite likely ≤)
@@ -1902,6 +1919,125 @@ var PROFILES_RAW = [
1902
1919
  plan: 3,
1903
1920
  critique: 3
1904
1921
  }
1922
+ },
1923
+ // ─────────────────────────────────────────────────────────────────────────
1924
+ // Gemini 3.5 Flash — hand-onboarded s37 (2026-05-21)
1925
+ //
1926
+ // Google positioning ("Most intelligent for sustained frontier performance
1927
+ // on agentic and coding tasks" / "particularly effective for rapid agentic
1928
+ // loops involving complex coding cycles and iterations") suggests this is
1929
+ // the Flash-family upgrade specifically aimed at hunt-shape workloads.
1930
+ // Pricing 5× input / 3.6× output vs 2.5-flash — material cost premium.
1931
+ // archetypePerf adjusted vs 2.5-flash: +1 on ask/extract/critique and +2 on
1932
+ // generate/plan (sustained-frontier positioning); hunt held at 9 inherited from
1933
+ // L-040 family parallel-tool tier; brain evidence will validate within 50 rows.
1934
+ //
1935
+ // Cliffs inherited conservatively from 2.5-flash. Google's "sustained
1936
+ // frontier on long-context" positioning suggests the 8K cliff may not
1937
+ // apply to 3.5 — keep as guard until brain evidence shows otherwise.
1938
+ //
1939
+ // Specs verified against:
1940
+ // ai.google.dev/gemini-api/docs/models/gemini-3.5-flash
1941
+ // ai.google.dev/gemini-api/docs/pricing (Standard tier)
1942
+ // ─────────────────────────────────────────────────────────────────────────
1943
+ {
1944
+ id: "gemini-3.5-flash",
1945
+ verifiedAgainstDocs: "2026-05-21",
1946
+ provider: "google",
1947
+ status: "current",
1948
+ maxContextTokens: 1048576,
1949
+ maxOutputTokens: 65536,
1950
+ maxTools: 128,
1951
+ parallelToolCalls: true,
1952
+ structuredOutput: "native",
1953
+ systemPromptMode: "separate",
1954
+ streaming: true,
1955
+ cliffs: [
1956
+ {
1957
+ metric: "input_tokens",
1958
+ threshold: 8e3,
1959
+ action: "downgrade_quality_warning",
1960
+ reason: "Inherited from 2.5-flash guard; Google positions 3.5 as sustained-frontier-on-long-context but brain evidence pending"
1961
+ },
1962
+ {
1963
+ metric: "tool_count",
1964
+ threshold: 20,
1965
+ action: "drop_to_top_relevant",
1966
+ reason: "Inherited from Flash family: tool reliability drops above ~20 (despite 128 hard limit). Validate per (archetype, model) after n\u226520."
1967
+ },
1968
+ {
1969
+ metric: "thinking_with_short_output",
1970
+ threshold: 1,
1971
+ action: "force_thinking_budget_zero",
1972
+ reason: "Thinking mode supported per Google docs; same drain risk as 2.5-flash \u2014 thinking tokens consume maxOutputTokens"
1973
+ },
1974
+ {
1975
+ // Inherited from 2.5-flash s11 trust artifact (5/5 empty rate on
1976
+ // tt-intelligence/summarize/gemini-2.5-flash with tools offered).
1977
+ // Family-likely failure mode for Flash architecture across versions.
1978
+ // Keep preemptively until brain evidence on 3.5-flash specifically.
1979
+ metric: "tool_count",
1980
+ threshold: 1,
1981
+ whenIntent: "summarize",
1982
+ action: "strip_tools",
1983
+ reason: "Inherited from 2.5-flash s11 cliff (kgauto commit 3872832): summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3.5-flash specifically."
1984
+ }
1985
+ ],
1986
+ costInputPer1m: 1.5,
1987
+ costOutputPer1m: 9,
1988
+ lowering: {
1989
+ ...GOOGLE_LOWERING_BASE,
1990
+ // 10× cache discount per Google pricing: $0.15/M cached vs $1.50/M input.
1991
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
1992
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
1993
+ },
1994
+ recovery: [
1995
+ {
1996
+ signal: "empty_response_after_tool",
1997
+ action: "retry_with_params",
1998
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
1999
+ maxRetries: 1,
2000
+ reason: "Inherited Flash-family pattern: empty after tool result \u2014 retry with thinking off"
2001
+ },
2002
+ {
2003
+ signal: "empty_response",
2004
+ action: "retry_with_params",
2005
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
2006
+ maxRetries: 1,
2007
+ reason: "Empty response \u2014 try with thinking off"
2008
+ },
2009
+ {
2010
+ signal: "malformed_function_call",
2011
+ action: "escalate",
2012
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
2013
+ }
2014
+ ],
2015
+ strengths: ["agentic_loops", "coding", "1m_context", "parallel_tools", "thinking_mode", "sustained_frontier"],
2016
+ weaknesses: ["cost_vs_2_5_flash", "no_brain_evidence_yet"],
2017
+ notes: "Hand-onboarded s37 (2026-05-21) verified against Google docs. Stable status; positioned as Flash-family upgrade for agentic loops and coding. 5\xD7/3.6\xD7 more expensive than 2.5-flash but Google claims step-change on sustained frontier work. archetypePerf adjustments are judgment-grounded starter hypotheses \u2014 brain evidence will validate within ~50 rows per archetype.",
2018
+ // Starter hypothesis: anchored to 2.5-flash archetypePerf with +1
2019
+ // adjustments where Google's positioning explicitly supports
2020
+ // (agentic/coding/sustained). Hunt held at 9 inherited from L-040 family
2021
+ // parallel-tool tier. Brain evidence will replace.
2022
+ archetypePerf: {
2023
+ hunt: 9,
2024
+ // Inherited from 2.5-flash L-040 parallel-tool tier; Google positions 3.5 as agentic-loop champion
2025
+ classify: 7,
2026
+ // Inherited from 2.5-flash brain-validated tier (218 rows on 2.5)
2027
+ summarize: 7,
2028
+ // Inherited from 2.5-flash; cliff strips tools when present
2029
+ transform: 7,
2030
+ ask: 8,
2031
+ // +1 vs 2.5-flash — sustained-frontier positioning
2032
+ generate: 8,
2033
+ // +2 vs 2.5-flash (6→8) — Google: "complex coding cycles and iterations"
2034
+ plan: 7,
2035
+ // +2 vs 2.5-flash (5→7) — "complex iterations" positioning
2036
+ extract: 7,
2037
+ // +1 vs 2.5-flash — sustained-frontier on structured tasks
2038
+ critique: 5
2039
+ // +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
2040
+ }
1905
2041
  }
1906
2042
  ];
1907
2043
  var ALIASES = {
@@ -2174,6 +2310,86 @@ function getArchetypePerfScore(modelId, archetype) {
2174
2310
  return { score, n, grounding };
2175
2311
  }
2176
2312
 
2313
+ // src/compatibility.ts
2314
+ var ARCHETYPE_FLOOR_DEFAULT = 6;
2315
+ var ABSOLUTE_FLOOR = 4;
2316
+ function rawArchetypePerf(profile, archetype) {
2317
+ return profile.archetypePerf?.[archetype] ?? 5;
2318
+ }
2319
+ function hasSequentialToolCliffForHunt(profile) {
2320
+ if (profile.parallelToolCalls !== false) return false;
2321
+ const huntScore = profile.archetypePerf?.hunt ?? 5;
2322
+ return huntScore < ARCHETYPE_FLOOR_DEFAULT;
2323
+ }
2324
+ function adapterForCliff(profile, archetype) {
2325
+ if (archetype === "hunt" && hasSequentialToolCliffForHunt(profile)) {
2326
+ const otherScores = [];
2327
+ if (profile.archetypePerf) {
2328
+ for (const [k, v] of Object.entries(profile.archetypePerf)) {
2329
+ if (k === "hunt") continue;
2330
+ if (typeof v === "number") otherScores.push(v);
2331
+ }
2332
+ }
2333
+ const sorted = [...otherScores].sort((a, b) => a - b);
2334
+ const median = sorted.length === 0 ? ARCHETYPE_FLOOR_DEFAULT + 1 : sorted[Math.floor(sorted.length / 2)] ?? ARCHETYPE_FLOOR_DEFAULT + 1;
2335
+ const estimated = Math.max(ARCHETYPE_FLOOR_DEFAULT + 1, median);
2336
+ return {
2337
+ adapter: {
2338
+ parameter: "toolOrchestration",
2339
+ value: "sequential",
2340
+ consequence: "Tool calls run one at a time instead of in parallel \u2014 slower per step but reliable for this model."
2341
+ },
2342
+ estimatedScoreWithAdapter: estimated
2343
+ };
2344
+ }
2345
+ return void 0;
2346
+ }
2347
+ function archetypeDescriptor(archetype) {
2348
+ return archetype;
2349
+ }
2350
+ function getModelCompatibility(modelId, intent) {
2351
+ const profile = tryGetProfile(modelId);
2352
+ if (!profile) {
2353
+ return {
2354
+ status: "reject",
2355
+ reason: `Model "${modelId}" is not registered with kgauto \u2014 no compatibility data available.`,
2356
+ archetypePerf: 0
2357
+ };
2358
+ }
2359
+ const { archetype, toolOrchestration } = intent;
2360
+ const rawScore = rawArchetypePerf(profile, archetype);
2361
+ const descriptor = archetypeDescriptor(archetype);
2362
+ const adapterMatch = adapterForCliff(profile, archetype);
2363
+ if (toolOrchestration === "sequential" && adapterMatch && adapterMatch.adapter.parameter === "toolOrchestration" && adapterMatch.adapter.value === "sequential") {
2364
+ return {
2365
+ status: "compatible",
2366
+ reason: `Suited for ${descriptor} with sequential tool calls.`,
2367
+ archetypePerf: rawScore
2368
+ };
2369
+ }
2370
+ if (rawScore >= ARCHETYPE_FLOOR_DEFAULT) {
2371
+ return {
2372
+ status: "compatible",
2373
+ reason: `Suited for ${descriptor}.`,
2374
+ archetypePerf: rawScore
2375
+ };
2376
+ }
2377
+ if (adapterMatch) {
2378
+ return {
2379
+ status: "requires-adapter",
2380
+ reason: `Best with ${adapterMatch.adapter.value} ${adapterMatch.adapter.parameter === "toolOrchestration" ? "tool calls" : adapterMatch.adapter.parameter} for ${descriptor} \u2014 slower but works.`,
2381
+ archetypePerf: rawScore,
2382
+ archetypePerfWithAdapter: adapterMatch.estimatedScoreWithAdapter,
2383
+ adapter: adapterMatch.adapter
2384
+ };
2385
+ }
2386
+ return {
2387
+ status: "reject",
2388
+ reason: `Not suited for ${descriptor} \u2014 would underperform significantly.`,
2389
+ archetypePerf: rawScore
2390
+ };
2391
+ }
2392
+
2177
2393
  // src/advisor.ts
2178
2394
  var QUALITY_FLOOR_FOR_RECOMMENDATION = 6;
2179
2395
  var TIER_DOWN_COST_RATIO = 0.5;
@@ -2190,6 +2406,7 @@ function runAdvisor(ir, result, profile, policy, phase2) {
2190
2406
  out.push(...detectModelStaleEvidence(ir, profile));
2191
2407
  out.push(...detectTierDown(ir, profile, phase2));
2192
2408
  }
2409
+ out.push(...detectArchetypePerfFloorBreach(ir, profile));
2193
2410
  return out;
2194
2411
  }
2195
2412
  function detectCachingOff(ir, profile) {
@@ -2360,6 +2577,36 @@ function detectTierDown(ir, profile, phase2) {
2360
2577
  }
2361
2578
  ];
2362
2579
  }
2580
+ function detectArchetypePerfFloorBreach(ir, profile) {
2581
+ const compat = getModelCompatibility(profile.id, {
2582
+ archetype: ir.intent.archetype,
2583
+ toolOrchestration: ir.constraints?.toolOrchestration
2584
+ });
2585
+ if (compat.status === "compatible") return [];
2586
+ if (compat.status === "requires-adapter") {
2587
+ return [
2588
+ {
2589
+ level: "warn",
2590
+ code: "archetype-perf-floor-breach",
2591
+ message: `${profile.id} sits below the archetype floor for ${ir.intent.archetype} (score ${compat.archetypePerf}/10, floor ${6}). A known adapter would lift it: ${compat.adapter.parameter}=${compat.adapter.value}. ${compat.adapter.consequence}`,
2592
+ suggestion: `Pass \`ir.constraints.${compat.adapter.parameter} = '${compat.adapter.value}'\` for this call, OR pick a model whose archetypePerf for ${ir.intent.archetype} already clears the floor (call \`getModelCompatibility(modelId, { archetype: '${ir.intent.archetype}' })\` to check). Estimated post-adapter score: ${compat.archetypePerfWithAdapter}/10.`,
2593
+ recommendationType: "prompt-fix",
2594
+ suggestedAdaptation: compat.adapter,
2595
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
2596
+ }
2597
+ ];
2598
+ }
2599
+ return [
2600
+ {
2601
+ level: "critical",
2602
+ code: "archetype-perf-floor-breach",
2603
+ message: `${profile.id} sits below the archetype floor for ${ir.intent.archetype} (score ${compat.archetypePerf}/10, floor ${6}) and no known adapter would lift it. ${compat.reason}`,
2604
+ suggestion: `Swap to a model whose archetypePerf for ${ir.intent.archetype} clears the floor. Use \`getModelCompatibility(candidateId, { archetype: '${ir.intent.archetype}' })\` to vet candidates, or \`getDefaultFallbackChain({ archetype: '${ir.intent.archetype}', posture: 'open' })\` for a library-picked chain that respects the floor by construction.`,
2605
+ recommendationType: "model-swap",
2606
+ docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
2607
+ }
2608
+ ];
2609
+ }
2363
2610
 
2364
2611
  // src/compile.ts
2365
2612
  var counter = 0;
@@ -2620,6 +2867,9 @@ function registerCompile(appId, archetype, ir, result) {
2620
2867
  tokens
2621
2868
  );
2622
2869
  const shapeKey = `${shape.contextBucket}-${shape.toolCountBucket}-${shape.historyDepth}-${shape.outputMode}`;
2870
+ const toolsCount = result.diagnostics.toolsKept;
2871
+ const historyDepth = Array.isArray(ir.history) ? ir.history.length : 0;
2872
+ const systemPromptChars = estimateSystemPromptChars(ir.sections);
2623
2873
  compileRegistry.set(result.handle, {
2624
2874
  appId,
2625
2875
  archetype,
@@ -2633,9 +2883,24 @@ function registerCompile(appId, archetype, ir, result) {
2633
2883
  historyCacheableTokens: result.diagnostics.historyCacheableTokens,
2634
2884
  historyTokensTotal: result.diagnostics.historyTokensTotal,
2635
2885
  // alpha.20 E3: capture consumer's declared mode for the brain payload.
2636
- toolOrchestration: result.diagnostics.toolOrchestration
2886
+ toolOrchestration: result.diagnostics.toolOrchestration,
2887
+ // alpha.28: shape fields for Glass-Box renderer.
2888
+ toolsCount,
2889
+ historyDepth,
2890
+ systemPromptChars
2637
2891
  });
2638
2892
  }
2893
+ function estimateSystemPromptChars(sections) {
2894
+ if (!Array.isArray(sections) || sections.length === 0) return void 0;
2895
+ let total = 0;
2896
+ for (const s of sections) {
2897
+ if (s && typeof s === "object") {
2898
+ const content = s.content;
2899
+ if (typeof content === "string") total += content.length;
2900
+ }
2901
+ }
2902
+ return total > 0 ? total : void 0;
2903
+ }
2639
2904
  async function record(input) {
2640
2905
  const reg = compileRegistry.get(input.handle);
2641
2906
  if (reg) compileRegistry.delete(input.handle);
@@ -2713,6 +2978,8 @@ function buildPayload(input, reg) {
2713
2978
  const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
2714
2979
  const costModel = actual;
2715
2980
  const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
2981
+ const fellOverFrom = input.fellOverFrom ?? requested;
2982
+ const fallbackReason = fellOverFrom ? input.fallbackReason : void 0;
2716
2983
  return {
2717
2984
  handle: input.handle,
2718
2985
  app_id: reg?.appId,
@@ -2747,7 +3014,16 @@ function buildPayload(input, reg) {
2747
3014
  // the brain can measure per-mode model perf separately (DeepSeek in
2748
3015
  // sequential vs parallel mode is two different stories — L-040).
2749
3016
  // Null when consumer hadn't adopted the constraint yet.
2750
- tool_orchestration: reg?.toolOrchestration ?? null
3017
+ tool_orchestration: reg?.toolOrchestration ?? null,
3018
+ // alpha.28 — Glass-Box renderer substrate (migration 018). All optional;
3019
+ // omitted-undefined PostgREST inserts store NULL → renderer renders "—".
3020
+ finish_reason: input.finishReason,
3021
+ total_ms: input.totalMs ?? input.latencyMs,
3022
+ tools_count: input.toolsCount ?? reg?.toolsCount,
3023
+ history_depth: input.historyDepth ?? reg?.historyDepth,
3024
+ system_prompt_chars: input.systemPromptChars ?? reg?.systemPromptChars,
3025
+ fell_over_from: fellOverFrom,
3026
+ fallback_reason: fallbackReason
2751
3027
  };
2752
3028
  }
2753
3029
  function computeCostUsd(modelId, tokensIn, tokensOut) {
@@ -3945,6 +4221,8 @@ async function call(ir, opts = {}) {
3945
4221
  latencyMs: latencyMs2
3946
4222
  })
3947
4223
  );
4224
+ const fellOver = targetModel !== initial.target;
4225
+ const fallbackReason = fellOver ? normalizeFallbackReason(attempts) : void 0;
3948
4226
  await record({
3949
4227
  handle: initial.handle,
3950
4228
  tokensIn: validated.response.tokens.input,
@@ -3958,10 +4236,18 @@ async function call(ir, opts = {}) {
3958
4236
  promptPreview: extractPromptPreview(ir),
3959
4237
  responsePreview: validated.response.text.slice(0, 200),
3960
4238
  cacheReadInputTokens: validated.response.tokens.cached,
3961
- cacheCreationInputTokens: validated.response.tokens.cacheCreated
4239
+ cacheCreationInputTokens: validated.response.tokens.cacheCreated,
4240
+ // alpha.28 — Glass-Box renderer substrate (migration 018). call()
4241
+ // owns the lifecycle so it has direct visibility into finishReason
4242
+ // (from the normalized provider response), totalMs (mirrors latencyMs
4243
+ // for non-streaming; future streaming variant may diverge), and the
4244
+ // fell-over-from / fallback-reason pair (already computed above for
4245
+ // the CallResult return shape).
4246
+ finishReason: validated.response.finishReason,
4247
+ totalMs: latencyMs2,
4248
+ fellOverFrom: fellOver ? initial.target : void 0,
4249
+ fallbackReason
3962
4250
  });
3963
- const fellOver = targetModel !== initial.target;
3964
- const fallbackReason = fellOver ? normalizeFallbackReason(attempts) : void 0;
3965
4251
  if (fellOver) {
3966
4252
  const firstFailed = attempts.find((a) => a.status !== "success");
3967
4253
  if (firstFailed) {
@@ -4319,8 +4605,10 @@ function compile2(ir, opts) {
4319
4605
  }
4320
4606
  // Annotate the CommonJS export names for ESM import in node:
4321
4607
  0 && (module.exports = {
4608
+ ABSOLUTE_FLOOR,
4322
4609
  ALIASES,
4323
4610
  ALL_ARCHETYPES,
4611
+ ARCHETYPE_FLOOR_DEFAULT,
4324
4612
  CallError,
4325
4613
  DIALECT_VERSION,
4326
4614
  INTENT_ARCHETYPES,
@@ -4342,6 +4630,7 @@ function compile2(ir, opts) {
4342
4630
  getArchetypePerfScore,
4343
4631
  getDefaultFallbackChain,
4344
4632
  getDefaultFallbackChainWithGrounding,
4633
+ getModelCompatibility,
4345
4634
  getPerAxisMetrics,
4346
4635
  getProfile,
4347
4636
  getReachabilityDiagnostic,