@warmdrift/kgauto-compiler 2.0.0-alpha.26 → 2.0.0-alpha.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-7MTHFSNY.mjs → chunk-JQGRWJZO.mjs} +181 -48
- package/dist/chunk-WXCFWUCN.mjs +678 -0
- package/dist/glassbox/index.d.mts +3 -3
- package/dist/glassbox/index.d.ts +3 -3
- package/dist/glassbox-routes/index.d.mts +88 -6
- package/dist/glassbox-routes/index.d.ts +88 -6
- package/dist/glassbox-routes/index.js +1820 -8
- package/dist/glassbox-routes/index.mjs +320 -8
- package/dist/index.d.mts +184 -3
- package/dist/index.d.ts +184 -3
- package/dist/index.js +342 -53
- package/dist/index.mjs +108 -581
- package/dist/{ir-B_XX2LAO.d.ts → ir-5W0efxt9.d.ts} +86 -1
- package/dist/{ir-B9zqlwjH.d.mts → ir-MXCJA8L7.d.mts} +86 -1
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/dist/profiles.js +181 -48
- package/dist/profiles.mjs +1 -1
- package/dist/{types-bt0aVJb8.d.ts → types-CiZ9HLIU.d.ts} +1 -1
- package/dist/{types-o9etg93a.d.mts → types-sDZQzPM6.d.mts} +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -20,8 +20,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
+
ABSOLUTE_FLOOR: () => ABSOLUTE_FLOOR,
|
|
23
24
|
ALIASES: () => ALIASES,
|
|
24
25
|
ALL_ARCHETYPES: () => ALL_ARCHETYPES,
|
|
26
|
+
ARCHETYPE_FLOOR_DEFAULT: () => ARCHETYPE_FLOOR_DEFAULT,
|
|
25
27
|
CallError: () => CallError,
|
|
26
28
|
DIALECT_VERSION: () => DIALECT_VERSION,
|
|
27
29
|
INTENT_ARCHETYPES: () => INTENT_ARCHETYPES,
|
|
@@ -43,6 +45,7 @@ __export(index_exports, {
|
|
|
43
45
|
getArchetypePerfScore: () => getArchetypePerfScore,
|
|
44
46
|
getDefaultFallbackChain: () => getDefaultFallbackChain,
|
|
45
47
|
getDefaultFallbackChainWithGrounding: () => getDefaultFallbackChainWithGrounding,
|
|
48
|
+
getModelCompatibility: () => getModelCompatibility,
|
|
46
49
|
getPerAxisMetrics: () => getPerAxisMetrics,
|
|
47
50
|
getProfile: () => getProfile,
|
|
48
51
|
getReachabilityDiagnostic: () => getReachabilityDiagnostic,
|
|
@@ -1703,12 +1706,23 @@ var PROFILES_RAW = [
|
|
|
1703
1706
|
// Each entry's pricing/context/cliffs/lowering reflects the template, NOT
|
|
1704
1707
|
// provider docs. Verify before promoting status to 'current' (L-049/L-081).
|
|
1705
1708
|
{
|
|
1709
|
+
// s37 (2026-05-21): UNVERIFIED-AUTO-ONBOARD → verified against
|
|
1710
|
+
// ai.google.dev/gemini-api/docs/models/gemini-3-flash-preview +
|
|
1711
|
+
// ai.google.dev/gemini-api/docs/pricing. L-081 catches:
|
|
1712
|
+
// maxOutputTokens 65_535 → 65_536 (off-by-one)
|
|
1713
|
+
// costInputPer1m 0.30 → 0.50 (template-cloned from 2.5-flash; actual is 1.67× more expensive)
|
|
1714
|
+
// costOutputPer1m 2.50 → 3.00 (template-cloned; actual 1.2× more expensive)
|
|
1715
|
+
// cache discount default 0.25 → 0.10 (10× discount, $0.05/$0.50 per docs)
|
|
1716
|
+
// Cliffs inherited from 2.5-flash conservatively. The 8K-context-quality
|
|
1717
|
+
// cliff was a 2.5-Flash observation — Google positions Gemini 3 as
|
|
1718
|
+
// sustained-frontier-on-long-context; brain evidence will validate/relax.
|
|
1719
|
+
// Kept as guard for now.
|
|
1706
1720
|
id: "gemini-3-flash-preview",
|
|
1707
|
-
verifiedAgainstDocs: "
|
|
1721
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
1708
1722
|
provider: "google",
|
|
1709
1723
|
status: "preview",
|
|
1710
1724
|
maxContextTokens: 1048576,
|
|
1711
|
-
maxOutputTokens:
|
|
1725
|
+
maxOutputTokens: 65536,
|
|
1712
1726
|
maxTools: 128,
|
|
1713
1727
|
parallelToolCalls: true,
|
|
1714
1728
|
structuredOutput: "native",
|
|
@@ -1719,13 +1733,13 @@ var PROFILES_RAW = [
|
|
|
1719
1733
|
metric: "input_tokens",
|
|
1720
1734
|
threshold: 8e3,
|
|
1721
1735
|
action: "downgrade_quality_warning",
|
|
1722
|
-
reason: "
|
|
1736
|
+
reason: "Inherited from 2.5-flash guard; brain evidence on Gemini 3 long-context quality will validate/relax"
|
|
1723
1737
|
},
|
|
1724
1738
|
{
|
|
1725
1739
|
metric: "tool_count",
|
|
1726
1740
|
threshold: 20,
|
|
1727
1741
|
action: "drop_to_top_relevant",
|
|
1728
|
-
reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
|
|
1742
|
+
reason: "Tool reliability drops above ~20 tools (despite 128 hard limit) \u2014 inherited from Flash family"
|
|
1729
1743
|
},
|
|
1730
1744
|
{
|
|
1731
1745
|
metric: "thinking_with_short_output",
|
|
@@ -1734,24 +1748,22 @@ var PROFILES_RAW = [
|
|
|
1734
1748
|
reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
|
|
1735
1749
|
},
|
|
1736
1750
|
{
|
|
1737
|
-
//
|
|
1738
|
-
//
|
|
1739
|
-
//
|
|
1740
|
-
// help — disabling thinking is necessary but not sufficient. Tools
|
|
1741
|
-
// present + summarize intent confuses Flash into a no-output state
|
|
1742
|
-
// (likely tool-decision purgatory). Strip tools entirely for this
|
|
1743
|
-
// archetype on this model.
|
|
1751
|
+
// Inherited from gemini-2.5-flash s11 trust artifact. Family-likely
|
|
1752
|
+
// failure mode for Flash architecture. Keep preemptively until brain
|
|
1753
|
+
// evidence on Gemini 3 specifically.
|
|
1744
1754
|
metric: "tool_count",
|
|
1745
1755
|
threshold: 1,
|
|
1746
1756
|
whenIntent: "summarize",
|
|
1747
1757
|
action: "strip_tools",
|
|
1748
|
-
reason: "
|
|
1758
|
+
reason: "Inherited from 2.5-flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3-flash-preview specifically."
|
|
1749
1759
|
}
|
|
1750
1760
|
],
|
|
1751
|
-
costInputPer1m: 0.
|
|
1752
|
-
costOutputPer1m:
|
|
1761
|
+
costInputPer1m: 0.5,
|
|
1762
|
+
costOutputPer1m: 3,
|
|
1753
1763
|
lowering: {
|
|
1754
1764
|
...GOOGLE_LOWERING_BASE,
|
|
1765
|
+
// 10× cache discount per Google pricing: $0.05/M cached vs $0.50/M input.
|
|
1766
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1755
1767
|
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1756
1768
|
},
|
|
1757
1769
|
recovery: [
|
|
@@ -1777,40 +1789,45 @@ var PROFILES_RAW = [
|
|
|
1777
1789
|
],
|
|
1778
1790
|
strengths: ["speed", "volume", "classification", "1m_context", "cost"],
|
|
1779
1791
|
weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
|
|
1780
|
-
notes: "
|
|
1781
|
-
//
|
|
1782
|
-
//
|
|
1792
|
+
notes: "Verified s37 (2026-05-21) against Google docs. Step-change positioning vs 2.5-flash on agentic loops per Google's release notes (Dec 2025). Pricing 1.67\xD7/1.2\xD7 higher than 2.5-flash; cache discount 10\xD7 (vs 4\xD7 for 2.5). Status=preview until brain evidence accumulates.",
|
|
1793
|
+
// Anchored to 2.5-flash archetypePerf as starter, with judgment adjustments
|
|
1794
|
+
// for Google's "step-change on agentic" positioning. Brain evidence (zero
|
|
1795
|
+
// rows today) will replace these starter values.
|
|
1783
1796
|
archetypePerf: {
|
|
1784
1797
|
hunt: 9,
|
|
1785
|
-
// L-040
|
|
1798
|
+
// Inherits 2.5-flash L-040 parallel-tool tier; Google positions 3 as agentic-loop upgrade
|
|
1786
1799
|
classify: 7,
|
|
1787
|
-
// brain-validated
|
|
1800
|
+
// Inherits 2.5-flash brain-validated tier (218 rows on 2.5)
|
|
1788
1801
|
summarize: 7,
|
|
1789
|
-
//
|
|
1802
|
+
// Inherits 2.5-flash; cliff strips tools when present
|
|
1790
1803
|
transform: 7,
|
|
1791
|
-
ask:
|
|
1792
|
-
|
|
1793
|
-
|
|
1804
|
+
ask: 8,
|
|
1805
|
+
// +1 vs 2.5-flash — sustained-frontier positioning
|
|
1806
|
+
generate: 7,
|
|
1807
|
+
// +1 vs 2.5-flash — agentic coding upgrade per Google
|
|
1808
|
+
plan: 6,
|
|
1809
|
+
// +1 vs 2.5-flash — complex iterations per positioning
|
|
1794
1810
|
extract: 6,
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
// reasoning shallower than Sonnet/Opus
|
|
1811
|
+
critique: 5
|
|
1812
|
+
// +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
|
|
1798
1813
|
}
|
|
1799
1814
|
},
|
|
1800
1815
|
{
|
|
1801
|
-
// ── Gemini
|
|
1802
|
-
// Onboarded 2026-05-
|
|
1803
|
-
//
|
|
1804
|
-
//
|
|
1805
|
-
//
|
|
1806
|
-
//
|
|
1807
|
-
//
|
|
1808
|
-
//
|
|
1809
|
-
//
|
|
1810
|
-
//
|
|
1811
|
-
//
|
|
1816
|
+
// ── Gemini 3.1 Flash-Lite ──
|
|
1817
|
+
// Onboarded 2026-05-16 by auto-onboarder; s37 (2026-05-21) verified
|
|
1818
|
+
// against ai.google.dev/gemini-api/docs/pricing.
|
|
1819
|
+
//
|
|
1820
|
+
// L-081 CATCHES (template clone from 2.5-flash-lite was 2.5-3.75× too cheap):
|
|
1821
|
+
// costInputPer1m 0.10 → 0.25 (template clone undervalued by 2.5×)
|
|
1822
|
+
// costOutputPer1m 0.40 → 1.50 (template clone undervalued by 3.75×)
|
|
1823
|
+
//
|
|
1824
|
+
// Real 3.1-flash-lite is NOT a cost-equivalent successor to 2.5-flash-lite —
|
|
1825
|
+
// it sits between 2.5-flash-lite ($0.10/$0.40) and 2.5-flash ($0.30/$2.50).
|
|
1826
|
+
// Cache discount 10× verified ($0.025/M cached vs $0.25/M input).
|
|
1827
|
+
//
|
|
1828
|
+
// Cliffs are HYPOTHESIZED from 2.5-flash family; brain evidence pending.
|
|
1812
1829
|
id: "gemini-3.1-flash-lite",
|
|
1813
|
-
verifiedAgainstDocs: "
|
|
1830
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
1814
1831
|
provider: "google",
|
|
1815
1832
|
status: "preview",
|
|
1816
1833
|
maxContextTokens: 1048576,
|
|
@@ -1851,12 +1868,12 @@ var PROFILES_RAW = [
|
|
|
1851
1868
|
reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
|
|
1852
1869
|
}
|
|
1853
1870
|
],
|
|
1854
|
-
costInputPer1m: 0.
|
|
1855
|
-
costOutputPer1m:
|
|
1871
|
+
costInputPer1m: 0.25,
|
|
1872
|
+
costOutputPer1m: 1.5,
|
|
1856
1873
|
lowering: {
|
|
1857
1874
|
...GOOGLE_LOWERING_BASE,
|
|
1858
|
-
// Cache discount 10× (vs Flash 4×) — Google
|
|
1859
|
-
// $0.
|
|
1875
|
+
// Cache discount 10× (vs Flash 4×) — Google docs s37: $0.025/M cached vs
|
|
1876
|
+
// $0.25/M input. Material for repeat-prompt workloads (classify shape).
|
|
1860
1877
|
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1861
1878
|
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1862
1879
|
},
|
|
@@ -1881,13 +1898,13 @@ var PROFILES_RAW = [
|
|
|
1881
1898
|
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
|
|
1882
1899
|
}
|
|
1883
1900
|
],
|
|
1884
|
-
strengths: ["
|
|
1901
|
+
strengths: ["low_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
|
|
1885
1902
|
weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
|
|
1886
|
-
notes: "
|
|
1887
|
-
// Tier 3
|
|
1888
|
-
// rows —
|
|
1889
|
-
// sibling of Flash
|
|
1890
|
-
//
|
|
1903
|
+
notes: "Verified s37 (2026-05-21) against Google docs. Sits between 2.5-flash-lite (cheaper) and 2.5-flash (more expensive) on cost frontier; 2.5\xD7 more expensive than initial template-clone. Cliffs hypothesized from Flash family \u2014 brain evidence pending.",
|
|
1904
|
+
// Tier 2-3 floor for summarize/classify chains at the new (verified) price
|
|
1905
|
+
// point. ZERO brain rows — values are starter hypotheses anchored to
|
|
1906
|
+
// "smaller sibling of Flash at higher cost than 2.5-flash-lite." The first
|
|
1907
|
+
// 50 brain rows per archetype will validate or relax these.
|
|
1891
1908
|
archetypePerf: {
|
|
1892
1909
|
classify: 6,
|
|
1893
1910
|
// starter hypothesis — verify (Flash is 7, lite likely ≤)
|
|
@@ -1902,6 +1919,125 @@ var PROFILES_RAW = [
|
|
|
1902
1919
|
plan: 3,
|
|
1903
1920
|
critique: 3
|
|
1904
1921
|
}
|
|
1922
|
+
},
|
|
1923
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1924
|
+
// Gemini 3.5 Flash — hand-onboarded s37 (2026-05-21)
|
|
1925
|
+
//
|
|
1926
|
+
// Google positioning ("Most intelligent for sustained frontier performance
|
|
1927
|
+
// on agentic and coding tasks" / "particularly effective for rapid agentic
|
|
1928
|
+
// loops involving complex coding cycles and iterations") suggests this is
|
|
1929
|
+
// the Flash-family upgrade specifically aimed at hunt-shape workloads.
|
|
1930
|
+
// Pricing 5× input / 3.6× output vs 2.5-flash — material cost premium.
|
|
1931
|
+
// archetypePerf adjusted vs 2.5-flash: +1 on ask/extract/critique, +2 on generate/plan
|
|
1932
|
+
// (sustained-frontier positioning); hunt held at 9 inherited from L-040
|
|
1933
|
+
// family parallel-tool tier; brain evidence will validate within 50 rows.
|
|
1934
|
+
//
|
|
1935
|
+
// Cliffs inherited conservatively from 2.5-flash. Google's "sustained
|
|
1936
|
+
// frontier on long-context" positioning suggests the 8K cliff may not
|
|
1937
|
+
// apply to 3.5 — keep as guard until brain evidence shows otherwise.
|
|
1938
|
+
//
|
|
1939
|
+
// Specs verified against:
|
|
1940
|
+
// ai.google.dev/gemini-api/docs/models/gemini-3.5-flash
|
|
1941
|
+
// ai.google.dev/gemini-api/docs/pricing (Standard tier)
|
|
1942
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1943
|
+
{
|
|
1944
|
+
id: "gemini-3.5-flash",
|
|
1945
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
1946
|
+
provider: "google",
|
|
1947
|
+
status: "current",
|
|
1948
|
+
maxContextTokens: 1048576,
|
|
1949
|
+
maxOutputTokens: 65536,
|
|
1950
|
+
maxTools: 128,
|
|
1951
|
+
parallelToolCalls: true,
|
|
1952
|
+
structuredOutput: "native",
|
|
1953
|
+
systemPromptMode: "separate",
|
|
1954
|
+
streaming: true,
|
|
1955
|
+
cliffs: [
|
|
1956
|
+
{
|
|
1957
|
+
metric: "input_tokens",
|
|
1958
|
+
threshold: 8e3,
|
|
1959
|
+
action: "downgrade_quality_warning",
|
|
1960
|
+
reason: "Inherited from 2.5-flash guard; Google positions 3.5 as sustained-frontier-on-long-context but brain evidence pending"
|
|
1961
|
+
},
|
|
1962
|
+
{
|
|
1963
|
+
metric: "tool_count",
|
|
1964
|
+
threshold: 20,
|
|
1965
|
+
action: "drop_to_top_relevant",
|
|
1966
|
+
reason: "Inherited from Flash family: tool reliability drops above ~20 (despite 128 hard limit). Validate per (archetype, model) after n\u226520."
|
|
1967
|
+
},
|
|
1968
|
+
{
|
|
1969
|
+
metric: "thinking_with_short_output",
|
|
1970
|
+
threshold: 1,
|
|
1971
|
+
action: "force_thinking_budget_zero",
|
|
1972
|
+
reason: "Thinking mode supported per Google docs; same drain risk as 2.5-flash \u2014 thinking tokens consume maxOutputTokens"
|
|
1973
|
+
},
|
|
1974
|
+
{
|
|
1975
|
+
// Inherited from 2.5-flash s11 trust artifact (5/5 empty rate on
|
|
1976
|
+
// tt-intelligence/summarize/gemini-2.5-flash with tools offered).
|
|
1977
|
+
// Family-likely failure mode for Flash architecture across versions.
|
|
1978
|
+
// Keep preemptively until brain evidence on 3.5-flash specifically.
|
|
1979
|
+
metric: "tool_count",
|
|
1980
|
+
threshold: 1,
|
|
1981
|
+
whenIntent: "summarize",
|
|
1982
|
+
action: "strip_tools",
|
|
1983
|
+
reason: "Inherited from 2.5-flash s11 cliff (kgauto commit 3872832): summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3.5-flash specifically."
|
|
1984
|
+
}
|
|
1985
|
+
],
|
|
1986
|
+
costInputPer1m: 1.5,
|
|
1987
|
+
costOutputPer1m: 9,
|
|
1988
|
+
lowering: {
|
|
1989
|
+
...GOOGLE_LOWERING_BASE,
|
|
1990
|
+
// 10× cache discount per Google pricing: $0.15/M cached vs $1.50/M input.
|
|
1991
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1992
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1993
|
+
},
|
|
1994
|
+
recovery: [
|
|
1995
|
+
{
|
|
1996
|
+
signal: "empty_response_after_tool",
|
|
1997
|
+
action: "retry_with_params",
|
|
1998
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1999
|
+
maxRetries: 1,
|
|
2000
|
+
reason: "Inherited Flash-family pattern: empty after tool result \u2014 retry with thinking off"
|
|
2001
|
+
},
|
|
2002
|
+
{
|
|
2003
|
+
signal: "empty_response",
|
|
2004
|
+
action: "retry_with_params",
|
|
2005
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
2006
|
+
maxRetries: 1,
|
|
2007
|
+
reason: "Empty response \u2014 try with thinking off"
|
|
2008
|
+
},
|
|
2009
|
+
{
|
|
2010
|
+
signal: "malformed_function_call",
|
|
2011
|
+
action: "escalate",
|
|
2012
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
|
|
2013
|
+
}
|
|
2014
|
+
],
|
|
2015
|
+
strengths: ["agentic_loops", "coding", "1m_context", "parallel_tools", "thinking_mode", "sustained_frontier"],
|
|
2016
|
+
weaknesses: ["cost_vs_2_5_flash", "no_brain_evidence_yet"],
|
|
2017
|
+
notes: "Hand-onboarded s37 (2026-05-21) verified against Google docs. Stable status; positioned as Flash-family upgrade for agentic loops and coding. 5\xD7/3.6\xD7 more expensive than 2.5-flash but Google claims step-change on sustained frontier work. archetypePerf adjustments are judgment-grounded starter hypotheses \u2014 brain evidence will validate within ~50 rows per archetype.",
|
|
2018
|
+
// Starter hypothesis: anchored to 2.5-flash archetypePerf with +1/+2
|
|
2019
|
+
// adjustments where Google's positioning explicitly supports
|
|
2020
|
+
// (agentic/coding/sustained). Hunt held at 9 inherited from L-040 family
|
|
2021
|
+
// parallel-tool tier. Brain evidence will replace.
|
|
2022
|
+
archetypePerf: {
|
|
2023
|
+
hunt: 9,
|
|
2024
|
+
// Inherited from 2.5-flash L-040 parallel-tool tier; Google positions 3.5 as agentic-loop champion
|
|
2025
|
+
classify: 7,
|
|
2026
|
+
// Inherited from 2.5-flash brain-validated tier (218 rows on 2.5)
|
|
2027
|
+
summarize: 7,
|
|
2028
|
+
// Inherited from 2.5-flash; cliff strips tools when present
|
|
2029
|
+
transform: 7,
|
|
2030
|
+
ask: 8,
|
|
2031
|
+
// +1 vs 2.5-flash — sustained-frontier positioning
|
|
2032
|
+
generate: 8,
|
|
2033
|
+
// +2 vs 2.5-flash (6→8) — Google: "complex coding cycles and iterations"
|
|
2034
|
+
plan: 7,
|
|
2035
|
+
// +2 vs 2.5-flash (5→7) — "complex iterations" positioning
|
|
2036
|
+
extract: 7,
|
|
2037
|
+
// +1 vs 2.5-flash — sustained-frontier on structured tasks
|
|
2038
|
+
critique: 5
|
|
2039
|
+
// +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
|
|
2040
|
+
}
|
|
1905
2041
|
}
|
|
1906
2042
|
];
|
|
1907
2043
|
var ALIASES = {
|
|
@@ -2174,6 +2310,86 @@ function getArchetypePerfScore(modelId, archetype) {
|
|
|
2174
2310
|
return { score, n, grounding };
|
|
2175
2311
|
}
|
|
2176
2312
|
|
|
2313
|
+
// src/compatibility.ts
|
|
2314
|
+
/**
 * Default archetype floor on the 0-10 archetypePerf scale. A model whose raw
 * score for an archetype is at or above this value is reported as
 * "compatible" by getModelCompatibility without needing an adapter.
 * Declared with `var` like the bundle's other module-level bindings.
 */
var ARCHETYPE_FLOOR_DEFAULT = 6;
/**
 * Exported lower bound that sits below the default floor.
 * NOTE(review): not referenced by the compatibility helpers in this section;
 * presumably the hard minimum a caller-supplied floor may be lowered to — confirm.
 */
var ABSOLUTE_FLOOR = 4;
|
|
2316
|
+
/**
 * Read the score a profile declares for a single archetype.
 *
 * @param {object} profile - Model profile; its `archetypePerf` table may be absent.
 * @param {string} archetype - Archetype key to look up in the table.
 * @returns {number} The declared score, or 5 when the table or the entry is
 *   null/undefined. A declared 0 is returned as-is.
 */
function rawArchetypePerf(profile, archetype) {
  const perfTable = profile.archetypePerf;
  if (perfTable == null) return 5;
  const declared = perfTable[archetype];
  return declared ?? 5;
}
|
|
2319
|
+
/**
 * Report whether a profile hits the "sequential tools" cliff for hunt.
 *
 * The cliff applies only when the profile explicitly sets
 * `parallelToolCalls: false` AND its hunt score (5 when undeclared) is below
 * ARCHETYPE_FLOOR_DEFAULT.
 *
 * @param {object} profile - Model profile to inspect.
 * @returns {boolean} True when both conditions hold.
 */
function hasSequentialToolCliffForHunt(profile) {
  return (
    profile.parallelToolCalls === false &&
    (profile.archetypePerf?.hunt ?? 5) < ARCHETYPE_FLOOR_DEFAULT
  );
}
|
|
2324
|
+
/**
 * Find the adapter (if any) that works around a known cliff for the given
 * profile/archetype pair.
 *
 * Only one cliff is handled here: `hunt` on a profile that
 * hasSequentialToolCliffForHunt() flags. For that case the adapter is
 * `toolOrchestration = "sequential"`, and the post-adapter score is estimated
 * from the profile's other archetype scores.
 *
 * @param {object} profile - Model profile (reads `archetypePerf`).
 * @param {string} archetype - Archetype being evaluated.
 * @returns {{adapter: {parameter: string, value: string, consequence: string},
 *   estimatedScoreWithAdapter: number} | undefined} Adapter description, or
 *   undefined when no known adapter applies.
 */
function adapterForCliff(profile, archetype) {
  if (archetype !== "hunt" || !hasSequentialToolCliffForHunt(profile)) {
    return void 0;
  }
  // The estimate never drops below "one above the floor".
  const baseline = ARCHETYPE_FLOOR_DEFAULT + 1;
  // Collect every non-hunt score. Non-finite numbers (NaN, ±Infinity) are
  // skipped: they would break the numeric sort and poison Math.max below.
  const otherScores = Object.entries(profile.archetypePerf ?? {})
    .filter(([key, value]) => key !== "hunt" && typeof value === "number" && Number.isFinite(value))
    .map(([, value]) => value)
    .sort((a, b) => a - b);
  // Middle element of the sorted scores; for an even count this is the upper
  // of the two middle values (kept as-is so estimates stay whole scores).
  const median = otherScores.length === 0 ? baseline : otherScores[Math.floor(otherScores.length / 2)];
  return {
    adapter: {
      parameter: "toolOrchestration",
      value: "sequential",
      consequence: "Tool calls run one at a time instead of in parallel \u2014 slower per step but reliable for this model."
    },
    estimatedScoreWithAdapter: Math.max(baseline, median)
  };
}
|
|
2347
|
+
/**
 * Produce the label used for an archetype inside human-readable reason
 * strings. Currently the identity mapping: the archetype key is returned
 * unchanged.
 *
 * @param {string} archetype - Archetype key.
 * @returns {string} The same value that was passed in.
 */
function archetypeDescriptor(archetype) {
  return archetype;
}
|
|
2350
|
+
/**
 * Classify how well a registered model fits an intent's archetype.
 *
 * Verdicts, checked in this order:
 *   1. unknown model                                   -> "reject" (archetypePerf 0)
 *   2. caller already asked for sequential tools and
 *      that is exactly the known adapter               -> "compatible"
 *   3. raw score >= ARCHETYPE_FLOOR_DEFAULT            -> "compatible"
 *   4. a known adapter exists                          -> "requires-adapter"
 *   5. otherwise                                       -> "reject"
 *
 * @param {string} modelId - Model identifier passed to tryGetProfile.
 * @param {{archetype: string, toolOrchestration?: string}} intent - Archetype
 *   plus the caller's declared tool orchestration mode, if any.
 * @returns {{status: string, reason: string, archetypePerf: number,
 *   archetypePerfWithAdapter?: number, adapter?: object}} Verdict object.
 */
function getModelCompatibility(modelId, intent) {
  const profile = tryGetProfile(modelId);
  if (!profile) {
    const unknownReason = `Model "${modelId}" is not registered with kgauto \u2014 no compatibility data available.`;
    return { status: "reject", reason: unknownReason, archetypePerf: 0 };
  }

  const archetype = intent.archetype;
  const requestedOrchestration = intent.toolOrchestration;
  const rawScore = rawArchetypePerf(profile, archetype);
  const descriptor = archetypeDescriptor(archetype);
  const cliffFix = adapterForCliff(profile, archetype);

  // The caller has already opted into the adapter this model needs.
  const alreadyAdapted =
    requestedOrchestration === "sequential" &&
    cliffFix &&
    cliffFix.adapter.parameter === "toolOrchestration" &&
    cliffFix.adapter.value === "sequential";
  if (alreadyAdapted) {
    return {
      status: "compatible",
      reason: `Suited for ${descriptor} with sequential tool calls.`,
      archetypePerf: rawScore
    };
  }

  if (rawScore >= ARCHETYPE_FLOOR_DEFAULT) {
    return {
      status: "compatible",
      reason: `Suited for ${descriptor}.`,
      archetypePerf: rawScore
    };
  }

  if (!cliffFix) {
    return {
      status: "reject",
      reason: `Not suited for ${descriptor} \u2014 would underperform significantly.`,
      archetypePerf: rawScore
    };
  }

  // Below the floor, but a known adapter can lift the model over it.
  const { adapter } = cliffFix;
  const adapterLabel = adapter.parameter === "toolOrchestration" ? "tool calls" : adapter.parameter;
  return {
    status: "requires-adapter",
    reason: `Best with ${adapter.value} ${adapterLabel} for ${descriptor} \u2014 slower but works.`,
    archetypePerf: rawScore,
    archetypePerfWithAdapter: cliffFix.estimatedScoreWithAdapter,
    adapter
  };
}
|
|
2392
|
+
|
|
2177
2393
|
// src/advisor.ts
|
|
2178
2394
|
var QUALITY_FLOOR_FOR_RECOMMENDATION = 6;
|
|
2179
2395
|
var TIER_DOWN_COST_RATIO = 0.5;
|
|
@@ -2190,6 +2406,7 @@ function runAdvisor(ir, result, profile, policy, phase2) {
|
|
|
2190
2406
|
out.push(...detectModelStaleEvidence(ir, profile));
|
|
2191
2407
|
out.push(...detectTierDown(ir, profile, phase2));
|
|
2192
2408
|
}
|
|
2409
|
+
out.push(...detectArchetypePerfFloorBreach(ir, profile));
|
|
2193
2410
|
return out;
|
|
2194
2411
|
}
|
|
2195
2412
|
function detectCachingOff(ir, profile) {
|
|
@@ -2360,6 +2577,36 @@ function detectTierDown(ir, profile, phase2) {
|
|
|
2360
2577
|
}
|
|
2361
2578
|
];
|
|
2362
2579
|
}
|
|
2580
|
+
/**
 * Advisor check: flag a compile whose target model is below the archetype
 * floor for the IR's archetype.
 *
 * Delegates the verdict to getModelCompatibility and maps it to advisories:
 *   - "compatible"       -> no advisory
 *   - "requires-adapter" -> one "warn" advisory carrying the suggested adapter
 *   - anything else      -> one "critical" advisory recommending a model swap
 *
 * @param {object} ir - Compile IR; reads `ir.intent.archetype` and the
 *   optional `ir.constraints.toolOrchestration`.
 * @param {object} profile - Profile of the model being compiled for (reads `id`).
 * @returns {object[]} Zero or one advisory objects.
 */
function detectArchetypePerfFloorBreach(ir, profile) {
  const archetype = ir.intent.archetype;
  const compat = getModelCompatibility(profile.id, {
    archetype,
    toolOrchestration: ir.constraints?.toolOrchestration
  });
  if (compat.status === "compatible") return [];

  const docsUrl = "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories";
  // Report the floor from the shared constant so the message cannot drift
  // from the threshold getModelCompatibility actually applies.
  const breach = `${profile.id} sits below the archetype floor for ${archetype} (score ${compat.archetypePerf}/10, floor ${ARCHETYPE_FLOOR_DEFAULT})`;

  if (compat.status === "requires-adapter") {
    const { adapter } = compat;
    return [
      {
        level: "warn",
        code: "archetype-perf-floor-breach",
        message: `${breach}. A known adapter would lift it: ${adapter.parameter}=${adapter.value}. ${adapter.consequence}`,
        suggestion: `Pass \`ir.constraints.${adapter.parameter} = '${adapter.value}'\` for this call, OR pick a model whose archetypePerf for ${archetype} already clears the floor (call \`getModelCompatibility(modelId, { archetype: '${archetype}' })\` to check). Estimated post-adapter score: ${compat.archetypePerfWithAdapter}/10.`,
        recommendationType: "prompt-fix",
        suggestedAdaptation: adapter,
        docsUrl
      }
    ];
  }

  return [
    {
      level: "critical",
      code: "archetype-perf-floor-breach",
      message: `${breach} and no known adapter would lift it. ${compat.reason}`,
      suggestion: `Swap to a model whose archetypePerf for ${archetype} clears the floor. Use \`getModelCompatibility(candidateId, { archetype: '${archetype}' })\` to vet candidates, or \`getDefaultFallbackChain({ archetype: '${archetype}', posture: 'open' })\` for a library-picked chain that respects the floor by construction.`,
      recommendationType: "model-swap",
      docsUrl
    }
  ];
}
|
|
2363
2610
|
|
|
2364
2611
|
// src/compile.ts
|
|
2365
2612
|
var counter = 0;
|
|
@@ -2620,6 +2867,9 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
2620
2867
|
tokens
|
|
2621
2868
|
);
|
|
2622
2869
|
const shapeKey = `${shape.contextBucket}-${shape.toolCountBucket}-${shape.historyDepth}-${shape.outputMode}`;
|
|
2870
|
+
const toolsCount = result.diagnostics.toolsKept;
|
|
2871
|
+
const historyDepth = Array.isArray(ir.history) ? ir.history.length : 0;
|
|
2872
|
+
const systemPromptChars = estimateSystemPromptChars(ir.sections);
|
|
2623
2873
|
compileRegistry.set(result.handle, {
|
|
2624
2874
|
appId,
|
|
2625
2875
|
archetype,
|
|
@@ -2633,9 +2883,24 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
2633
2883
|
historyCacheableTokens: result.diagnostics.historyCacheableTokens,
|
|
2634
2884
|
historyTokensTotal: result.diagnostics.historyTokensTotal,
|
|
2635
2885
|
// alpha.20 E3: capture consumer's declared mode for the brain payload.
|
|
2636
|
-
toolOrchestration: result.diagnostics.toolOrchestration
|
|
2886
|
+
toolOrchestration: result.diagnostics.toolOrchestration,
|
|
2887
|
+
// alpha.28: shape fields for Glass-Box renderer.
|
|
2888
|
+
toolsCount,
|
|
2889
|
+
historyDepth,
|
|
2890
|
+
systemPromptChars
|
|
2637
2891
|
});
|
|
2638
2892
|
}
|
|
2893
|
+
/**
 * Sum the lengths of all string `content` fields across the IR's sections.
 *
 * Entries that are not objects, or whose `content` is not a string, add
 * nothing. Lengths are JavaScript string lengths (UTF-16 code units).
 *
 * @param {unknown} sections - Expected to be an array of `{ content }` objects.
 * @returns {number | undefined} Total character count, or undefined when
 *   `sections` is not an array, is empty, or yields a total of 0.
 */
function estimateSystemPromptChars(sections) {
  if (!Array.isArray(sections)) return undefined;
  const charCount = sections.reduce((sum, section) => {
    const text = section !== null && typeof section === "object" ? section.content : undefined;
    return typeof text === "string" ? sum + text.length : sum;
  }, 0);
  return charCount > 0 ? charCount : undefined;
}
|
|
2639
2904
|
async function record(input) {
|
|
2640
2905
|
const reg = compileRegistry.get(input.handle);
|
|
2641
2906
|
if (reg) compileRegistry.delete(input.handle);
|
|
@@ -2713,6 +2978,8 @@ function buildPayload(input, reg) {
|
|
|
2713
2978
|
const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
|
|
2714
2979
|
const costModel = actual;
|
|
2715
2980
|
const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
|
|
2981
|
+
const fellOverFrom = input.fellOverFrom ?? requested;
|
|
2982
|
+
const fallbackReason = fellOverFrom ? input.fallbackReason : void 0;
|
|
2716
2983
|
return {
|
|
2717
2984
|
handle: input.handle,
|
|
2718
2985
|
app_id: reg?.appId,
|
|
@@ -2747,7 +3014,16 @@ function buildPayload(input, reg) {
|
|
|
2747
3014
|
// the brain can measure per-mode model perf separately (DeepSeek in
|
|
2748
3015
|
// sequential vs parallel mode is two different stories — L-040).
|
|
2749
3016
|
// Null when consumer hadn't adopted the constraint yet.
|
|
2750
|
-
tool_orchestration: reg?.toolOrchestration ?? null
|
|
3017
|
+
tool_orchestration: reg?.toolOrchestration ?? null,
|
|
3018
|
+
// alpha.28 — Glass-Box renderer substrate (migration 018). All optional;
|
|
3019
|
+
// omitted-undefined PostgREST inserts store NULL → renderer renders "—".
|
|
3020
|
+
finish_reason: input.finishReason,
|
|
3021
|
+
total_ms: input.totalMs ?? input.latencyMs,
|
|
3022
|
+
tools_count: input.toolsCount ?? reg?.toolsCount,
|
|
3023
|
+
history_depth: input.historyDepth ?? reg?.historyDepth,
|
|
3024
|
+
system_prompt_chars: input.systemPromptChars ?? reg?.systemPromptChars,
|
|
3025
|
+
fell_over_from: fellOverFrom,
|
|
3026
|
+
fallback_reason: fallbackReason
|
|
2751
3027
|
};
|
|
2752
3028
|
}
|
|
2753
3029
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
|
@@ -3945,6 +4221,8 @@ async function call(ir, opts = {}) {
|
|
|
3945
4221
|
latencyMs: latencyMs2
|
|
3946
4222
|
})
|
|
3947
4223
|
);
|
|
4224
|
+
const fellOver = targetModel !== initial.target;
|
|
4225
|
+
const fallbackReason = fellOver ? normalizeFallbackReason(attempts) : void 0;
|
|
3948
4226
|
await record({
|
|
3949
4227
|
handle: initial.handle,
|
|
3950
4228
|
tokensIn: validated.response.tokens.input,
|
|
@@ -3958,10 +4236,18 @@ async function call(ir, opts = {}) {
|
|
|
3958
4236
|
promptPreview: extractPromptPreview(ir),
|
|
3959
4237
|
responsePreview: validated.response.text.slice(0, 200),
|
|
3960
4238
|
cacheReadInputTokens: validated.response.tokens.cached,
|
|
3961
|
-
cacheCreationInputTokens: validated.response.tokens.cacheCreated
|
|
4239
|
+
cacheCreationInputTokens: validated.response.tokens.cacheCreated,
|
|
4240
|
+
// alpha.28 — Glass-Box renderer substrate (migration 018). call()
|
|
4241
|
+
// owns the lifecycle so it has direct visibility into finishReason
|
|
4242
|
+
// (from the normalized provider response), totalMs (mirrors latencyMs
|
|
4243
|
+
// for non-streaming; future streaming variant may diverge), and the
|
|
4244
|
+
// fell-over-from / fallback-reason pair (already computed above for
|
|
4245
|
+
// the CallResult return shape).
|
|
4246
|
+
finishReason: validated.response.finishReason,
|
|
4247
|
+
totalMs: latencyMs2,
|
|
4248
|
+
fellOverFrom: fellOver ? initial.target : void 0,
|
|
4249
|
+
fallbackReason
|
|
3962
4250
|
});
|
|
3963
|
-
const fellOver = targetModel !== initial.target;
|
|
3964
|
-
const fallbackReason = fellOver ? normalizeFallbackReason(attempts) : void 0;
|
|
3965
4251
|
if (fellOver) {
|
|
3966
4252
|
const firstFailed = attempts.find((a) => a.status !== "success");
|
|
3967
4253
|
if (firstFailed) {
|
|
@@ -4319,8 +4605,10 @@ function compile2(ir, opts) {
|
|
|
4319
4605
|
}
|
|
4320
4606
|
// Annotate the CommonJS export names for ESM import in node:
|
|
4321
4607
|
0 && (module.exports = {
|
|
4608
|
+
ABSOLUTE_FLOOR,
|
|
4322
4609
|
ALIASES,
|
|
4323
4610
|
ALL_ARCHETYPES,
|
|
4611
|
+
ARCHETYPE_FLOOR_DEFAULT,
|
|
4324
4612
|
CallError,
|
|
4325
4613
|
DIALECT_VERSION,
|
|
4326
4614
|
INTENT_ARCHETYPES,
|
|
@@ -4342,6 +4630,7 @@ function compile2(ir, opts) {
|
|
|
4342
4630
|
getArchetypePerfScore,
|
|
4343
4631
|
getDefaultFallbackChain,
|
|
4344
4632
|
getDefaultFallbackChainWithGrounding,
|
|
4633
|
+
getModelCompatibility,
|
|
4345
4634
|
getPerAxisMetrics,
|
|
4346
4635
|
getProfile,
|
|
4347
4636
|
getReachabilityDiagnostic,
|