@warmdrift/kgauto-compiler 2.0.0-alpha.14 → 2.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-DICCTQLG.mjs → chunk-SFF5EVTL.mjs} +205 -0
- package/dist/index.js +497 -222
- package/dist/index.mjs +250 -180
- package/dist/profiles.js +205 -0
- package/dist/profiles.mjs +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
getProfile,
|
|
18
18
|
profilesByProvider,
|
|
19
19
|
tryGetProfile
|
|
20
|
-
} from "./chunk-DICCTQLG.mjs";
|
|
20
|
+
} from "./chunk-SFF5EVTL.mjs";
|
|
21
21
|
|
|
22
22
|
// src/tokenizer.ts
|
|
23
23
|
var tokenizerImpl = defaultCharBasedCounter;
|
|
@@ -139,33 +139,69 @@ function passCompressHistory(ir, opts = {}) {
|
|
|
139
139
|
const summarizeAboveTokens = opts.summarizeAboveTokens;
|
|
140
140
|
const historyTokensTotal = totalHistoryTokens(history);
|
|
141
141
|
const countThresholdHit = history.length > summarizeOlderThan;
|
|
142
|
-
const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens
|
|
142
|
+
const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens;
|
|
143
143
|
if (!countThresholdHit && !tokenThresholdHit) {
|
|
144
144
|
return { value: ir, mutations: [], historyTokensTotal };
|
|
145
145
|
}
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
146
|
+
if (history.length > keepRecent) {
|
|
147
|
+
const cutIndex = history.length - keepRecent;
|
|
148
|
+
const old = history.slice(0, cutIndex);
|
|
149
|
+
const recent = history.slice(cutIndex);
|
|
150
|
+
const userTurns = old.filter((m) => m.role === "user");
|
|
151
|
+
const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
|
|
152
|
+
const oldTokens = totalHistoryTokens(old);
|
|
153
|
+
const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
|
|
154
|
+
const summary = {
|
|
155
|
+
role: "system",
|
|
156
|
+
content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
|
|
157
|
+
};
|
|
158
|
+
return {
|
|
159
|
+
value: { ...ir, history: [summary, ...recent] },
|
|
160
|
+
mutations: [
|
|
161
|
+
{
|
|
162
|
+
id: `compress-history-${old.length}`,
|
|
163
|
+
source: "static_pass",
|
|
164
|
+
passName: "compress_history",
|
|
165
|
+
description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
|
|
166
|
+
}
|
|
167
|
+
],
|
|
168
|
+
historyTokensTotal
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
if (tokenThresholdHit) {
|
|
172
|
+
let fattestIdx = -1;
|
|
173
|
+
let fattestTokens = 0;
|
|
174
|
+
for (let i = 0; i < history.length; i++) {
|
|
175
|
+
const m = history[i];
|
|
176
|
+
if (!m || typeof m.content !== "string") continue;
|
|
177
|
+
const t = countTokens(m.content);
|
|
178
|
+
if (t > fattestTokens) {
|
|
179
|
+
fattestTokens = t;
|
|
180
|
+
fattestIdx = i;
|
|
165
181
|
}
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
182
|
+
}
|
|
183
|
+
const FAT_DOMINANCE_FLOOR = 0.3;
|
|
184
|
+
const fattest = fattestIdx >= 0 ? history[fattestIdx] : void 0;
|
|
185
|
+
if (fattest && historyTokensTotal > 0 && fattestTokens / historyTokensTotal >= FAT_DOMINANCE_FLOOR) {
|
|
186
|
+
const firstLine = fattest.content.split("\n")[0]?.slice(0, 200) ?? "";
|
|
187
|
+
const newContent = `[Earlier ${fattest.role} message content omitted: ~${fattestTokens} tokens. Preview: "${firstLine}"]`;
|
|
188
|
+
const newHistory = history.slice();
|
|
189
|
+
newHistory[fattestIdx] = { ...fattest, content: newContent };
|
|
190
|
+
return {
|
|
191
|
+
value: { ...ir, history: newHistory },
|
|
192
|
+
mutations: [
|
|
193
|
+
{
|
|
194
|
+
id: `compress-fat-message-${fattestIdx}`,
|
|
195
|
+
source: "static_pass",
|
|
196
|
+
passName: "compress_history",
|
|
197
|
+
description: `Replaced fat ${fattest.role} message #${fattestIdx} content (~${fattestTokens} of ${historyTokensTotal} tokens, ${Math.round(fattestTokens / historyTokensTotal * 100)}% of history) with summary stub \u2014 token threshold ${summarizeAboveTokens} exceeded (history.length ${history.length} <= keepRecent ${keepRecent}, slice not possible)`
|
|
198
|
+
}
|
|
199
|
+
],
|
|
200
|
+
historyTokensTotal
|
|
201
|
+
};
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
return { value: ir, mutations: [], historyTokensTotal };
|
|
169
205
|
}
|
|
170
206
|
function passApplyCliffs(ir, profile, estimatedInputTokens) {
|
|
171
207
|
const mutations = [];
|
|
@@ -1498,6 +1534,185 @@ function tryParseJson(s) {
|
|
|
1498
1534
|
}
|
|
1499
1535
|
}
|
|
1500
1536
|
|
|
1537
|
+
// src/chains-brain.ts
|
|
1538
|
+
function isChainsRow(x) {
|
|
1539
|
+
if (!x || typeof x !== "object") return false;
|
|
1540
|
+
const r = x;
|
|
1541
|
+
return typeof r.archetype === "string" && typeof r.tier === "number" && typeof r.model_id === "string";
|
|
1542
|
+
}
|
|
1543
|
+
function mapRowsToChains(rows) {
|
|
1544
|
+
const grouped = /* @__PURE__ */ new Map();
|
|
1545
|
+
for (const row of rows) {
|
|
1546
|
+
if (!isChainsRow(row)) continue;
|
|
1547
|
+
const list = grouped.get(row.archetype) ?? [];
|
|
1548
|
+
list.push(row);
|
|
1549
|
+
grouped.set(row.archetype, list);
|
|
1550
|
+
}
|
|
1551
|
+
const out = {};
|
|
1552
|
+
for (const [archetype, group] of grouped.entries()) {
|
|
1553
|
+
group.sort((a, b) => a.tier - b.tier);
|
|
1554
|
+
out[archetype] = group.map((r) => r.model_id);
|
|
1555
|
+
}
|
|
1556
|
+
const bundled = getAllStarterChains();
|
|
1557
|
+
for (const archetype of Object.keys(bundled)) {
|
|
1558
|
+
if (!out[archetype]) out[archetype] = bundled[archetype];
|
|
1559
|
+
}
|
|
1560
|
+
return out;
|
|
1561
|
+
}
|
|
1562
|
+
var loadChainsFromBrain = createBrainQueryCache({
|
|
1563
|
+
table: "kgauto_chains",
|
|
1564
|
+
mapRows: mapRowsToChains,
|
|
1565
|
+
bundledFallback: getAllStarterChains
|
|
1566
|
+
});
|
|
1567
|
+
|
|
1568
|
+
// src/fallback.ts
|
|
1569
|
+
var STARTER_CHAINS = {
|
|
1570
|
+
// Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
|
|
1571
|
+
critique: [
|
|
1572
|
+
"claude-opus-4-7",
|
|
1573
|
+
"claude-sonnet-4-6",
|
|
1574
|
+
"gemini-2.5-pro"
|
|
1575
|
+
],
|
|
1576
|
+
// Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
|
|
1577
|
+
// to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
|
|
1578
|
+
plan: [
|
|
1579
|
+
"claude-sonnet-4-6",
|
|
1580
|
+
"claude-opus-4-7",
|
|
1581
|
+
"gemini-2.5-pro",
|
|
1582
|
+
"deepseek-v4-pro"
|
|
1583
|
+
],
|
|
1584
|
+
// Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
|
|
1585
|
+
// Flash floor for the open-posture chain.
|
|
1586
|
+
generate: [
|
|
1587
|
+
"claude-sonnet-4-6",
|
|
1588
|
+
"claude-haiku-4-5",
|
|
1589
|
+
"gemini-2.5-pro",
|
|
1590
|
+
"gemini-2.5-flash"
|
|
1591
|
+
],
|
|
1592
|
+
ask: [
|
|
1593
|
+
"claude-sonnet-4-6",
|
|
1594
|
+
"claude-haiku-4-5",
|
|
1595
|
+
"gemini-2.5-pro",
|
|
1596
|
+
"gemini-2.5-flash"
|
|
1597
|
+
],
|
|
1598
|
+
// Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
|
|
1599
|
+
// DeepSeek skipped (no brain evidence). Floor at Haiku.
|
|
1600
|
+
extract: [
|
|
1601
|
+
"claude-sonnet-4-6",
|
|
1602
|
+
"claude-haiku-4-5",
|
|
1603
|
+
"gemini-2.5-pro"
|
|
1604
|
+
],
|
|
1605
|
+
// Forgiving archetype — Sonnet primary but Flash safely floors it.
|
|
1606
|
+
transform: [
|
|
1607
|
+
"claude-sonnet-4-6",
|
|
1608
|
+
"claude-haiku-4-5",
|
|
1609
|
+
"gemini-2.5-pro",
|
|
1610
|
+
"gemini-2.5-flash"
|
|
1611
|
+
],
|
|
1612
|
+
// Parallel-tool throughput champion (Flash, L-040). Tier 1 cross-provider
|
|
1613
|
+
// Pro; tier 2 Sonnet (quality safety net for blocked-Flash case); tier 3
|
|
1614
|
+
// Haiku (reduced tool budget — cliff at 16 fires).
|
|
1615
|
+
hunt: [
|
|
1616
|
+
"gemini-2.5-flash",
|
|
1617
|
+
"gemini-2.5-pro",
|
|
1618
|
+
"claude-sonnet-4-6",
|
|
1619
|
+
"claude-haiku-4-5"
|
|
1620
|
+
],
|
|
1621
|
+
// Cost-sensitive + tolerant. DeepSeek brain-evidence tier 1; Haiku tier 2
|
|
1622
|
+
// for quality safety; Flash-Lite emergency floor (onboarded s22).
|
|
1623
|
+
summarize: [
|
|
1624
|
+
"gemini-2.5-flash",
|
|
1625
|
+
"deepseek-v4-flash",
|
|
1626
|
+
"claude-haiku-4-5",
|
|
1627
|
+
"gemini-2.5-flash-lite"
|
|
1628
|
+
],
|
|
1629
|
+
// Brain-validated DeepSeek tier 1 (169 rows, 0% empty); Haiku tier 2;
|
|
1630
|
+
// Flash-Lite floor for repeat-prompt workloads (cache-discount 10×).
|
|
1631
|
+
classify: [
|
|
1632
|
+
"gemini-2.5-flash",
|
|
1633
|
+
"deepseek-v4-flash",
|
|
1634
|
+
"claude-haiku-4-5",
|
|
1635
|
+
"gemini-2.5-flash-lite"
|
|
1636
|
+
]
|
|
1637
|
+
};
|
|
1638
|
+
function getDefaultFallbackChain(opts) {
|
|
1639
|
+
const { archetype, primary, maxDepth = 3, policy, reachability } = opts;
|
|
1640
|
+
if (maxDepth < 1) {
|
|
1641
|
+
throw new Error(
|
|
1642
|
+
`getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
|
|
1643
|
+
);
|
|
1644
|
+
}
|
|
1645
|
+
const allChains = loadChainsFromBrain();
|
|
1646
|
+
const starter = allChains[archetype];
|
|
1647
|
+
if (!starter) {
|
|
1648
|
+
throw new Error(
|
|
1649
|
+
`getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(allChains).join(", ")}`
|
|
1650
|
+
);
|
|
1651
|
+
}
|
|
1652
|
+
let chain;
|
|
1653
|
+
if (primary) {
|
|
1654
|
+
chain = [primary, ...starter.filter((id) => id !== primary)];
|
|
1655
|
+
} else {
|
|
1656
|
+
chain = [...starter];
|
|
1657
|
+
}
|
|
1658
|
+
if (policy?.blockedModels && policy.blockedModels.length > 0) {
|
|
1659
|
+
const blocked = new Set(policy.blockedModels);
|
|
1660
|
+
chain = chain.filter((id) => !blocked.has(id));
|
|
1661
|
+
}
|
|
1662
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1663
|
+
const deduped = [];
|
|
1664
|
+
for (const id of chain) {
|
|
1665
|
+
if (!seen.has(id)) {
|
|
1666
|
+
seen.add(id);
|
|
1667
|
+
deduped.push(id);
|
|
1668
|
+
}
|
|
1669
|
+
}
|
|
1670
|
+
let filtered = deduped;
|
|
1671
|
+
if (reachability) {
|
|
1672
|
+
filtered = deduped.filter((id) => isModelReachable(id, reachability));
|
|
1673
|
+
}
|
|
1674
|
+
return filtered.slice(0, maxDepth);
|
|
1675
|
+
}
|
|
1676
|
+
function getStarterChain(archetype) {
|
|
1677
|
+
const chain = STARTER_CHAINS[archetype];
|
|
1678
|
+
if (!chain) {
|
|
1679
|
+
throw new Error(
|
|
1680
|
+
`getStarterChain: unknown archetype "${archetype}"`
|
|
1681
|
+
);
|
|
1682
|
+
}
|
|
1683
|
+
return [...chain];
|
|
1684
|
+
}
|
|
1685
|
+
function getAllStarterChains() {
|
|
1686
|
+
const out = {};
|
|
1687
|
+
for (const [archetype, chain] of Object.entries(STARTER_CHAINS)) {
|
|
1688
|
+
out[archetype] = [...chain];
|
|
1689
|
+
}
|
|
1690
|
+
return out;
|
|
1691
|
+
}
|
|
1692
|
+
function ensureCrossProviderTail(opts) {
|
|
1693
|
+
const { chain, archetype, apiKeys, envSource } = opts;
|
|
1694
|
+
if (chain.length < 1) return { chain };
|
|
1695
|
+
const providers = /* @__PURE__ */ new Set();
|
|
1696
|
+
for (const t of chain) {
|
|
1697
|
+
const p = tryGetProfile(t);
|
|
1698
|
+
if (p) providers.add(p.provider);
|
|
1699
|
+
}
|
|
1700
|
+
if (providers.size >= 2) return { chain };
|
|
1701
|
+
const existingProvider = providers.values().next().value;
|
|
1702
|
+
if (!existingProvider) return { chain };
|
|
1703
|
+
const allChains = loadChainsFromBrain();
|
|
1704
|
+
const fullChain = allChains[archetype];
|
|
1705
|
+
if (!fullChain) return { chain };
|
|
1706
|
+
for (const candidate of fullChain) {
|
|
1707
|
+
if (chain.includes(candidate)) continue;
|
|
1708
|
+
const cp = tryGetProfile(candidate);
|
|
1709
|
+
if (!cp || cp.provider === existingProvider) continue;
|
|
1710
|
+
if (!isModelReachable(candidate, { apiKeys, envSource })) continue;
|
|
1711
|
+
return { chain: [...chain, candidate], appended: candidate };
|
|
1712
|
+
}
|
|
1713
|
+
return { chain };
|
|
1714
|
+
}
|
|
1715
|
+
|
|
1501
1716
|
// src/call.ts
|
|
1502
1717
|
async function call(ir, opts = {}) {
|
|
1503
1718
|
const initial = compileAndRegister(ir, opts);
|
|
@@ -1543,6 +1758,17 @@ async function call(ir, opts = {}) {
|
|
|
1543
1758
|
"no_reachable_models"
|
|
1544
1759
|
);
|
|
1545
1760
|
}
|
|
1761
|
+
const archetypeName = ir.intent?.archetype;
|
|
1762
|
+
if (archetypeName) {
|
|
1763
|
+
const ensured = ensureCrossProviderTail({
|
|
1764
|
+
chain: targetsToTry,
|
|
1765
|
+
archetype: archetypeName,
|
|
1766
|
+
apiKeys: opts.apiKeys
|
|
1767
|
+
});
|
|
1768
|
+
if (ensured.appended) {
|
|
1769
|
+
targetsToTry = ensured.chain;
|
|
1770
|
+
}
|
|
1771
|
+
}
|
|
1546
1772
|
}
|
|
1547
1773
|
let activeCompile = initial;
|
|
1548
1774
|
let lastErr;
|
|
@@ -1801,162 +2027,6 @@ function clamp(n) {
|
|
|
1801
2027
|
return Math.max(0, Math.min(1, n));
|
|
1802
2028
|
}
|
|
1803
2029
|
|
|
1804
|
-
// src/chains-brain.ts
|
|
1805
|
-
function isChainsRow(x) {
|
|
1806
|
-
if (!x || typeof x !== "object") return false;
|
|
1807
|
-
const r = x;
|
|
1808
|
-
return typeof r.archetype === "string" && typeof r.tier === "number" && typeof r.model_id === "string";
|
|
1809
|
-
}
|
|
1810
|
-
function mapRowsToChains(rows) {
|
|
1811
|
-
const grouped = /* @__PURE__ */ new Map();
|
|
1812
|
-
for (const row of rows) {
|
|
1813
|
-
if (!isChainsRow(row)) continue;
|
|
1814
|
-
const list = grouped.get(row.archetype) ?? [];
|
|
1815
|
-
list.push(row);
|
|
1816
|
-
grouped.set(row.archetype, list);
|
|
1817
|
-
}
|
|
1818
|
-
const out = {};
|
|
1819
|
-
for (const [archetype, group] of grouped.entries()) {
|
|
1820
|
-
group.sort((a, b) => a.tier - b.tier);
|
|
1821
|
-
out[archetype] = group.map((r) => r.model_id);
|
|
1822
|
-
}
|
|
1823
|
-
const bundled = getAllStarterChains();
|
|
1824
|
-
for (const archetype of Object.keys(bundled)) {
|
|
1825
|
-
if (!out[archetype]) out[archetype] = bundled[archetype];
|
|
1826
|
-
}
|
|
1827
|
-
return out;
|
|
1828
|
-
}
|
|
1829
|
-
var loadChainsFromBrain = createBrainQueryCache({
|
|
1830
|
-
table: "kgauto_chains",
|
|
1831
|
-
mapRows: mapRowsToChains,
|
|
1832
|
-
bundledFallback: getAllStarterChains
|
|
1833
|
-
});
|
|
1834
|
-
|
|
1835
|
-
// src/fallback.ts
|
|
1836
|
-
var STARTER_CHAINS = {
|
|
1837
|
-
// Reasoning floor — never degrade. Walk UP on 429 to Opus → cross-provider.
|
|
1838
|
-
critique: [
|
|
1839
|
-
"claude-opus-4-7",
|
|
1840
|
-
"claude-sonnet-4-6",
|
|
1841
|
-
"gemini-2.5-pro"
|
|
1842
|
-
],
|
|
1843
|
-
// Reasoning matters — Sonnet primary; walk UP to Opus on 429 (rare exception
|
|
1844
|
-
// to "always cheaper"); cross-provider via Pro; DeepSeek Pro as tier 3 floor.
|
|
1845
|
-
plan: [
|
|
1846
|
-
"claude-sonnet-4-6",
|
|
1847
|
-
"claude-opus-4-7",
|
|
1848
|
-
"gemini-2.5-pro",
|
|
1849
|
-
"deepseek-v4-pro"
|
|
1850
|
-
],
|
|
1851
|
-
// Quality + cost match. Walk Sonnet → Haiku same-provider, Pro cross,
|
|
1852
|
-
// Flash floor for the open-posture chain.
|
|
1853
|
-
generate: [
|
|
1854
|
-
"claude-sonnet-4-6",
|
|
1855
|
-
"claude-haiku-4-5",
|
|
1856
|
-
"gemini-2.5-pro",
|
|
1857
|
-
"gemini-2.5-flash"
|
|
1858
|
-
],
|
|
1859
|
-
ask: [
|
|
1860
|
-
"claude-sonnet-4-6",
|
|
1861
|
-
"claude-haiku-4-5",
|
|
1862
|
-
"gemini-2.5-pro",
|
|
1863
|
-
"gemini-2.5-flash"
|
|
1864
|
-
],
|
|
1865
|
-
// Structured-output archetype — Flash skipped (alpha.8 MAX_TOKENS cliff),
|
|
1866
|
-
// DeepSeek skipped (no brain evidence). Floor at Haiku.
|
|
1867
|
-
extract: [
|
|
1868
|
-
"claude-sonnet-4-6",
|
|
1869
|
-
"claude-haiku-4-5",
|
|
1870
|
-
"gemini-2.5-pro"
|
|
1871
|
-
],
|
|
1872
|
-
// Forgiving archetype — Sonnet primary but Flash safely floors it.
|
|
1873
|
-
transform: [
|
|
1874
|
-
"claude-sonnet-4-6",
|
|
1875
|
-
"claude-haiku-4-5",
|
|
1876
|
-
"gemini-2.5-pro",
|
|
1877
|
-
"gemini-2.5-flash"
|
|
1878
|
-
],
|
|
1879
|
-
// Parallel-tool throughput champion (Flash, L-040). Tier 1 cross-provider
|
|
1880
|
-
// Pro; tier 2 Sonnet (quality safety net for blocked-Flash case); tier 3
|
|
1881
|
-
// Haiku (reduced tool budget — cliff at 16 fires).
|
|
1882
|
-
hunt: [
|
|
1883
|
-
"gemini-2.5-flash",
|
|
1884
|
-
"gemini-2.5-pro",
|
|
1885
|
-
"claude-sonnet-4-6",
|
|
1886
|
-
"claude-haiku-4-5"
|
|
1887
|
-
],
|
|
1888
|
-
// Cost-sensitive + tolerant. DeepSeek brain-evidence tier 1; Haiku tier 2
|
|
1889
|
-
// for quality safety; Flash-Lite emergency floor (onboarded s22).
|
|
1890
|
-
summarize: [
|
|
1891
|
-
"gemini-2.5-flash",
|
|
1892
|
-
"deepseek-v4-flash",
|
|
1893
|
-
"claude-haiku-4-5",
|
|
1894
|
-
"gemini-2.5-flash-lite"
|
|
1895
|
-
],
|
|
1896
|
-
// Brain-validated DeepSeek tier 1 (169 rows, 0% empty); Haiku tier 2;
|
|
1897
|
-
// Flash-Lite floor for repeat-prompt workloads (cache-discount 10×).
|
|
1898
|
-
classify: [
|
|
1899
|
-
"gemini-2.5-flash",
|
|
1900
|
-
"deepseek-v4-flash",
|
|
1901
|
-
"claude-haiku-4-5",
|
|
1902
|
-
"gemini-2.5-flash-lite"
|
|
1903
|
-
]
|
|
1904
|
-
};
|
|
1905
|
-
function getDefaultFallbackChain(opts) {
|
|
1906
|
-
const { archetype, primary, maxDepth = 3, policy, reachability } = opts;
|
|
1907
|
-
if (maxDepth < 1) {
|
|
1908
|
-
throw new Error(
|
|
1909
|
-
`getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
|
|
1910
|
-
);
|
|
1911
|
-
}
|
|
1912
|
-
const allChains = loadChainsFromBrain();
|
|
1913
|
-
const starter = allChains[archetype];
|
|
1914
|
-
if (!starter) {
|
|
1915
|
-
throw new Error(
|
|
1916
|
-
`getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(allChains).join(", ")}`
|
|
1917
|
-
);
|
|
1918
|
-
}
|
|
1919
|
-
let chain;
|
|
1920
|
-
if (primary) {
|
|
1921
|
-
chain = [primary, ...starter.filter((id) => id !== primary)];
|
|
1922
|
-
} else {
|
|
1923
|
-
chain = [...starter];
|
|
1924
|
-
}
|
|
1925
|
-
if (policy?.blockedModels && policy.blockedModels.length > 0) {
|
|
1926
|
-
const blocked = new Set(policy.blockedModels);
|
|
1927
|
-
chain = chain.filter((id) => !blocked.has(id));
|
|
1928
|
-
}
|
|
1929
|
-
const seen = /* @__PURE__ */ new Set();
|
|
1930
|
-
const deduped = [];
|
|
1931
|
-
for (const id of chain) {
|
|
1932
|
-
if (!seen.has(id)) {
|
|
1933
|
-
seen.add(id);
|
|
1934
|
-
deduped.push(id);
|
|
1935
|
-
}
|
|
1936
|
-
}
|
|
1937
|
-
let filtered = deduped;
|
|
1938
|
-
if (reachability) {
|
|
1939
|
-
filtered = deduped.filter((id) => isModelReachable(id, reachability));
|
|
1940
|
-
}
|
|
1941
|
-
return filtered.slice(0, maxDepth);
|
|
1942
|
-
}
|
|
1943
|
-
function getStarterChain(archetype) {
|
|
1944
|
-
const chain = STARTER_CHAINS[archetype];
|
|
1945
|
-
if (!chain) {
|
|
1946
|
-
throw new Error(
|
|
1947
|
-
`getStarterChain: unknown archetype "${archetype}"`
|
|
1948
|
-
);
|
|
1949
|
-
}
|
|
1950
|
-
return [...chain];
|
|
1951
|
-
}
|
|
1952
|
-
function getAllStarterChains() {
|
|
1953
|
-
const out = {};
|
|
1954
|
-
for (const [archetype, chain] of Object.entries(STARTER_CHAINS)) {
|
|
1955
|
-
out[archetype] = [...chain];
|
|
1956
|
-
}
|
|
1957
|
-
return out;
|
|
1958
|
-
}
|
|
1959
|
-
|
|
1960
2030
|
// src/archetype-perf-brain.ts
|
|
1961
2031
|
function isPerfRow(x) {
|
|
1962
2032
|
if (!x || typeof x !== "object") return false;
|
package/dist/profiles.js
CHANGED
|
@@ -644,6 +644,211 @@ var PROFILES_RAW = [
|
|
|
644
644
|
hunt: 4
|
|
645
645
|
// sequential tools — same as V4-Flash
|
|
646
646
|
}
|
|
647
|
+
},
|
|
648
|
+
// ── Auto-onboarded (UNVERIFIED) ──
|
|
649
|
+
// Cloned by scripts/auto-onboard-models.mjs from a same-family template.
|
|
650
|
+
// Each entry's pricing/context/cliffs/lowering reflects the template, NOT
|
|
651
|
+
// provider docs. Verify before promoting status to 'current' (L-049/L-081).
|
|
652
|
+
{
|
|
653
|
+
id: "gemini-3-flash-preview",
|
|
654
|
+
verifiedAgainstDocs: "UNVERIFIED-AUTO-ONBOARD",
|
|
655
|
+
provider: "google",
|
|
656
|
+
status: "preview",
|
|
657
|
+
maxContextTokens: 1048576,
|
|
658
|
+
maxOutputTokens: 65535,
|
|
659
|
+
maxTools: 128,
|
|
660
|
+
parallelToolCalls: true,
|
|
661
|
+
structuredOutput: "native",
|
|
662
|
+
systemPromptMode: "separate",
|
|
663
|
+
streaming: true,
|
|
664
|
+
cliffs: [
|
|
665
|
+
{
|
|
666
|
+
metric: "input_tokens",
|
|
667
|
+
threshold: 8e3,
|
|
668
|
+
action: "downgrade_quality_warning",
|
|
669
|
+
reason: "Quality degrades significantly above ~8K context tokens"
|
|
670
|
+
},
|
|
671
|
+
{
|
|
672
|
+
metric: "tool_count",
|
|
673
|
+
threshold: 20,
|
|
674
|
+
action: "drop_to_top_relevant",
|
|
675
|
+
reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
|
|
676
|
+
},
|
|
677
|
+
{
|
|
678
|
+
metric: "thinking_with_short_output",
|
|
679
|
+
threshold: 1,
|
|
680
|
+
action: "force_thinking_budget_zero",
|
|
681
|
+
reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
|
|
682
|
+
},
|
|
683
|
+
{
|
|
684
|
+
// s11 trust artifact (2026-05-02): brain showed 5/5 empty rate on
|
|
685
|
+
// tt-intelligence/summarize/gemini-2.5-flash with tools offered.
|
|
686
|
+
// v1's disable_thinking_for_short_output already fired and didn't
|
|
687
|
+
// help — disabling thinking is necessary but not sufficient. Tools
|
|
688
|
+
// present + summarize intent confuses Flash into a no-output state
|
|
689
|
+
// (likely tool-decision purgatory). Strip tools entirely for this
|
|
690
|
+
// archetype on this model.
|
|
691
|
+
metric: "tool_count",
|
|
692
|
+
threshold: 1,
|
|
693
|
+
whenIntent: "summarize",
|
|
694
|
+
action: "strip_tools",
|
|
695
|
+
reason: "Gemini Flash returns empty when summarize intent has tools offered (5/5 empty rate observed in v1 prod 2026-04-19, replayed into v2 brain 2026-04-29)"
|
|
696
|
+
}
|
|
697
|
+
],
|
|
698
|
+
costInputPer1m: 0.3,
|
|
699
|
+
costOutputPer1m: 2.5,
|
|
700
|
+
lowering: {
|
|
701
|
+
...GOOGLE_LOWERING_BASE,
|
|
702
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
703
|
+
},
|
|
704
|
+
recovery: [
|
|
705
|
+
{
|
|
706
|
+
signal: "empty_response_after_tool",
|
|
707
|
+
action: "retry_with_params",
|
|
708
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
709
|
+
maxRetries: 1,
|
|
710
|
+
reason: "Known: empty after tool result \u2014 retry with thinking off"
|
|
711
|
+
},
|
|
712
|
+
{
|
|
713
|
+
signal: "empty_response",
|
|
714
|
+
action: "retry_with_params",
|
|
715
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
716
|
+
maxRetries: 1,
|
|
717
|
+
reason: "Empty response \u2014 try with thinking off"
|
|
718
|
+
},
|
|
719
|
+
{
|
|
720
|
+
signal: "malformed_function_call",
|
|
721
|
+
action: "escalate",
|
|
722
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
|
|
723
|
+
}
|
|
724
|
+
],
|
|
725
|
+
strengths: ["speed", "volume", "classification", "1m_context", "cost"],
|
|
726
|
+
weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
|
|
727
|
+
notes: "AUTO-ONBOARDED 2026-05-16 from `gemini-2.5-flash`. Pricing, context, cliffs are template-cloned and UNVERIFIED \u2014 confirm against provider docs before promoting status to 'current'.",
|
|
728
|
+
// Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
|
|
729
|
+
// 15-75 calls/step beats Sonnet — L-040), summarize, classify.
|
|
730
|
+
archetypePerf: {
|
|
731
|
+
hunt: 9,
|
|
732
|
+
// L-040: parallel tool throughput 15-75/step
|
|
733
|
+
classify: 7,
|
|
734
|
+
// brain-validated, 218 rows
|
|
735
|
+
summarize: 7,
|
|
736
|
+
// brain-validated; cliff strips tools when present
|
|
737
|
+
transform: 7,
|
|
738
|
+
ask: 7,
|
|
739
|
+
generate: 6,
|
|
740
|
+
plan: 5,
|
|
741
|
+
extract: 6,
|
|
742
|
+
// alpha.8 MAX_TOKENS history on structured output
|
|
743
|
+
critique: 4
|
|
744
|
+
// reasoning shallower than Sonnet/Opus
|
|
745
|
+
}
|
|
746
|
+
},
|
|
747
|
+
{
|
|
748
|
+
// ── Gemini 2.5 Flash-Lite ──
|
|
749
|
+
// Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
|
|
750
|
+
// it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
|
|
751
|
+
// stable. Positioned BELOW Flash on the cost/perf frontier:
|
|
752
|
+
// input $0.10/M (Flash $0.30/M) — 3× cheaper
|
|
753
|
+
// output $0.40/M (Flash $2.50/M) — 6× cheaper
|
|
754
|
+
// cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
|
|
755
|
+
// Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
|
|
756
|
+
// is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
|
|
757
|
+
// thresholds. The brain will validate/relax these as evidence accumulates
|
|
758
|
+
// per (archetype, model) tuple. Currently ZERO brain rows for this model.
|
|
759
|
+
id: "gemini-3.1-flash-lite",
|
|
760
|
+
verifiedAgainstDocs: "UNVERIFIED-AUTO-ONBOARD",
|
|
761
|
+
provider: "google",
|
|
762
|
+
status: "preview",
|
|
763
|
+
maxContextTokens: 1048576,
|
|
764
|
+
maxOutputTokens: 65536,
|
|
765
|
+
maxTools: 128,
|
|
766
|
+
parallelToolCalls: true,
|
|
767
|
+
structuredOutput: "native",
|
|
768
|
+
systemPromptMode: "separate",
|
|
769
|
+
streaming: true,
|
|
770
|
+
cliffs: [
|
|
771
|
+
{
|
|
772
|
+
metric: "input_tokens",
|
|
773
|
+
threshold: 8e3,
|
|
774
|
+
action: "downgrade_quality_warning",
|
|
775
|
+
reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
|
|
776
|
+
},
|
|
777
|
+
{
|
|
778
|
+
metric: "tool_count",
|
|
779
|
+
threshold: 10,
|
|
780
|
+
action: "drop_to_top_relevant",
|
|
781
|
+
reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
|
|
782
|
+
},
|
|
783
|
+
{
|
|
784
|
+
metric: "thinking_with_short_output",
|
|
785
|
+
threshold: 1,
|
|
786
|
+
action: "force_thinking_budget_zero",
|
|
787
|
+
reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
|
|
788
|
+
},
|
|
789
|
+
{
|
|
790
|
+
// Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
|
|
791
|
+
// trust artifact, kgauto commit 3872832). Flash-Lite shares the
|
|
792
|
+
// same architectural family — almost certainly inherits this cliff.
|
|
793
|
+
// Ship the guard preemptively; brain telemetry confirms or relaxes.
|
|
794
|
+
metric: "tool_count",
|
|
795
|
+
threshold: 1,
|
|
796
|
+
whenIntent: "summarize",
|
|
797
|
+
action: "strip_tools",
|
|
798
|
+
reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
|
|
799
|
+
}
|
|
800
|
+
],
|
|
801
|
+
costInputPer1m: 0.1,
|
|
802
|
+
costOutputPer1m: 0.4,
|
|
803
|
+
lowering: {
|
|
804
|
+
...GOOGLE_LOWERING_BASE,
|
|
805
|
+
// Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
|
|
806
|
+
// $0.10/M input. Material for repeat-prompt workloads (classify shape).
|
|
807
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
808
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
809
|
+
},
|
|
810
|
+
recovery: [
|
|
811
|
+
{
|
|
812
|
+
signal: "empty_response_after_tool",
|
|
813
|
+
action: "retry_with_params",
|
|
814
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
815
|
+
maxRetries: 1,
|
|
816
|
+
reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
|
|
817
|
+
},
|
|
818
|
+
{
|
|
819
|
+
signal: "empty_response",
|
|
820
|
+
action: "retry_with_params",
|
|
821
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
822
|
+
maxRetries: 1,
|
|
823
|
+
reason: "Empty response \u2014 try with thinking off."
|
|
824
|
+
},
|
|
825
|
+
{
|
|
826
|
+
signal: "malformed_function_call",
|
|
827
|
+
action: "escalate",
|
|
828
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
|
|
829
|
+
}
|
|
830
|
+
],
|
|
831
|
+
strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
|
|
832
|
+
weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
|
|
833
|
+
notes: "AUTO-ONBOARDED 2026-05-16 from `gemini-2.5-flash-lite`. Pricing, context, cliffs are template-cloned and UNVERIFIED \u2014 confirm against provider docs before promoting status to 'current'.",
|
|
834
|
+
// Tier 3 emergency floor for summarize/classify chains. ZERO brain
|
|
835
|
+
// rows — all values are starter hypotheses anchored to "smaller
|
|
836
|
+
// sibling of Flash, at-or-below Flash perf on every archetype." The
|
|
837
|
+
// first 50 brain rows per archetype will validate or relax these.
|
|
838
|
+
archetypePerf: {
|
|
839
|
+
classify: 6,
|
|
840
|
+
// starter hypothesis — verify (Flash is 7, lite likely ≤)
|
|
841
|
+
summarize: 6,
|
|
842
|
+
// starter hypothesis — verify; cliff strips tools
|
|
843
|
+
transform: 6,
|
|
844
|
+
// starter hypothesis — verify
|
|
845
|
+
ask: 5,
|
|
846
|
+
hunt: 5,
|
|
847
|
+
generate: 4,
|
|
848
|
+
extract: 4,
|
|
849
|
+
plan: 3,
|
|
850
|
+
critique: 3
|
|
851
|
+
}
|
|
647
852
|
}
|
|
648
853
|
];
|
|
649
854
|
var ALIASES = {
|